Skip to content

Commit

Permalink
[Kernel] Fix Flashinfer Correctness (vllm-project#7284)
Browse files — browse the repository at this point in the history
  • Loading branch information
LiuXiaoxuanPKU authored Aug 7, 2024
1 parent 8d1cef2 commit 46cc7fe
Showing 1 changed file with 7 additions and 3 deletions.
10 changes: 7 additions & 3 deletions vllm/attention/backends/flashinfer.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -127,6 +127,7 @@ def __post_init__(self):
raise ValueError(
f"Only {supported_head_sizes} are supported for head_dim,",
f"received {self.head_dim}.")
+ self.is_profile_run = is_block_tables_empty(self.block_tables)

def begin_forward(self):
if self.num_prefill_tokens > 0:
Expand All @@ -140,11 +141,14 @@ def begin_forward(self):
assert self.paged_kv_last_page_len is not None
batch_size = self.query_start_loc.shape[0] - 1
assert batch_size >= 0
- # The prefill stage does not read kv cache.
+ # The profile run does not read kv cache.
# Both paged_kv_indices and paged_kv_last_page_len are empty.
# paged_kv_indptr is a zero tensor with size batch_size + 1.
- self.paged_kv_indptr = torch.zeros(batch_size + 1,
- device=self.device)
+ if self.is_profile_run:
+ self.paged_kv_indptr = torch.zeros(batch_size + 1,
+ device=self.device)
+ else:
+ self.paged_kv_indptr = self.paged_kv_indptr.to(self.device)
self.paged_kv_last_page_len = self.paged_kv_last_page_len.to(
self.device)
self.paged_kv_indices = self.paged_kv_indices.to(self.device)
Expand Down

0 comments on commit 46cc7fe

Please sign in to comment.