Skip to content

Commit

Permalink
update
Browse files (browse the repository at this point in the history)
  • Loading branch information
merrymercy committed Aug 15, 2024
1 parent 2609e01 commit 62fb110
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 6 deletions.
2 changes: 1 addition & 1 deletion benchmark/gsm8k/bench_sglang.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def main(args):
@sgl.function
def few_shot_gsm8k(s, question):
s += few_shot_examples + question
- s += sgl.gen("answer", max_tokens=512, stop="Question")
+ s += sgl.gen("answer", max_tokens=512, stop=["Question", "Assistant:"])

#####################################
########## SGL Program End ##########
Expand Down
10 changes: 5 additions & 5 deletions python/sglang/srt/model_executor/cuda_graph_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,10 +123,10 @@ def __init__(self, model_runner, max_batch_size_to_capture, use_torch_compile):
self.model_runner.flashinfer_workspace_buffer
)
else:
- self.flashinfer_workspace_buffer = [
- self.model_runner.flashinfer_workspace_buffer,
- self.model_runner.flashinfer_workspace_buffer,
- ]
+ self.flashinfer_workspace_buffer = (
+ self.model_runner.flashinfer_workspace_buffer
+ )

self.flashinfer_kv_indptr = [
self.flashinfer_kv_indptr,
self.flashinfer_kv_indptr.clone(),
Expand Down Expand Up @@ -200,7 +200,7 @@ def capture_one_batch_size(self, bs, forward):
for i in range(2):
flashinfer_decode_wrapper.append(
BatchDecodeWithPagedKVCacheWrapper(
- self.flashinfer_workspace_buffer[i],
+ self.flashinfer_workspace_buffer,
"NHD",
use_cuda_graph=True,
use_tensor_cores=use_tensor_cores,
Expand Down

0 comments on commit 62fb110

Please sign in to comment.