Update example script
WoosukKwon committed Jul 11, 2024
1 parent af7557e commit a40bcee
Showing 1 changed file with 10 additions and 6 deletions.
examples/offline_inference_tpu.py (16 changed lines: 10 additions & 6 deletions)
@@ -1,10 +1,14 @@
 from vllm import LLM, SamplingParams
 
 prompts = [
-    "Hello, my name is",
-    "The president of the United States is",
-    "The capital of France is",
-    "The future of AI is",
+    "A robot may not injure a human being",
+    "It is only with the heart that one can see rightly;",
+    "Life is like a box of chocolates.",
+]
+answers = [
+    " or, through inaction, allow a human being to come to harm.",
+    " what is essential is invisible to the eye.",
+    " You never know what you're gonna get.",
 ]
 N = 1
 # Currently, top-p sampling is disabled. `top_p` should be 1.0.
@@ -17,8 +21,8 @@
 # In real workloads, `enforce_eager` should be `False`.
 llm = LLM(model="google/gemma-2b", enforce_eager=True)
 outputs = llm.generate(prompts, sampling_params)
-for output in outputs:
+for output, answer in zip(outputs, answers):
     prompt = output.prompt
     generated_text = output.outputs[0].text
     print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
-    print(output.outputs)
+    assert generated_text.startswith(answer)
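
For reference, here is a sketch of how the full examples/offline_inference_tpu.py likely reads after this commit. The SamplingParams call sits in the collapsed middle of the diff (old lines 11-16), so its arguments below are illustrative assumptions; only the lines shown in the diff above are confirmed by the commit.

from vllm import LLM, SamplingParams

prompts = [
    "A robot may not injure a human being",
    "It is only with the heart that one can see rightly;",
    "Life is like a box of chocolates.",
]
answers = [
    " or, through inaction, allow a human being to come to harm.",
    " what is essential is invisible to the eye.",
    " You never know what you're gonna get.",
]
N = 1
# Currently, top-p sampling is disabled. `top_p` should be 1.0.
sampling_params = SamplingParams(temperature=0.7,  # assumed values: this block is
                                 top_p=1.0,        # hidden in the collapsed hunk
                                 n=N,
                                 max_tokens=16)

# In real workloads, `enforce_eager` should be `False`.
llm = LLM(model="google/gemma-2b", enforce_eager=True)
outputs = llm.generate(prompts, sampling_params)

# The updated loop pairs each output with its expected continuation and
# fails loudly if the generated text drifts from the reference answer.
for output, answer in zip(outputs, answers):
    prompt = output.prompt
    generated_text = output.outputs[0].text
    print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
    assert generated_text.startswith(answer)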
