Update example script
WoosukKwon committed Jul 11, 2024
1 parent af7557e commit a40bcee
Showing 1 changed file with 10 additions and 6 deletions.
examples/offline_inference_tpu.py (16 changed lines: 10 additions & 6 deletions)
@@ -1,10 +1,14 @@
 from vllm import LLM, SamplingParams
 
 prompts = [
-    "Hello, my name is",
-    "The president of the United States is",
-    "The capital of France is",
-    "The future of AI is",
+    "A robot may not injure a human being",
+    "It is only with the heart that one can see rightly;",
+    "Life is like a box of chocolates.",
+]
+answers = [
+    " or, through inaction, allow a human being to come to harm.",
+    " what is essential is invisible to the eye.",
+    " You never know what you're gonna get.",
 ]
 N = 1
 # Currently, top-p sampling is disabled. `top_p` should be 1.0.
@@ -17,8 +21,8 @@
 # In real workloads, `enforce_eager` should be `False`.
 llm = LLM(model="google/gemma-2b", enforce_eager=True)
 outputs = llm.generate(prompts, sampling_params)
-for output in outputs:
+for output, answer in zip(outputs, answers):
     prompt = output.prompt
     generated_text = output.outputs[0].text
     print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
-    print(output.outputs)
+    assert generated_text.startswith(answer)
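
For reference, here is a sketch of how the full examples/offline_inference_tpu.py likely reads after this commit. The SamplingParams call sits in the collapsed middle of the diff (old lines 11-16), so its arguments below are illustrative assumptions; only the lines shown in the diff above are confirmed by the commit.

from vllm import LLM, SamplingParams

prompts = [
    "A robot may not injure a human being",
    "It is only with the heart that one can see rightly;",
    "Life is like a box of chocolates.",
]
answers = [
    " or, through inaction, allow a human being to come to harm.",
    " what is essential is invisible to the eye.",
    " You never know what you're gonna get.",
]
N = 1
# Currently, top-p sampling is disabled. `top_p` should be 1.0.
sampling_params = SamplingParams(temperature=0.7,  # assumed values: this block is
                                 top_p=1.0,        # hidden in the collapsed hunk
                                 n=N,
                                 max_tokens=16)

# In real workloads, `enforce_eager` should be `False`.
llm = LLM(model="google/gemma-2b", enforce_eager=True)
outputs = llm.generate(prompts, sampling_params)

# The updated loop pairs each output with its expected continuation and
# fails loudly if the generated text drifts from the reference answer.
for output, answer in zip(outputs, answers):
    prompt = output.prompt
    generated_text = output.outputs[0].text
    print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
    assert generated_text.startswith(answer)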
