diff --git a/serve/mlc_serve/engine/engine_common.py b/serve/mlc_serve/engine/engine_common.py index f8936ea9b7..25744f144f 100644 --- a/serve/mlc_serve/engine/engine_common.py +++ b/serve/mlc_serve/engine/engine_common.py @@ -98,7 +98,7 @@ def update_sequence( gen_seq.next_start_position = len(prompt_token_ids) + len(gen_seq.generated_token_ids) gen_seq.generated_token_ids.extend(new_token_ids) - delta = gen_seq.text_streamer.put([new_token_ids[-1]]) + delta = gen_seq.text_streamer.put([gen_seq.generated_token_ids[-1]]) gen_seq.output_text += delta gen_seq.output_text, delta, gen_seq.is_finished = check_stopping_sequences(