diff --git a/serve/mlc_serve/engine/sync_engine.py b/serve/mlc_serve/engine/sync_engine.py index 5250c3870e..075bced22b 100644 --- a/serve/mlc_serve/engine/sync_engine.py +++ b/serve/mlc_serve/engine/sync_engine.py @@ -172,7 +172,8 @@ def step(self) -> InferenceStepResult: state.token_ids.extend(new_token_ids) for res in results: - state = self.current_batch[res.sequence_id.request_id] + request_id = res.sequence_id.request_id + state = self.current_batch[request_id] delta = self._decode_last_output(state) state.output_text += delta