Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BugFix] Fix frontend multiprocessing hang #7217

Merged
9 changes: 8 additions & 1 deletion vllm/entrypoints/openai/api_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,14 @@ async def build_async_engine_client(args) -> AsyncIterator[AsyncEngineClient]:

# Build RPCClient, which conforms to AsyncEngineClient Protocol.
async_engine_client = AsyncEngineRPCClient(port)
await async_engine_client.setup()

while True:
try:
await async_engine_client.setup()
break
except TimeoutError as e:
if not rpc_server_process.is_alive():
raise RuntimeError("Server crashed") from e
maxdebayser marked this conversation as resolved.
Show resolved Hide resolved

try:
yield async_engine_client
Expand Down
15 changes: 12 additions & 3 deletions vllm/entrypoints/openai/rpc/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@
from vllm.sampling_params import SamplingParams
from vllm.transformers_utils.tokenizer_group import init_tokenizer_from_configs

# Time in milliseconds to wait before checking if the server process is alive.
SERVER_START_TIMEOUT = 1000
maxdebayser marked this conversation as resolved.
Show resolved Hide resolved


class AsyncEngineRPCClient:

Expand Down Expand Up @@ -85,14 +88,19 @@ async def _send_get_data_rpc_request(self, request: RPCUtilityRequest,

return data

async def _send_one_way_rpc_request(self, request: RPC_REQUEST_TYPE,
error_message: str):
async def _send_one_way_rpc_request(self,
request: RPC_REQUEST_TYPE,
error_message: str,
timeout: int = 0):
maxdebayser marked this conversation as resolved.
Show resolved Hide resolved
"""Send one-way RPC request to trigger an action."""
with self.socket() as socket:
# Ping RPC Server with request.
await socket.send(cloudpickle.dumps(request))

# Await acknowledgement from RPCServer.
if timeout > 0 and await socket.poll(timeout) == 0:
maxdebayser marked this conversation as resolved.
Show resolved Hide resolved
raise TimeoutError(f"server didn't reply within {timeout} ms")

response = cloudpickle.loads(await socket.recv())

if not isinstance(response, str) or response != VLLM_RPC_SUCCESS_STR:
Expand All @@ -117,7 +125,8 @@ async def wait_for_server(self):

await self._send_one_way_rpc_request(
request=RPCUtilityRequest.IS_SERVER_READY,
error_message="Unable to start RPC Server.")
error_message="Unable to start RPC Server.",
timeout=SERVER_START_TIMEOUT)

async def _get_model_config_rpc(self) -> ModelConfig:
"""Get the ModelConfig object from the RPC Server"""
Expand Down
Loading