
Commit f1abc66

Fix timeout error in the FastAPI frontend (vllm-project#34)
1 parent: 5295816 · commit: f1abc66

File tree: 1 file changed (+5, -2 lines)

cacheflow/frontend/fastapi_frontend.py

Lines changed: 5 additions & 2 deletions
@@ -83,7 +83,7 @@ async def server_step(self):
         self.is_server_running = True
         updated_seq_groups = await self.server.step.remote()
         self.is_server_running = False
-        # Notify the waiting coroutines that there new outputs ready.
+        # Notify the waiting coroutines that there are new outputs ready.
         for seq_group in updated_seq_groups:
             group_id = seq_group.group_id
             self.running_seq_groups[group_id] = seq_group
@@ -121,7 +121,10 @@ async def generate(self, request_dict: Dict):
             # Wait for new output. The group_event will be set in server_step
             # when there is new output available for the sequence group.
             # Added a timeout to prevent deadlock.
-            await asyncio.wait_for(group_event.wait(), timeout=TIMEOUT_TO_PREVENT_DEADLOCK)
+            try:
+                await asyncio.wait_for(group_event.wait(), timeout=TIMEOUT_TO_PREVENT_DEADLOCK)
+            except asyncio.TimeoutError:
+                continue
             # Reset the event to wait for the next output.
             group_event.clear()
             # Decode and return new outputs
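In short, the fix: if asyncio.wait_for times out before server_step sets the group's event, generate now swallows the asyncio.TimeoutError and goes back to waiting instead of letting the exception abort the request. Below is a minimal sketch of that wait-and-retry pattern, not the actual frontend code; the helper name wait_for_group_event and the timeout value are placeholders, while group_event and TIMEOUT_TO_PREVENT_DEADLOCK are the names used in the diff.

    import asyncio

    # Placeholder value; the real constant is defined in fastapi_frontend.py.
    TIMEOUT_TO_PREVENT_DEADLOCK = 1  # seconds

    async def wait_for_group_event(group_event: asyncio.Event) -> None:
        # Hypothetical helper illustrating the pattern from the patch:
        # keep waiting on the event, and treat a timeout as "no output yet"
        # rather than as an error that propagates to the HTTP handler.
        while True:
            try:
                await asyncio.wait_for(group_event.wait(),
                                       timeout=TIMEOUT_TO_PREVENT_DEADLOCK)
            except asyncio.TimeoutError:
                continue  # timed out: retry instead of failing the request
            # Reset the event so the next step's output can be awaited.
            group_event.clear()
            return

In the actual frontend this logic sits inline in generate's streaming loop, so the continue simply re-enters that loop rather than calling a separate helper.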
