[XPU] Fix XPU model runner calling torch.cuda APIs (vllm-project#25011)

jikunshang · charlifu · commit ce7436610e43 · 2025-09-25T16:16:39.000Z
Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>
Signed-off-by: charlifu <charlifu@amd.com>
diff --git a/vllm/v1/worker/xpu_model_runner.py b/vllm/v1/worker/xpu_model_runner.py
@@ -45,8 +45,12 @@ def __init__(self, *args, **kwargs) -> None:
             self.synchronize = lambda: None
 
     try:
-        # replace cuda Event with xpu Event, this should work by default
+        # replace cuda APIs with xpu APIs, this should work by default
         torch.cuda.Event = torch.xpu.Event
+        torch.cuda.Stream = torch.xpu.Stream
+        torch.cuda.default_stream = torch.xpu.current_stream
+        torch.cuda.current_stream = torch.xpu.current_stream
+        torch.cuda.stream = torch.xpu.stream
         yield
     finally:
         # if anything goes wrong, just patch it with a placeholder