diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py index d5d1cecf27e..74f5cd72299 100644 --- a/python/sglang/srt/server_args.py +++ b/python/sglang/srt/server_args.py @@ -1106,15 +1106,12 @@ def _handle_deterministic_inference(self): # Check TP size if self.tp_size > 1: - raise ValueError( - "Currently only TP size 1 is supported for deterministic inference." + os.environ["NCCL_ALGO"] = "allreduce:tree" + self.disable_custom_all_reduce = True + logger.warning( + "NCCL_ALGO is set to 'allreduce:tree' and custom all reduce is disabled for deterministic inference when TP size > 1." ) - # Warnings on MoE models - logger.warning( - "Currently deterministic inference is only tested on dense models. Please be cautious when using it on MoE models." - ) - def _handle_other_validations(self): pass diff --git a/python/sglang/test/test_deterministic.py b/python/sglang/test/test_deterministic.py index 8c4e45c7cb5..28690267746 100644 --- a/python/sglang/test/test_deterministic.py +++ b/python/sglang/test/test_deterministic.py @@ -19,7 +19,7 @@ PROMPT_1 = "Tell me about Richard Feynman: " PROMPT_2 = "Generate 1000 random numbers. Go directly into it, don't say Sure and don't say here are numbers. Just start with a number." dirpath = os.path.dirname(__file__) -with open("python/sglang/test/long_prompt.txt", "r") as f: +with open(os.path.join(dirpath, "long_prompt.txt"), "r") as f: LONG_PROMPT = f.read()