From 0057b2b04202bf39a14ea6b2da4865ced77c62dc Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Tue, 29 Apr 2025 16:24:47 -0700 Subject: [PATCH 01/10] add more tests Signed-off-by: Yang Wang --- .buildkite/test-pipeline.yaml | 4 ++++ requirements/nightly_torch_test.txt | 8 +++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index d3c07cdda454..84ee991f5659 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -293,6 +293,7 @@ steps: parallelism: 4 - label: PyTorch Compilation Unit Tests + torch_nightly: true source_file_dependencies: - vllm/ - tests/compile @@ -302,6 +303,7 @@ steps: - pytest -v -s compile/test_sequence_parallelism.py - label: PyTorch Fullgraph Smoke Test # 9min + torch_nightly: true source_file_dependencies: - vllm/ - tests/compile @@ -312,6 +314,7 @@ steps: - pytest -v -s compile/piecewise/test_toy_llama.py - label: PyTorch Fullgraph Test # 18min + torch_nightly: true source_file_dependencies: - vllm/ - tests/compile @@ -436,6 +439,7 @@ steps: ##### models test ##### - label: Basic Models Test # 24min + torch_nightly: true source_file_dependencies: - vllm/ - tests/models diff --git a/requirements/nightly_torch_test.txt b/requirements/nightly_torch_test.txt index 199bcafe0bdd..e2711354ac10 100644 --- a/requirements/nightly_torch_test.txt +++ b/requirements/nightly_torch_test.txt @@ -23,5 +23,11 @@ runai-model-streamer-s3==0.11.0 tensorizer>=2.9.0 lm-eval==0.4.8 buildkite-test-collector==0.1.9 - lm-eval[api]==0.4.8 # required for model evaluation test + +# required for quantization test +bitsandbytes>=0.45.3 + +# required for minicpmo_26 test +vector_quantize_pytorch +vocos From 2ec84224245b8581b5db81edd4a08130b11bde14 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Tue, 29 Apr 2025 16:37:18 -0700 Subject: [PATCH 02/10] add more tests Signed-off-by: Yang Wang --- vllm/sampling_params.py | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/vllm/sampling_params.py b/vllm/sampling_params.py index 3ac5c5c3daab..8d1a52c329c6 100644 --- a/vllm/sampling_params.py +++ b/vllm/sampling_params.py @@ -14,7 +14,7 @@ from vllm.logits_process import LogitsProcessor from vllm.transformers_utils.tokenizer import AnyTokenizer from vllm.transformers_utils.tokenizers.mistral import MistralTokenizer - +# test logger = init_logger(__name__) _SAMPLING_EPS = 1e-5 @@ -73,6 +73,36 @@ def from_optional( structural_tag=structural_tag, ) + @property + def backend_name(self) -> str: + """Return the backend name without any options. + + For example if the backend is "xgrammar:no-fallback", returns "xgrammar" + """ + return (self.backend or "").split(":")[0] + + def backend_options(self) -> list[str]: + """Return the backend options as a list of strings.""" + if not self.backend or ":" not in self.backend: + return [] + return self.backend.split(":")[1].split(",") + + def add_option(self, opt_name: str) -> None: + """Adds an option to the backend options.""" + if not self.backend: + self.backend = f":{opt_name}" + elif ":" not in self.backend: + self.backend += f":{opt_name}" + else: + options = set(self.backend_options()) + options.add(opt_name) + self.backend = f"{self.backend_name}:{','.join(sorted(options))}" + + def no_fallback(self) -> bool: + """Returns True if the "no-fallback" option is supplied for the guided + decoding backend""" + return "no-fallback" in self.backend_options() + def __post_init__(self): """Validate that some fields are mutually exclusive.""" guide_count = sum([ From 0ac27fbc51125a519384f2af55ab84ec7d073601 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Tue, 29 Apr 2025 16:37:34 -0700 Subject: [PATCH 03/10] add more tests Signed-off-by: Yang Wang --- vllm/sampling_params.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/sampling_params.py b/vllm/sampling_params.py index 8d1a52c329c6..33738f0e5bc8 100644 --- a/vllm/sampling_params.py +++ b/vllm/sampling_params.py @@ -14,7 +14,7 @@ from vllm.logits_process import LogitsProcessor from vllm.transformers_utils.tokenizer import AnyTokenizer from vllm.transformers_utils.tokenizers.mistral import MistralTokenizer -# test + logger = init_logger(__name__) _SAMPLING_EPS = 1e-5 From d5f5684e227a37018aa5558ac7b5abc4d9e79299 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Tue, 29 Apr 2025 16:41:01 -0700 Subject: [PATCH 04/10] add more tests Signed-off-by: Yang Wang --- vllm/sampling_params.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/sampling_params.py b/vllm/sampling_params.py index 33738f0e5bc8..8d1a52c329c6 100644 --- a/vllm/sampling_params.py +++ b/vllm/sampling_params.py @@ -14,7 +14,7 @@ from vllm.logits_process import LogitsProcessor from vllm.transformers_utils.tokenizer import AnyTokenizer from vllm.transformers_utils.tokenizers.mistral import MistralTokenizer - +# test logger = init_logger(__name__) _SAMPLING_EPS = 1e-5 From 5ef55ff4f0771ae032ebe239bf69775989b73105 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Tue, 29 Apr 2025 16:41:41 -0700 Subject: [PATCH 05/10] add more tests Signed-off-by: Yang Wang --- vllm/sampling_params.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/sampling_params.py b/vllm/sampling_params.py index 8d1a52c329c6..33738f0e5bc8 100644 --- a/vllm/sampling_params.py +++ b/vllm/sampling_params.py @@ -14,7 +14,7 @@ from vllm.logits_process import LogitsProcessor from vllm.transformers_utils.tokenizer import AnyTokenizer from vllm.transformers_utils.tokenizers.mistral import MistralTokenizer -# test + logger = init_logger(__name__) _SAMPLING_EPS = 1e-5 From 92a7f3c756e48750ea05573c0dd40b1dc65e4899 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Tue, 29 Apr 2025 16:43:24 -0700 Subject: [PATCH 06/10] add more tests Signed-off-by: Yang Wang --- vllm/sampling_params.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/sampling_params.py b/vllm/sampling_params.py index 33738f0e5bc8..332f48708562 100644 --- a/vllm/sampling_params.py +++ b/vllm/sampling_params.py @@ -14,7 +14,7 @@ from vllm.logits_process import LogitsProcessor from vllm.transformers_utils.tokenizer import AnyTokenizer from vllm.transformers_utils.tokenizers.mistral import MistralTokenizer - +# comment logger = init_logger(__name__) _SAMPLING_EPS = 1e-5 From 525254bb0e5838dd79e2a70471b9372b3bdc4e4f Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Tue, 29 Apr 2025 16:43:37 -0700 Subject: [PATCH 07/10] add more tests Signed-off-by: Yang Wang --- vllm/sampling_params.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/sampling_params.py b/vllm/sampling_params.py index 332f48708562..33738f0e5bc8 100644 --- a/vllm/sampling_params.py +++ b/vllm/sampling_params.py @@ -14,7 +14,7 @@ from vllm.logits_process import LogitsProcessor from vllm.transformers_utils.tokenizer import AnyTokenizer from vllm.transformers_utils.tokenizers.mistral import MistralTokenizer -# comment + logger = init_logger(__name__) _SAMPLING_EPS = 1e-5 From 237c8befcfa6297443ee1c8c3dfadc1829cf4fdf Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Thu, 1 May 2025 10:02:17 -0700 Subject: [PATCH 08/10] add more tests Signed-off-by: Yang Wang --- vllm/sampling_params.py | 36 +++--------------------------------- 1 file changed, 3 insertions(+), 33 deletions(-) diff --git a/vllm/sampling_params.py b/vllm/sampling_params.py index 33738f0e5bc8..66a77681be9a 100644 --- a/vllm/sampling_params.py +++ b/vllm/sampling_params.py @@ -73,36 +73,6 @@ def from_optional( structural_tag=structural_tag, ) - @property - def backend_name(self) -> str: - """Return the backend name without any options. - - For example if the backend is "xgrammar:no-fallback", returns "xgrammar" - """ - return (self.backend or "").split(":")[0] - - def backend_options(self) -> list[str]: - """Return the backend options as a list of strings.""" - if not self.backend or ":" not in self.backend: - return [] - return self.backend.split(":")[1].split(",") - - def add_option(self, opt_name: str) -> None: - """Adds an option to the backend options.""" - if not self.backend: - self.backend = f":{opt_name}" - elif ":" not in self.backend: - self.backend += f":{opt_name}" - else: - options = set(self.backend_options()) - options.add(opt_name) - self.backend = f"{self.backend_name}:{','.join(sorted(options))}" - - def no_fallback(self) -> bool: - """Returns True if the "no-fallback" option is supplied for the guided - decoding backend""" - return "no-fallback" in self.backend_options() - def __post_init__(self): """Validate that some fields are mutually exclusive.""" guide_count = sum([ @@ -216,9 +186,9 @@ class SamplingParams( logits_processors: list of functions that modify logits based on previously generated tokens, and optionally prompt tokens as a first argument. - truncate_prompt_tokens: If set to -1, will use the truncation size - supported by the model. If set to an integer k, will use only - the last k tokens from the prompt (i.e., left truncation). + truncate_prompt_tokens: If set to -1, will use the truncation size + supported by the model. If set to an integer k, will use only + the last k tokens from the prompt (i.e., left truncation). Defaults to None (i.e., no truncation). guided_decoding: If provided, the engine will construct a guided decoding logits processor from these parameters. Defaults to None. From 4e4cd0933b2a962e1986e5b5f532e87701498ced Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Thu, 1 May 2025 10:07:22 -0700 Subject: [PATCH 09/10] add more tests Signed-off-by: Yang Wang --- vllm/sampling_params.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/vllm/sampling_params.py b/vllm/sampling_params.py index 66a77681be9a..6748c31dff89 100644 --- a/vllm/sampling_params.py +++ b/vllm/sampling_params.py @@ -188,8 +188,7 @@ class SamplingParams( a first argument. truncate_prompt_tokens: If set to -1, will use the truncation size supported by the model. If set to an integer k, will use only - the last k tokens from the prompt (i.e., left truncation). - Defaults to None (i.e., no truncation). + the last k tokens from the prompt (i.e., left truncation). guided_decoding: If provided, the engine will construct a guided decoding logits processor from these parameters. Defaults to None. logit_bias: If provided, the engine will construct a logits processor From fcd71d03145255302933bc6dd0ec804a3170289e Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Thu, 1 May 2025 10:07:54 -0700 Subject: [PATCH 10/10] add more tests Signed-off-by: Yang Wang --- vllm/sampling_params.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/sampling_params.py b/vllm/sampling_params.py index 6748c31dff89..66a77681be9a 100644 --- a/vllm/sampling_params.py +++ b/vllm/sampling_params.py @@ -188,7 +188,8 @@ class SamplingParams( a first argument. truncate_prompt_tokens: If set to -1, will use the truncation size supported by the model. If set to an integer k, will use only - the last k tokens from the prompt (i.e., left truncation). + the last k tokens from the prompt (i.e., left truncation). + Defaults to None (i.e., no truncation). guided_decoding: If provided, the engine will construct a guided decoding logits processor from these parameters. Defaults to None. logit_bias: If provided, the engine will construct a logits processor