From 4b5bd40057d2d8047aeee38a0e49181269389c44 Mon Sep 17 00:00:00 2001 From: Russell Bryant Date: Mon, 10 Mar 2025 09:25:54 -0400 Subject: [PATCH 1/3] [Build/CI] Upgrade xgrammar to >=0.1.15 This update includes support for aarch64 among other fixes and improvements. Closes #11886 Closes #13986 Implements part of #13894 Signed-off-by: Russell Bryant --- requirements/common.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements/common.txt b/requirements/common.txt index 27f5aad96aa3..9640bd17c2e2 100644 --- a/requirements/common.txt +++ b/requirements/common.txt @@ -19,7 +19,7 @@ tiktoken >= 0.6.0 # Required for DBRX tokenizer lm-format-enforcer >= 0.10.11, < 0.11 outlines == 0.1.11 lark == 1.2.2 -xgrammar == 0.1.11; platform_machine == "x86_64" +xgrammar >= 0.1.15; platform_machine == "x86_64" or platform_machine == "aarch64" typing_extensions >= 4.10 filelock >= 3.16.1 # need to contain https://github.com/tox-dev/filelock/pull/317 partial-json-parser # used for parsing partial JSON outputs @@ -37,4 +37,4 @@ depyf==0.18.0 # required for profiling and debugging with compilation config cloudpickle # allows pickling lambda functions in model_executor/models/registry.py watchfiles # required for http server to monitor the updates of TLS files python-json-logger # Used by logging as per examples/other/logging_configuration.md -scipy # Required for phi-4-multimodal-instruct \ No newline at end of file +scipy # Required for phi-4-multimodal-instruct From 4aa2ff355dd6b5374e02f51ac106a191a8bdf5ac Mon Sep 17 00:00:00 2001 From: Russell Bryant Date: Mon, 10 Mar 2025 21:19:21 -0400 Subject: [PATCH 2/3] Switch xgrammar dep to be pinned to 0.1.15 Signed-off-by: Russell Bryant --- requirements/common.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/common.txt b/requirements/common.txt index 9640bd17c2e2..13a06011e409 100644 --- a/requirements/common.txt +++ b/requirements/common.txt @@ -19,7 +19,7 @@ tiktoken >= 0.6.0 # Required for DBRX tokenizer lm-format-enforcer >= 0.10.11, < 0.11 outlines == 0.1.11 lark == 1.2.2 -xgrammar >= 0.1.15; platform_machine == "x86_64" or platform_machine == "aarch64" +xgrammar == 0.1.15; platform_machine == "x86_64" or platform_machine == "aarch64" typing_extensions >= 4.10 filelock >= 3.16.1 # need to contain https://github.com/tox-dev/filelock/pull/317 partial-json-parser # used for parsing partial JSON outputs From d55cfbdaf0cac3c9d8dd2c2365da3320c741dd8a Mon Sep 17 00:00:00 2001 From: Russell Bryant Date: Mon, 10 Mar 2025 21:35:50 -0400 Subject: [PATCH 3/3] [V1] Add regex structured output support with xgrammar This is built on top of #14563, as the upgraded xgrammar is required for regex support to work. Signed-off-by: Russell Bryant --- .../llm/test_struct_output_generate.py | 32 +++++++++---------- vllm/v1/structured_output/__init__.py | 2 ++ vllm/v1/structured_output/utils.py | 6 +++- 3 files changed, 23 insertions(+), 17 deletions(-) diff --git a/tests/v1/entrypoints/llm/test_struct_output_generate.py b/tests/v1/entrypoints/llm/test_struct_output_generate.py index 871739bcf164..bddd224548c8 100644 --- a/tests/v1/entrypoints/llm/test_struct_output_generate.py +++ b/tests/v1/entrypoints/llm/test_struct_output_generate.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 import json +import re import jsonschema import pytest @@ -219,25 +220,24 @@ def test_guided_regex(monkeypatch, sample_regex, guided_decoding_backend: str): guided_decoding=GuidedDecodingParams( regex=sample_regex, backend=guided_decoding_backend)) - with pytest.raises(ValueError, - match="Regex guided decoding is not supported."): - llm.generate(prompts=[ + outputs = llm.generate( + prompts=[ f"Give an example IPv4 address with this regex: {sample_regex}" ] * 2, - sampling_params=sampling_params, - use_tqdm=True) + sampling_params=sampling_params, + use_tqdm=True, + ) - # Once regex is supported -- - #assert outputs is not None - #for output in outputs: - # assert output is not None - # assert isinstance(output, RequestOutput) - # prompt = output.prompt - # generated_text = output.outputs[0].text - # print(generated_text) - # assert generated_text is not None - # assert re.fullmatch(sample_regex, generated_text) is not None - # print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}") + assert outputs is not None + for output in outputs: + assert output is not None + assert isinstance(output, RequestOutput) + prompt = output.prompt + generated_text = output.outputs[0].text + print(generated_text) + assert generated_text is not None + assert re.fullmatch(sample_regex, generated_text) is not None + print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}") @pytest.mark.skip_global_cleanup diff --git a/vllm/v1/structured_output/__init__.py b/vllm/v1/structured_output/__init__.py index 0c2e0ac2aa73..efd65737337f 100644 --- a/vllm/v1/structured_output/__init__.py +++ b/vllm/v1/structured_output/__init__.py @@ -112,6 +112,8 @@ def initialize_grammar(self, key: StructuredOutputKey) -> Grammar: ctx = self.compiler.compile_builtin_json_grammar() elif request_type == StructuredOutputOptions.GRAMMAR: ctx = self.compiler.compile_grammar(grammar_spec) + elif request_type == StructuredOutputOptions.REGEX: + ctx = self.compiler.compile_regex(grammar_spec) else: logger.error("Validation should have already occurred. " "Please file an issue.") diff --git a/vllm/v1/structured_output/utils.py b/vllm/v1/structured_output/utils.py index 7b1adb834e74..b373d31e0abe 100644 --- a/vllm/v1/structured_output/utils.py +++ b/vllm/v1/structured_output/utils.py @@ -251,7 +251,11 @@ def validate_structured_output_request( gd_params = sampling_params.guided_decoding if gd_params.regex: - raise ValueError("Regex structured output is not supported.") + try: + xgr.Grammar.from_regex(gd_params.regex) + except Exception as err: + raise ValueError("Failed to transform regex into a grammar: " + f"{err}") from err if gd_params.choice: choice_grammar = choice_as_grammar(gd_params.choice)