Merged
Changes from all commits · 50 commits
69068cd
chore: finalize cleanup from v0
aarnphm Aug 13, 2025
47ef968
fix: remove unnecessary frontmatter
aarnphm Aug 13, 2025
f5d594c
fix: tests to use correct CLI args
aarnphm Aug 13, 2025
0365951
Merge branch 'main' into pr/aarnphm/22772
hmellor Sep 1, 2025
bb884cd
Sweep for `guided_{choice/regex/json/grammar}`
hmellor Sep 1, 2025
f2cd9e0
`gd_params` -> `so_params`
hmellor Sep 1, 2025
8f37583
`g` -> `grammar`
hmellor Sep 1, 2025
6e972b8
Fix `config/__init__.py`
hmellor Sep 1, 2025
8810d84
`engine_level_backend` -> `backend`
hmellor Sep 1, 2025
03962b2
`guided_decoding` -> `structured_outputs`
hmellor Sep 1, 2025
ea86735
Fix `protocol.py`
hmellor Sep 1, 2025
fc0ce57
Missing docstring
hmellor Sep 1, 2025
36772c6
Add missing backend selection
hmellor Sep 2, 2025
6ac63c6
Remove last references to `guided_`
hmellor Sep 2, 2025
17c574d
Fix arg utils
hmellor Sep 2, 2025
b0c2916
`reasoning_backend` -> `reasoning_parser`, fix `args.reasoning_parser`
hmellor Sep 2, 2025
c0ace42
Merge branch 'main' into pr/aarnphm/22772
hmellor Sep 2, 2025
48328d7
Replace more instances of guided/guided decoding
hmellor Sep 2, 2025
8b35c08
Remove `StructuredOutputsParams.from_optional` as it's not necessary
hmellor Sep 2, 2025
f8947e1
Fix tests
hmellor Sep 2, 2025
ec81c2b
pre-commit
hmellor Sep 2, 2025
b991f92
Fix tests
hmellor Sep 2, 2025
ce73903
Fix test pipeline for removed file
hmellor Sep 2, 2025
37e7751
`--reasoning-parser` -> `--structured-outputs-config.reasoning_parser`
hmellor Sep 2, 2025
0e62898
Merge branch 'main' into feat/decoding-args-rename-all
hmellor Sep 4, 2025
eefd512
Merge branch 'main' into pr/aarnphm/22772
hmellor Sep 16, 2025
f85abd7
Add reasoning parser back as an `InitVar`
hmellor Sep 16, 2025
85b3b43
`pre-commit`
hmellor Sep 16, 2025
b4f70f7
Reinstate deprecated guided decoding CLI args
hmellor Sep 16, 2025
b52aab9
`--structured-outputs-config.reasoning_parser` -> `--reasoning-parser`
hmellor Sep 16, 2025
ac75b2a
`InitVar` didn't work
hmellor Sep 16, 2025
2929036
`pre-commit`
hmellor Sep 16, 2025
295ac17
`sample_choices` -> `sample_structured_outputs_choices`
hmellor Sep 16, 2025
6954712
Update mergify path
hmellor Sep 16, 2025
49b6d89
Fix wrong kwarg
hmellor Sep 16, 2025
e361772
Simplify dict -> config
hmellor Sep 16, 2025
94a2e74
Simplify request validation
hmellor Sep 16, 2025
02ef1a7
Small typo
hmellor Sep 16, 2025
a2b0c18
Fix type checking of `structured_outputs` in protocol
hmellor Sep 16, 2025
176ecce
Fix incorrect condition for enabling disable_any_whitespace in test
hmellor Sep 16, 2025
553d7a5
Fix opinionated backend selection when `backend="auto"`
hmellor Sep 16, 2025
bd5ef94
Remove badly merged change
hmellor Sep 16, 2025
8b38bc4
Fix opinionated backend selection part 2
hmellor Sep 16, 2025
76cb011
Fix comment
hmellor Sep 16, 2025
87488f5
Merge branch 'main' into pr/aarnphm/22772
hmellor Sep 17, 2025
f869f9c
Make failing test less flaky
hmellor Sep 17, 2025
ec94b4a
Fix structured output being enabled by response format and tool calling
hmellor Sep 17, 2025
2f2f3bc
Merge branch 'main' into feat/decoding-args-rename-all
hmellor Sep 17, 2025
476950a
Merge branch 'main' into feat/decoding-args-rename-all
DarkLight1337 Sep 18, 2025
5872fe7
Fix test
hmellor Sep 18, 2025
6 changes: 0 additions & 6 deletions .buildkite/scripts/hardware_ci/run-amd-test.sh
@@ -167,12 +167,6 @@ if [[ $commands == *" entrypoints/llm "* ]]; then
--ignore=entrypoints/llm/test_prompt_validation.py "}
fi

#Obsolete currently
##ignore certain Entrypoints/llm tests
#if [[ $commands == *" && pytest -v -s entrypoints/llm/test_guided_generate.py"* ]]; then
# commands=${commands//" && pytest -v -s entrypoints/llm/test_guided_generate.py"/" "}
#fi

# --ignore=entrypoints/openai/test_encoder_decoder.py \
# --ignore=entrypoints/openai/test_embedding.py \
# --ignore=entrypoints/openai/test_oot_registration.py
3 changes: 1 addition & 2 deletions .buildkite/test-pipeline.yaml
@@ -114,8 +114,7 @@ steps:
- tests/entrypoints/offline_mode
commands:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -v -s entrypoints/llm --ignore=entrypoints/llm/test_lazy_outlines.py --ignore=entrypoints/llm/test_generate.py --ignore=entrypoints/llm/test_collective_rpc.py
- pytest -v -s entrypoints/llm/test_lazy_outlines.py # it needs a clean process
- pytest -v -s entrypoints/llm --ignore=entrypoints/llm/test_generate.py --ignore=entrypoints/llm/test_collective_rpc.py
- pytest -v -s entrypoints/llm/test_generate.py # it needs a clean process
- VLLM_USE_V1=0 pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests

2 changes: 1 addition & 1 deletion .github/mergify.yml
@@ -171,7 +171,7 @@ pull_request_rules:
- files=examples/online_serving/openai_chat_completion_structured_outputs.py
- files=examples/online_serving/openai_chat_completion_structured_outputs_with_reasoning.py
- files~=^tests/v1/structured_output/
- files=tests/v1/entrypoints/llm/test_guided_generate.py
- files=tests/v1/entrypoints/llm/test_struct_output_generate.py
- files~=^vllm/v1/structured_output/
actions:
label:
16 changes: 8 additions & 8 deletions benchmarks/benchmark_serving_structured_output.py
@@ -696,11 +696,11 @@ def _eval_correctness_regex(expected, actual):
return re.match(args.regex, actual) is not None

def _eval_correctness(expected, actual):
if args.structure_type == "guided_json":
if args.structure_type == "json":
return _eval_correctness_json(expected, actual)
elif args.structure_type == "guided_regex":
elif args.structure_type == "regex":
return _eval_correctness_regex(expected, actual)
elif args.structure_type == "guided_choice":
elif args.structure_type == "choice":
return _eval_correctness_choice(expected, actual)
else:
return None
@@ -780,18 +780,18 @@ def main(args: argparse.Namespace):
)

if args.dataset == "grammar":
args.structure_type = "guided_grammar"
args.structure_type = "grammar"
elif args.dataset == "regex":
args.structure_type = "guided_regex"
args.structure_type = "regex"
elif args.dataset == "choice":
args.structure_type = "guided_choice"
args.structure_type = "choice"
else:
args.structure_type = "guided_json"
args.structure_type = "json"

if args.no_structured_output:
args.structured_output_ratio = 0
if args.save_results:
result_file_name = f"{args.structured_output_ratio}guided"
result_file_name = f"{args.structured_output_ratio}so"
result_file_name += f"_{backend}"
result_file_name += f"_{args.request_rate}qps"
result_file_name += f"_{args.model.split('/')[-1]}"
2 changes: 1 addition & 1 deletion docs/api/README.md
@@ -14,7 +14,7 @@ API documentation for vLLM's configuration classes.
- [vllm.config.LoRAConfig][]
- [vllm.config.MultiModalConfig][]
- [vllm.config.PoolerConfig][]
- [vllm.config.DecodingConfig][]
- [vllm.config.StructuredOutputsConfig][]
- [vllm.config.ObservabilityConfig][]
- [vllm.config.KVTransferConfig][]
- [vllm.config.CompilationConfig][]
10 changes: 5 additions & 5 deletions docs/features/reasoning_outputs.md
@@ -10,12 +10,12 @@ vLLM currently supports the following reasoning models:

| Model Series | Parser Name | Structured Output Support | Tool Calling |
|--------------|-------------|------------------|-------------|
| [DeepSeek R1 series](https://huggingface.co/collections/deepseek-ai/deepseek-r1-678e1e131c0169c0bc89728d) | `deepseek_r1` | `guided_json`, `guided_regex` | ❌ |
| [QwQ-32B](https://huggingface.co/Qwen/QwQ-32B) | `deepseek_r1` | `guided_json`, `guided_regex` | ✅ |
| [DeepSeek R1 series](https://huggingface.co/collections/deepseek-ai/deepseek-r1-678e1e131c0169c0bc89728d) | `deepseek_r1` | `json`, `regex` | ❌ |
| [QwQ-32B](https://huggingface.co/Qwen/QwQ-32B) | `deepseek_r1` | `json`, `regex` | ✅ |
| [IBM Granite 3.2 language models](https://huggingface.co/collections/ibm-granite/granite-32-language-models-67b3bc8c13508f6d064cff9a) | `granite` | ❌ | ❌ |
| [Qwen3 series](https://huggingface.co/collections/Qwen/qwen3-67dd247413f0e2e4f653967f) | `qwen3` | `guided_json`, `guided_regex` | ✅ |
| [Hunyuan A13B series](https://huggingface.co/collections/tencent/hunyuan-a13b-685ec38e5b46321e3ea7c4be) | `hunyuan_a13b` | `guided_json`, `guided_regex` | ✅ |
| [GLM-4.5 series](https://huggingface.co/collections/zai-org/glm-45-687c621d34bda8c9e4bf503b) | `glm45` | `guided_json`, `guided_regex` | ✅ |
| [Qwen3 series](https://huggingface.co/collections/Qwen/qwen3-67dd247413f0e2e4f653967f) | `qwen3` | `json`, `regex` | ✅ |
| [Hunyuan A13B series](https://huggingface.co/collections/tencent/hunyuan-a13b-685ec38e5b46321e3ea7c4be) | `hunyuan_a13b` | `json`, `regex` | ✅ |
| [GLM-4.5 series](https://huggingface.co/collections/zai-org/glm-45-687c621d34bda8c9e4bf503b) | `glm45` | `json`, `regex` | ✅ |

!!! note
IBM Granite 3.2 reasoning is disabled by default; to enable it, you must also pass `thinking=True` in your `chat_template_kwargs`.
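
Per the table above, parsers such as `deepseek_r1` and `qwen3` support `json` and `regex` constraints alongside reasoning. A hedged sketch of combining the two under the renamed API (server address is illustrative, and the server is assumed to have been started with a reasoning parser configured):

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="-")

# The regex constrains only the final answer; the reasoning content is
# separated out by the configured reasoning parser, if one is enabled.
completion = client.chat.completions.create(
    model="Qwen/QwQ-32B",
    messages=[{"role": "user", "content": "Which is larger, 9.11 or 9.8?"}],
    extra_body={"structured_outputs": {"regex": r"9\.(11|8) is larger\."}},
)
print(completion.choices[0].message.reasoning_content)  # parsed reasoning
print(completion.choices[0].message.content)            # constrained answer
```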
36 changes: 18 additions & 18 deletions docs/features/structured_outputs.md
@@ -12,23 +12,23 @@ You can generate structured outputs using the OpenAI's [Completions](https://pla

The following parameters are supported; they must be added as extra parameters:

- `guided_choice`: the output will be exactly one of the choices.
- `guided_regex`: the output will follow the regex pattern.
- `guided_json`: the output will follow the JSON schema.
- `guided_grammar`: the output will follow the context free grammar.
- `choice`: the output will be exactly one of the choices.
- `regex`: the output will follow the regex pattern.
- `json`: the output will follow the JSON schema.
- `grammar`: the output will follow the context free grammar.
- `structural_tag`: Follow a JSON schema within a set of specified tags within the generated text.

You can see the complete list of supported parameters on the [OpenAI-Compatible Server](../serving/openai_compatible_server.md) page.

Structured outputs are supported by default in the OpenAI-Compatible Server. You
may choose to specify the backend to use by setting the
`--guided-decoding-backend` flag to `vllm serve`. The default backend is `auto`,
`--structured-outputs-config.backend` flag to `vllm serve`. The default backend is `auto`,
which will try to choose an appropriate backend based on the details of the
request. You may also choose a specific backend, along with
some options. A full set of options is available in the `vllm serve --help`
text.

Now let´s see an example for each of the cases, starting with the `guided_choice`, as it´s the easiest one:
Now let's see an example for each of the cases, starting with `choice`, as it's the easiest one:

??? code

@@ -45,12 +45,12 @@ Now let´s see an example for each of the cases, starting with the `guided_choic
messages=[
{"role": "user", "content": "Classify this sentiment: vLLM is wonderful!"}
],
extra_body={"guided_choice": ["positive", "negative"]},
extra_body={"structured_outputs": {"choice": ["positive", "negative"]}},
)
print(completion.choices[0].message.content)
```

The next example shows how to use the `guided_regex`. The idea is to generate an email address, given a simple regex template:
The next example shows how to use `regex`. The idea is to generate an email address, given a simple regex template:

??? code

@@ -63,18 +63,18 @@ The next example shows how to use the `guided_regex`. The idea is to generate an
"content": "Generate an example email address for Alan Turing, who works in Enigma. End in .com and new line. Example result: [email protected]\n",
}
],
extra_body={"guided_regex": r"\w+@\w+\.com\n", "stop": ["\n"]},
extra_body={"structured_outputs": {"regex": r"\w+@\w+\.com\n"}, "stop": ["\n"]},
)
print(completion.choices[0].message.content)
```

One of the most relevant features in structured text generation is the option to generate a valid JSON with pre-defined fields and formats.
For this we can use the `guided_json` parameter in two different ways:
For this we can use the `json` parameter in two different ways:

- Using directly a [JSON Schema](https://json-schema.org/)
- Defining a [Pydantic model](https://docs.pydantic.dev/latest/) and then extracting the JSON Schema from it (which is normally an easier option).

The next example shows how to use the `guided_json` parameter with a Pydantic model:
The next example shows how to use the `response_format` parameter with a Pydantic model:

??? code

@@ -119,7 +119,7 @@ The next example shows how to use the `guided_json` parameter with a Pydantic mo
JSON schema and how the fields should be populated. This can improve the
results notably in most cases.

Finally we have the `guided_grammar` option, which is probably the most
Finally we have the `grammar` option, which is probably the most
difficult to use, but it's really powerful. It allows us to define complete
languages like SQL queries. It works by using a context-free EBNF grammar.
As an example, we can use it to define a specific format of simplified SQL queries:
@@ -149,7 +149,7 @@ As an example, we can use it to define a specific format of simplified SQL queries:
"content": "Generate an SQL query to show the 'username' and 'email' from the 'users' table.",
}
],
extra_body={"guided_grammar": simplified_sql_grammar},
extra_body={"structured_outputs": {"grammar": simplified_sql_grammar}},
)
print(completion.choices[0].message.content)
```
@@ -292,8 +292,8 @@ An example of using `structural_tag` can be found here: <gh-file:examples/online
## Offline Inference

Offline inference allows for the same types of structured outputs.
To use it, we´ll need to configure the guided decoding using the class `GuidedDecodingParams` inside `SamplingParams`.
The main available options inside `GuidedDecodingParams` are:
To use it, we'll need to configure structured outputs using the `StructuredOutputsParams` class inside `SamplingParams`.
The main available options inside `StructuredOutputsParams` are:

- `json`
- `regex`
@@ -309,12 +309,12 @@ shown below:

```python
from vllm import LLM, SamplingParams
from vllm.sampling_params import GuidedDecodingParams
from vllm.sampling_params import StructuredOutputsParams

llm = LLM(model="HuggingFaceTB/SmolLM2-1.7B-Instruct")

guided_decoding_params = GuidedDecodingParams(choice=["Positive", "Negative"])
sampling_params = SamplingParams(guided_decoding=guided_decoding_params)
structured_outputs_params = StructuredOutputsParams(choice=["Positive", "Negative"])
sampling_params = SamplingParams(structured_outputs=structured_outputs_params)
outputs = llm.generate(
prompts="Classify this sentiment: vLLM is wonderful!",
sampling_params=sampling_params,
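
The Pydantic/`response_format` example referenced in this file is collapsed in the diff above; a minimal sketch of what such a request might look like, assuming the OpenAI `json_schema` response format (model name and schema are illustrative):

```python
from openai import OpenAI
from pydantic import BaseModel


class CarDescription(BaseModel):
    brand: str
    model: str
    car_type: str


client = OpenAI(base_url="http://localhost:8000/v1", api_key="-")
completion = client.chat.completions.create(
    model="Qwen/Qwen2.5-3B-Instruct",
    messages=[
        {
            "role": "user",
            "content": "Generate a JSON with the brand, model and car_type "
            "of the most iconic car from the 90's",
        }
    ],
    # Standard OpenAI structured-output shape: a named JSON schema derived
    # from the Pydantic model.
    response_format={
        "type": "json_schema",
        "json_schema": {
            "name": "car-description",
            "schema": CarDescription.model_json_schema(),
        },
    },
)
print(completion.choices[0].message.content)
```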
11 changes: 5 additions & 6 deletions docs/features/tool_calling.md
@@ -71,7 +71,7 @@ This example demonstrates:
* Making a request with `tool_choice="auto"`
* Handling the structured response and executing the corresponding function

You can also specify a particular function using named function calling by setting `tool_choice={"type": "function", "function": {"name": "get_weather"}}`. Note that this will use the guided decoding backend - so the first time this is used, there will be several seconds of latency (or more) as the FSM is compiled for the first time before it is cached for subsequent requests.
You can also specify a particular function using named function calling by setting `tool_choice={"type": "function", "function": {"name": "get_weather"}}`. Note that this will use the structured outputs backend, so the first time it is used there will be several seconds of latency (or more) while the FSM is compiled, before it is cached for subsequent requests.

Remember that it's the caller's responsibility to:

@@ -83,19 +83,18 @@ For more advanced usage, including parallel tool calls and different model-speci

## Named Function Calling

vLLM supports named function calling in the chat completion API by default. It does so using Outlines through guided decoding, so this is
enabled by default and will work with any supported model. You are guaranteed a validly-parsable function call - not a
vLLM supports named function calling in the chat completion API by default. This should work with most structured outputs backends supported by vLLM. You are guaranteed a validly parsable function call - not a
high-quality one.

vLLM will use guided decoding to ensure the response matches the tool parameter object defined by the JSON schema in the `tools` parameter.
For best results, we recommend ensuring that the expected output format / schema is specified in the prompt to ensure that the model's intended generation is aligned with the schema that it's being forced to generate by the guided decoding backend.
vLLM will use structured outputs to ensure the response matches the tool parameter object defined by the JSON schema in the `tools` parameter.
For best results, we recommend specifying the expected output format/schema in the prompt, so that the model's intended generation is aligned with the schema that the structured outputs backend forces it to generate.

To use a named function, you need to define the functions in the `tools` parameter of the chat completion request, and
specify the `name` of one of the tools in the `tool_choice` parameter of the chat completion request.

## Required Function Calling

vLLM supports the `tool_choice='required'` option in the chat completion API. Similar to the named function calling, it also uses guided decoding, so this is enabled by default and will work with any supported model. The guided decoding features for `tool_choice='required'` (such as JSON schema with `anyOf`) are currently only supported in the V0 engine with the guided decoding backend `outlines`. However, support for alternative decoding backends are on the [roadmap](../usage/v1_guide.md#features) for the V1 engine.
vLLM supports the `tool_choice='required'` option in the chat completion API. Like named function calling, it uses structured outputs, so it is enabled by default and will work with any supported model. However, support for alternative decoding backends is on the [roadmap](../usage/v1_guide.md#features) for the V1 engine.

When `tool_choice='required'` is set, the model is guaranteed to generate one or more tool calls based on the specified tool list in the `tools` parameter. The number of tool calls depends on the user's query. The output format strictly follows the schema defined in the `tools` parameter.

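
Since the tool-calling document above describes named function calling only in prose, here is a minimal sketch of such a request (tool definition and model name are illustrative); as the text notes, the returned arguments are guaranteed to parse against the schema, not to be sensible:

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="-")

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather for a city.",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }
]

# Naming a tool in `tool_choice` forces a schema-conforming call to that tool.
completion = client.chat.completions.create(
    model="Qwen/Qwen2.5-3B-Instruct",
    messages=[{"role": "user", "content": "What is the weather in Berlin?"}],
    tools=tools,
    tool_choice={"type": "function", "function": {"name": "get_weather"}},
)
print(completion.choices[0].message.tool_calls[0].function.arguments)
```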
4 changes: 2 additions & 2 deletions docs/serving/openai_compatible_server.md
@@ -133,7 +133,7 @@ completion = client.chat.completions.create(
{"role": "user", "content": "Classify this sentiment: vLLM is wonderful!"}
],
extra_body={
"guided_choice": ["positive", "negative"]
"structured_outputs": {"choice": ["positive", "negative"]}
}
)
```
@@ -374,7 +374,7 @@ The following extra parameters are supported:
```python
--8<-- "vllm/entrypoints/openai/protocol.py:transcription-extra-params"
```

[](){ #translations-api }

### Translations API
54 changes: 29 additions & 25 deletions examples/offline_inference/structured_outputs.py
@@ -1,31 +1,34 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
This file demonstrates the example usage of guided decoding
to generate structured outputs using vLLM. It shows how to apply
different guided decoding techniques such as Choice, Regex, JSON schema,
and Grammar to produce structured and formatted results
based on specific prompts.
This file demonstrates the example usage of structured outputs
in vLLM. It shows how to apply different constraints such as choice,
regex, json schema, and grammar to produce structured and formatted
results based on specific prompts.
"""

from enum import Enum

from pydantic import BaseModel

from vllm import LLM, SamplingParams
from vllm.sampling_params import GuidedDecodingParams
from vllm.sampling_params import StructuredOutputsParams

MAX_TOKENS = 50

# Guided decoding by Choice (list of possible options)
guided_decoding_params_choice = GuidedDecodingParams(choice=["Positive", "Negative"])
sampling_params_choice = SamplingParams(guided_decoding=guided_decoding_params_choice)
# Structured outputs by Choice (list of possible options)
structured_outputs_params_choice = StructuredOutputsParams(
choice=["Positive", "Negative"]
)
sampling_params_choice = SamplingParams(
structured_outputs=structured_outputs_params_choice
)
prompt_choice = "Classify this sentiment: vLLM is wonderful!"

# Guided decoding by Regex
guided_decoding_params_regex = GuidedDecodingParams(regex=r"\w+@\w+\.com\n")
# Structured outputs by Regex
structured_outputs_params_regex = StructuredOutputsParams(regex=r"\w+@\w+\.com\n")
sampling_params_regex = SamplingParams(
guided_decoding=guided_decoding_params_regex,
structured_outputs=structured_outputs_params_regex,
stop=["\n"],
max_tokens=MAX_TOKENS,
)
@@ -36,7 +39,7 @@
)


# Guided decoding by JSON using Pydantic schema
# Structured outputs by JSON using Pydantic schema
class CarType(str, Enum):
sedan = "sedan"
suv = "SUV"
@@ -51,17 +54,16 @@ class CarDescription(BaseModel):


json_schema = CarDescription.model_json_schema()
guided_decoding_params_json = GuidedDecodingParams(json=json_schema)
structured_outputs_params_json = StructuredOutputsParams(json=json_schema)
sampling_params_json = SamplingParams(
guided_decoding=guided_decoding_params_json,
max_tokens=MAX_TOKENS,
structured_outputs=structured_outputs_params_json, max_tokens=MAX_TOKENS
)
prompt_json = (
"Generate a JSON with the brand, model and car_type of"
"Generate a JSON with the brand, model and car_type of "
"the most iconic car from the 90's"
)

# Guided decoding by Grammar
# Structured outputs by Grammar
simplified_sql_grammar = """
root ::= select_statement
select_statement ::= "SELECT " column " from " table " where " condition
@@ -70,13 +72,15 @@ class CarDescription(BaseModel):
condition ::= column "= " number
number ::= "1 " | "2 "
"""
guided_decoding_params_grammar = GuidedDecodingParams(grammar=simplified_sql_grammar)
structured_outputs_params_grammar = StructuredOutputsParams(
grammar=simplified_sql_grammar
)
sampling_params_grammar = SamplingParams(
guided_decoding=guided_decoding_params_grammar,
structured_outputs=structured_outputs_params_grammar,
max_tokens=MAX_TOKENS,
)
prompt_grammar = (
"Generate an SQL query to show the 'username' and 'email'from the 'users' table."
"Generate an SQL query to show the 'username' and 'email' from the 'users' table."
)


@@ -93,16 +97,16 @@ def main():
llm = LLM(model="Qwen/Qwen2.5-3B-Instruct", max_model_len=100)

choice_output = generate_output(prompt_choice, sampling_params_choice, llm)
format_output("Guided decoding by Choice", choice_output)
format_output("Structured outputs by Choice", choice_output)

regex_output = generate_output(prompt_regex, sampling_params_regex, llm)
format_output("Guided decoding by Regex", regex_output)
format_output("Structured outputs by Regex", regex_output)

json_output = generate_output(prompt_json, sampling_params_json, llm)
format_output("Guided decoding by JSON", json_output)
format_output("Structured outputs by JSON", json_output)

grammar_output = generate_output(prompt_grammar, sampling_params_grammar, llm)
format_output("Guided decoding by Grammar", grammar_output)
format_output("Structured outputs by Grammar", grammar_output)


if __name__ == "__main__":