@@ -11,15 +11,15 @@
 AsyncLLMEngine are working correctly.
 """
 
-import lm_eval
-import pytest
 import json
-import threading
 import os
-
+import threading
 from pathlib import Path
 from vllm.platforms import current_platform
 
+import lm_eval
+import pytest
+
 MODEL_NAMES = [
     "Qwen/Qwen3-1.7B",
     "google/gemma-3-1b-it",
@@ -42,21 +42,30 @@
 }
 
 # Parametrize test cases based on CLI arguments or default values
-def parametrize_by_cli_or_default(metafunc, fixture_name, cli_parameter, default_list):
+def parametrize_by_cli_or_default(metafunc, fixture_name, cli_parameter,
+                                  default_list):
     if fixture_name in metafunc.fixturenames:
         print(f"Checking CLI parameter '{cli_parameter}' for '{fixture_name}'")
         names_str = metafunc.config.getoption(cli_parameter)
         if names_str:
             print(f"Using '{cli_parameter}' parameter for '{fixture_name}'")
-            param_list = [name.strip() for name in names_str.split(',') if name.strip()]
+            param_list = [
+                name.strip() for name in names_str.split(',') if name.strip()
+            ]
             metafunc.parametrize(fixture_name, param_list)
         else:
             print(f"Using default list for '{fixture_name}'")
             metafunc.parametrize(fixture_name, default_list)
 
 def pytest_generate_tests(metafunc):
-    parametrize_by_cli_or_default(metafunc, fixture_name="model", cli_parameter="--model-names", default_list=MODEL_NAMES)
-    parametrize_by_cli_or_default(metafunc, fixture_name="fp8_kv_model", cli_parameter="--fp8-kv-model-names", default_list=FP8_KV_MODEL_NAMES)
+    parametrize_by_cli_or_default(metafunc,
+                                  fixture_name="model",
+                                  cli_parameter="--model-names",
+                                  default_list=MODEL_NAMES)
+    parametrize_by_cli_or_default(metafunc,
+                                  fixture_name="fp8_kv_model",
+                                  cli_parameter="--fp8-kv-model-names",
+                                  default_list=FP8_KV_MODEL_NAMES)
 
 # Write expected values to json file
 # TBD: To support the functionality of connecting GPU and TPU expected values in the future
@@ -67,17 +76,23 @@ def write_expected_value_to_json(model_name, measured_value, json_filepath):
         with open(json_filepath, 'r', encoding='utf-8') as f:
             data = json.load(f)
     except (FileNotFoundError, json.JSONDecodeError):
-        print(f"'{json_filepath}' not found or is empty/invalid. A new one will be created.")
+        print(
+            f"'{json_filepath}' not found or is empty/invalid. A new one will be created."
+        )
         data = {}
 
     data[model_name] = measured_value
 
     try:
         with open(json_filepath, 'w', encoding='utf-8') as f:
             json.dump(data, f, indent=4)
-        print(f"Successfully updated '{json_filepath}' with the result for {model_name}.")
+        print(
+            f"Successfully updated '{json_filepath}' with the result for {model_name}."
+        )
     except IOError as e:
-        print(f"Error: Failed to write to file '{json_filepath}'. Reason: {e}")
+        print(
+            f"Error: Failed to write to file '{json_filepath}'. Reason: {e}"
+        )
 
 # Read expected values from json file if exist
 # TBD: To support the functionality of connecting GPU and TPU expected values in the future
@@ -93,19 +108,24 @@ def read_expected_value(expected_json_filepath=None):
         with open(expected_json_filepath, 'r', encoding='utf-8') as f:
             expected_values_data = json.load(f)
     else:
-        raise FileNotFoundError(f"Expected values file not found: {expected_json_filepath}")
+        raise FileNotFoundError(
+            f"Expected values file not found: {expected_json_filepath}")
     return expected_values_data
 
 
-def run_test(model_name, expected_values_data, expected_json_filepath, more_args=None):
+def run_test(model_name,
+             expected_values_data,
+             expected_json_filepath,
+             more_args=None):
     """Run the end to end accuracy test."""
     print(f"Running test for model: {model_name}")
 
     model_args = f"pretrained={model_name},max_model_len=4096"
 
     download_path = "/mnt/disks/persist"
     # download_path = "/tmp/hf_model"
-    if os.path.isdir(download_path) and os.access(download_path, os.R_OK) and os.access(download_path, os.W_OK):
+    if os.path.isdir(download_path) and os.access(
+            download_path, os.R_OK) and os.access(download_path, os.W_OK):
         model_args = f"{model_args},download_dir={download_path}"
 
     if more_args is not None:
@@ -120,7 +140,7 @@ def run_test(model_name, expected_values_data, expected_json_filepath, more_args
 
     # Execute default behavior when `expected_json_filepath` is not set.
     if expected_json_filepath is None:
-        print(f"Execute default behavior")
+        print("Execute default behavior")
         measured_value = results["results"][TASK][FILTER]
         assert model_name in EXPECTED_VALUES, (
             f"Cannot find the expected value for the model {model_name=}")
@@ -129,27 +149,32 @@ def run_test(model_name, expected_values_data, expected_json_filepath, more_args
             and measured_value + RTOL > expected_value
             ), f"Expected: {expected_value} | Measured: {measured_value}"
     else:
-        print(f"Execute specific models behavior")
+        print("Execute specific models behavior")
         measured_value = results["results"][TASK][FILTER]
         expected_value = expected_values_data.get(model_name)
 
         # Model expected value not exist, write in file
         if model_name not in expected_values_data:
             print(f"Warning: No expected value found for {model_name}. "
-                "Skipping accuracy check.")
+                  "Skipping accuracy check.")
             print(f"Measured value: {measured_value}")
-            write_expected_value_to_json(model_name, measured_value, expected_json_filepath)
+            write_expected_value_to_json(model_name, measured_value,
+                                         expected_json_filepath)
 
         else:
-            print(f"Found expected value! {model_name=}, {measured_value=}, {expected_value=}")
-            assert (measured_value - RTOL < expected_value
+            print(
+                f"Found expected value! {model_name=}, {measured_value=}, {expected_value=}"
+            )
+            assert (
+                measured_value - RTOL < expected_value
                 and measured_value + RTOL > expected_value
-                ), f"Expected: {expected_value} | Measured: {measured_value}"
+            ), f"Expected: {expected_value} | Measured: {measured_value}"
 
 @pytest.mark.skipif(not current_platform.is_cuda()
                     and not current_platform.is_tpu(),
                     reason="V1 is currently only supported on CUDA and TPU")
-def test_lm_eval_accuracy_v1_engine(model, monkeypatch: pytest.MonkeyPatch, request: pytest.FixtureRequest):
+def test_lm_eval_accuracy_v1_engine(model, monkeypatch: pytest.MonkeyPatch,
+                                    request: pytest.FixtureRequest):
     """Run with the V1 Engine."""
     print(f"Testing model: {model}...")
 
@@ -174,14 +199,16 @@ def test_lm_eval_accuracy_v1_engine(model, monkeypatch: pytest.MonkeyPatch, requ
 
     print(f"common args: {more_args}")
 
-    run_test(model, expected_values_data, expected_json_filepath, more_args)
+    run_test(model, expected_values_data, expected_json_filepath,
+             more_args)
 
 
 @pytest.mark.skipif(not current_platform.is_cuda()
                     and not current_platform.is_tpu(),
                     reason="V1 is currently only supported on CUDA and TPU")
 def test_lm_eval_accuracy_v1_engine_fp8_kv_cache(
-        fp8_kv_model, monkeypatch: pytest.MonkeyPatch, request: pytest.FixtureRequest):
+        fp8_kv_model, monkeypatch: pytest.MonkeyPatch,
+        request: pytest.FixtureRequest):
     """Run with the V1 Engine."""
     print(f"Testing fp8_kv_model: {fp8_kv_model}...")
 
@@ -206,4 +233,5 @@ def test_lm_eval_accuracy_v1_engine_fp8_kv_cache(
 
     print(f"common args: {more_args}")
 
-    run_test(fp8_kv_model, expected_values_data, expected_json_filepath, more_args)
+    run_test(fp8_kv_model, expected_values_data, expected_json_filepath,
+             more_args)
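
Note: the `--model-names` and `--fp8-kv-model-names` options that metafunc.config.getoption reads above must be registered with pytest before they can be parsed, and that registration is not part of this diff. A minimal sketch of the conftest.py hook, assuming a default of None so that pytest_generate_tests falls through to the built-in lists (the option names come from the diff; defaults and help text are assumptions):

    def pytest_addoption(parser):
        # Comma-separated model lists; when omitted, pytest_generate_tests
        # falls back to MODEL_NAMES / FP8_KV_MODEL_NAMES (assumed defaults).
        parser.addoption("--model-names", action="store", default=None,
                         help="Comma-separated list of models to test")
        parser.addoption("--fp8-kv-model-names", action="store", default=None,
                         help="Comma-separated models for the fp8 KV-cache test")

With such a hook in place, a single-model run would look something like `pytest <this test file> --model-names "Qwen/Qwen3-1.7B"`. The expected-values file written by write_expected_value_to_json is then a flat model-to-score JSON mapping, e.g. {"Qwen/Qwen3-1.7B": 0.68} (score shown is hypothetical).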