

@pytest.fixture(scope="module")
-def server(zephyr_lora_added_tokens_files: str):  # noqa: F811
+def server():
    args = [
        # use half precision for speed and memory savings in CI environment
        "--dtype",
@@ -24,12 +24,6 @@ def server(zephyr_lora_added_tokens_files: str):  # noqa: F811
        "--enforce-eager",
        "--max-num-seqs",
        "128",
-        # lora config
-        "--enable-lora",
-        "--lora-modules",
-        f"zephyr-lora2={zephyr_lora_added_tokens_files}",
-        "--max-lora-rank",
-        "64",
        "--enable-tokenizer-info-endpoint",
    ]

@@ -38,10 +32,8 @@ def server(zephyr_lora_added_tokens_files: str):  # noqa: F811


@pytest.fixture(scope="module")
-def tokenizer_name(model_name: str,
-                   zephyr_lora_added_tokens_files: str):  # noqa: F811
-    return zephyr_lora_added_tokens_files if (
-        model_name == "zephyr-lora2") else model_name
+def tokenizer_name(model_name: str):
+    return model_name


@pytest_asyncio.fixture
@@ -53,7 +45,7 @@ async def client(server):
@pytest.mark.asyncio
@pytest.mark.parametrize(
    "model_name,tokenizer_name",
-    [(MODEL_NAME, MODEL_NAME), ("zephyr-lora2", "zephyr-lora2")],
+    [(MODEL_NAME, MODEL_NAME)],
    indirect=["tokenizer_name"],
)
async def test_tokenize_completions(
@@ -86,7 +78,7 @@ async def test_tokenize_completions(
@pytest.mark.asyncio
@pytest.mark.parametrize(
    "model_name,tokenizer_name",
-    [(MODEL_NAME, MODEL_NAME), ("zephyr-lora2", "zephyr-lora2")],
+    [(MODEL_NAME, MODEL_NAME)],
    indirect=["tokenizer_name"],
)
async def test_tokenize_chat(
@@ -148,7 +140,7 @@ async def test_tokenize_chat(
@pytest.mark.asyncio
@pytest.mark.parametrize(
    "model_name,tokenizer_name",
-    [(MODEL_NAME, MODEL_NAME), ("zephyr-lora2", "zephyr-lora2")],
+    [(MODEL_NAME, MODEL_NAME)],
    indirect=["tokenizer_name"],
)
async def test_tokenize_chat_with_tools(
@@ -225,7 +217,7 @@ async def test_tokenize_chat_with_tools(
@pytest.mark.asyncio
@pytest.mark.parametrize(
    "model_name, tokenizer_name",
-    [(MODEL_NAME, MODEL_NAME), ("zephyr-lora2", "zephyr-lora2")],
+    [(MODEL_NAME, MODEL_NAME)],
    indirect=["tokenizer_name"],
)
async def test_tokenize_with_return_token_strs(
@@ -260,7 +252,7 @@ async def test_tokenize_with_return_token_strs(
@pytest.mark.asyncio
@pytest.mark.parametrize(
    "model_name,tokenizer_name",
-    [(MODEL_NAME, MODEL_NAME), ("zephyr-lora2", "zephyr-lora2")],
+    [(MODEL_NAME, MODEL_NAME)],
    indirect=["tokenizer_name"],
)
async def test_detokenize(
@@ -287,7 +279,7 @@ async def test_detokenize(
@pytest.mark.asyncio
@pytest.mark.parametrize(
    "model_name,tokenizer_name",
-    [(MODEL_NAME, MODEL_NAME), ("zephyr-lora2", "zephyr-lora2")],
+    [(MODEL_NAME, MODEL_NAME)],
    indirect=["tokenizer_name"],
)
async def test_tokenizer_info_basic(
@@ -384,4 +376,4 @@ async def test_tokenizer_info_chat_template(server: RemoteOpenAIServer):
    if chat_template:
        assert isinstance(chat_template,
                          str), ("Chat template should be a string")
-        assert chat_template.strip(), "Chat template should not be empty"
+        assert chat_template.strip(), "Chat template should not be empty"