Skip to content

Commit e90280a

Browse files
[TRTLLM-6541][test] Add NIM Related Cases [StarCoder2_7B] and [Codestral_22B_V01] (#6939)
Signed-off-by: FredricZ-2007 <[email protected]>
1 parent 816a120 commit e90280a

File tree

6 files changed

+91
-0
lines changed

6 files changed

+91
-0
lines changed

tests/integration/defs/accuracy/references/cnn_dailymail.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,14 @@ microsoft/Phi-3.5-mini-instruct:
4545
- accuracy: 31.354
4646
microsoft/Phi-4-mini-instruct:
4747
- accuracy: 32.921
48+
bigcode/starcoder2-7b:
49+
- accuracy: 26.611
50+
- quant_algo: FP8
51+
accuracy: 26.611
52+
mistralai/Codestral-22B-v0.1:
53+
- accuracy: 30.316
54+
- quant_algo: FP8
55+
accuracy: 30.316
4856
state-spaces/mamba-130m-hf:
4957
- accuracy: 19.470
5058
lmsys/vicuna-7b-v1.3:

tests/integration/defs/accuracy/references/gsm8k.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,8 @@ microsoft/Phi-4-multimodal-instruct-long-rope:
162162
- accuracy: 75.85
163163
microsoft/Phi-4-mini-instruct:
164164
- accuracy: 82.30
165+
mistralai/Codestral-22B-v0.1:
166+
- accuracy: 67.10
165167
GPT-OSS/BF16:
166168
- accuracy: 90.3
167169
GPT-OSS/MXFP4:

tests/integration/defs/accuracy/references/mmlu.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,14 @@ nvidia/Nemotron-H-56B-Base-8K:
232232
accuracy: 83.82
233233
microsoft/Phi-4-mini-instruct:
234234
- accuracy: 68.98
235+
bigcode/starcoder2-7b:
236+
- accuracy: 41.35
237+
- quant_algo: FP8
238+
accuracy: 41.35
239+
mistralai/Codestral-22B-v0.1:
240+
- accuracy: 61.72
241+
- quant_algo: FP8
242+
accuracy: 61.72
235243
# Created a dummy accuracy to track tp_size=2 for phi4-mini model.
236244
# TODO: update once https://nvbugs/5393849 is fixed.
237245
microsoft/Phi-4-mini-instruct-tp2:

tests/integration/defs/accuracy/test_llm_api.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -433,3 +433,55 @@ def test_auto_dtype(self):
433433
speculative_config=self.speculative_config) as llm:
434434
task = CnnDailymail(self.MODEL_NAME)
435435
task.evaluate(llm)
436+
437+
438+
class TestStarCoder2_7B(LlmapiAccuracyTestHarness):
    """Accuracy harness for bigcode/starcoder2-7b.

    Runs the CnnDailymail and MMLU tasks, once in the checkpoint's native
    dtype and once with on-the-fly FP8 quantization.
    """

    MODEL_NAME = "bigcode/starcoder2-7b"
    MODEL_PATH = f"{llm_models_root()}/starcoder2-7b"
    # Leave headroom for the weights by capping KV-cache memory use.
    kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.6)

    @pytest.mark.skip_less_device_memory(70000)
    def test_auto_dtype(self):
        """Evaluate both tasks using the model's native dtype."""
        with LLM(self.MODEL_PATH, kv_cache_config=self.kv_cache_config) as llm:
            for task_cls in (CnnDailymail, MMLU):
                task_cls(self.MODEL_NAME).evaluate(llm)

    @skip_pre_ada
    @pytest.mark.skip_less_device_memory(70000)
    def test_fp8(self):
        """Evaluate both tasks with FP8 post-training quantization."""
        fp8_config = QuantConfig(QuantAlgo.FP8)
        with LLM(self.MODEL_PATH,
                 quant_config=fp8_config,
                 kv_cache_config=self.kv_cache_config) as llm:
            for task_cls in (CnnDailymail, MMLU):
                task_cls(self.MODEL_NAME).evaluate(llm)
462+
463+
464+
class TestCodestral_22B_V01(LlmapiAccuracyTestHarness):
    """Accuracy harness for mistralai/Codestral-22B-v0.1.

    Runs the CnnDailymail and MMLU tasks, once in the checkpoint's native
    dtype and once with on-the-fly FP8 quantization.
    """

    MODEL_NAME = "mistralai/Codestral-22B-v0.1"
    MODEL_PATH = f"{llm_models_root()}/Codestral-22B-v0.1"
    # Leave headroom for the weights by capping KV-cache memory use.
    kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.6)

    @pytest.mark.skip_less_device_memory(80000)
    def test_auto_dtype(self):
        """Evaluate both tasks using the model's native dtype."""
        with LLM(self.MODEL_PATH, kv_cache_config=self.kv_cache_config) as llm:
            for task_cls in (CnnDailymail, MMLU):
                task_cls(self.MODEL_NAME).evaluate(llm)

    @skip_pre_ada
    @pytest.mark.skip_less_device_memory(80000)
    def test_fp8(self):
        """Evaluate both tasks with FP8 post-training quantization."""
        fp8_config = QuantConfig(QuantAlgo.FP8)
        with LLM(self.MODEL_PATH,
                 quant_config=fp8_config,
                 kv_cache_config=self.kv_cache_config) as llm:
            for task_cls in (CnnDailymail, MMLU):
                task_cls(self.MODEL_NAME).evaluate(llm)

tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2438,6 +2438,22 @@ def test_auto_dtype(self):
24382438
task.evaluate(llm)
24392439

24402440

2441+
class TestCodestral_22B_V01(LlmapiAccuracyTestHarness):
    """Accuracy harness for mistralai/Codestral-22B-v0.1 (PyTorch backend).

    Evaluates CnnDailymail, MMLU, and GSM8K with the checkpoint's native
    dtype.
    """

    MODEL_NAME = "mistralai/Codestral-22B-v0.1"
    MODEL_PATH = f"{llm_models_root()}/Codestral-22B-v0.1"

    @pytest.mark.skip_less_device_memory(80000)
    def test_auto_dtype(self):
        """Run all three accuracy tasks against one shared LLM instance."""
        # Leave headroom for the weights by capping KV-cache memory use.
        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.6)
        with LLM(self.MODEL_PATH, kv_cache_config=kv_cache_config) as llm:
            for task_cls in (CnnDailymail, MMLU, GSM8K):
                task_cls(self.MODEL_NAME).evaluate(llm)
2455+
2456+
24412457
class TestKanana_Instruct(LlmapiAccuracyTestHarness):
24422458
MODEL_NAME = "kanana-1.5-2.1b-instruct-2505"
24432459
MODEL_PATH = f"{llm_models_root()}/kanana-1.5-2.1b-instruct-2505"

tests/integration/test_lists/qa/llm_function_nim.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,8 @@ accuracy/test_llm_api_pytorch.py::TestNemotronH_56B_Base::test_auto_dtype[tp8-cu
2121
accuracy/test_llm_api_pytorch.py::TestNemotronUltra::test_auto_dtype[tp8ep4-cuda_graph=True]
2222
accuracy/test_llm_api_pytorch.py::TestNemotronUltra::test_fp8_prequantized[tp8ep4-cuda_graph=True]
2323
accuracy/test_llm_api_pytorch.py::TestNemotronUltra::test_fp8_prequantized[tp8-cuda_graph=True]
24+
accuracy/test_llm_api.py::TestStarCoder2_7B::test_auto_dtype
25+
accuracy/test_llm_api.py::TestStarCoder2_7B::test_fp8
26+
accuracy/test_llm_api.py::TestCodestral_22B_V01::test_auto_dtype
27+
accuracy/test_llm_api.py::TestCodestral_22B_V01::test_fp8
28+
accuracy/test_llm_api_pytorch.py::TestCodestral_22B_V01::test_auto_dtype

0 commit comments

Comments
 (0)