|
14 | 14 | # limitations under the License. |
15 | 15 | import pytest |
16 | 16 |
|
17 | | -from tensorrt_llm.llmapi import LLM |
| 17 | +from tensorrt_llm.llmapi import LLM, EagleDecodingConfig |
18 | 18 | from tensorrt_llm.models.modeling_utils import QuantConfig |
19 | 19 | from tensorrt_llm.quantization import QuantAlgo |
20 | 20 |
|
@@ -290,3 +290,50 @@ def test_fp8_kvcache(self): |
290 | 290 | extra_evaluator_kwargs=self.EXTRA_EVALUATOR_KWARGS) |
291 | 291 | task = MMLU(self.MODEL_NAME) |
292 | 292 | task.evaluate(llm) |
| 293 | + |
| 294 | + |
class TestEagleVicuna_7B_v1_3(LlmapiAccuracyTestHarness):
    """Accuracy harness for Vicuna-7B v1.3 with EAGLE (static-tree) speculative decoding."""

    MODEL_NAME = "lmsys/vicuna-7b-v1.3"
    MODEL_PATH = f"{llm_models_root()}/vicuna-7b-v1.3"

    # Static EAGLE draft tree: each inner list is a path of child indices
    # from the root of the speculation tree.
    _EAGLE_CHOICES = [
        [0], [0, 0], [1], [0, 1], [2], [0, 0, 0], [1, 0], [0, 2], [3],
        [0, 3], [4], [0, 4], [2, 0], [0, 5], [0, 0, 1], [5], [0, 6], [6],
        [0, 7], [0, 1, 0], [1, 1], [7], [0, 8], [0, 0, 2], [3, 0], [0, 9],
        [8], [9], [1, 0, 0], [0, 2, 0], [1, 2], [0, 0, 3], [4, 0], [2, 1],
        [0, 0, 4], [0, 0, 5], [0, 0, 0, 0], [0, 1, 1], [0, 0, 6], [0, 3, 0],
        [5, 0], [1, 3], [0, 0, 7], [0, 0, 8], [0, 0, 9], [6, 0], [0, 4, 0],
        [1, 4], [7, 0], [0, 1, 2], [2, 0, 0], [3, 1], [2, 2], [8, 0],
        [0, 5, 0], [1, 5], [1, 0, 1], [0, 2, 1], [9, 0], [0, 6, 0],
        [0, 0, 0, 1], [1, 6], [0, 7, 0],
    ]

    speculative_config = EagleDecodingConfig(
        max_draft_len=63,
        speculative_model=f"{llm_models_root()}/EAGLE-Vicuna-7B-v1.3",
        num_eagle_layers=4,
        max_non_leaves_per_layer=10,
        eagle_choices=_EAGLE_CHOICES)

    def test_auto_dtype(self):
        """Evaluate CNN/DailyMail accuracy with EAGLE speculation enabled."""
        # Spec-dec use case less than bs=8
        with LLM(self.MODEL_PATH,
                 max_batch_size=8,
                 speculative_config=self.speculative_config) as llm:
            CnnDailymail(self.MODEL_NAME).evaluate(llm)
| 319 | + |
| 320 | + |
class TestEagle2Vicuna_7B_v1_3(LlmapiAccuracyTestHarness):
    """Accuracy harness for Vicuna-7B v1.3 with EAGLE-2 (dynamic-tree) speculative decoding."""

    MODEL_NAME = "lmsys/vicuna-7b-v1.3"
    MODEL_PATH = f"{llm_models_root()}/vicuna-7b-v1.3"

    # EAGLE-2 variant: the draft tree is built dynamically at runtime
    # (use_dynamic_tree) instead of a fixed eagle_choices tree.
    speculative_config = EagleDecodingConfig(
        max_draft_len=63,
        speculative_model=f"{llm_models_root()}/EAGLE-Vicuna-7B-v1.3",
        num_eagle_layers=4,
        max_non_leaves_per_layer=10,
        use_dynamic_tree=True,
        dynamic_tree_max_topK=10)

    def test_auto_dtype(self):
        """Evaluate CNN/DailyMail accuracy with EAGLE-2 speculation enabled."""
        # Spec-dec use case less than bs=8
        with LLM(self.MODEL_PATH,
                 max_batch_size=8,
                 speculative_config=self.speculative_config) as llm:
            CnnDailymail(self.MODEL_NAME).evaluate(llm)
0 commit comments