@@ -6,11 +6,12 @@

 # isort: off
 from .lora_test_utils import check_llama_7b_multi_unique_lora_adapters_from_request
-from .test_llm import (
-    get_model_path, global_kvcache_config, llama_model_path,
-    llm_get_stats_async_test_harness, llm_get_stats_test_harness, prompts,
-    run_llm_abort_request, run_llm_with_postprocess_parallel_and_result_handler,
-    tinyllama_logits_processor_test_harness, _test_llm_capture_request_error)
+from .test_llm import (get_model_path, global_kvcache_config, llama_model_path,
+                       llm_get_stats_async_test_harness,
+                       llm_get_stats_test_harness, prompts,
+                       run_llm_abort_request,
+                       run_llm_with_postprocess_parallel_and_result_handler,
+                       tinyllama_logits_processor_test_harness)
 from utils.util import (EnvVarsContextManager, force_ampere,
                         run_function_in_sub_process, similar,
                         skip_gpu_memory_less_than_40gb,
@@ -69,10 +70,6 @@ def test_llm_get_stats_async(return_context_logits, use_overlap,
         enable_iter_req_stats=enable_iter_req_stats)


-def test_llm_capture_request_error():
-    _test_llm_capture_request_error(pytorch_backend=True, tp_size=1)
-
-
 @force_ampere
 @pytest.mark.parametrize(
     "sampling_params",