Skip to content

Commit 39ba6dc

Browse files
committed
Remove V1 batching tests
Signed-off-by: Iman Tabrizian <[email protected]>
1 parent 6567453 commit 39ba6dc

File tree

1 file changed

+13 −13 lines changed

tests/integration/defs/triton_server/test_triton_llm.py

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ def stop_triton_server():
3737
@pytest.mark.parametrize("KV_CACHE_FREE_GPU_MEM_FRACTION", [""])
3838
@pytest.mark.parametrize("ENABLE_TRT_OVERLAP", ["False"],
3939
ids=["disableTrtOverlap"])
40-
@pytest.mark.parametrize("BATCHING_STRATEGY", ["inflight_fused_batching", "V1"])
40+
@pytest.mark.parametrize("BATCHING_STRATEGY", ["inflight_fused_batching"])
4141
@pytest.mark.parametrize("DECOUPLED_MODE", ["True", "False"],
4242
ids=["enableDecoupleMode", "disableDecoupleMode"])
4343
@pytest.mark.parametrize("TRITON_MAX_BATCH_SIZE", ["128"])
@@ -170,7 +170,7 @@ def test_llama_v2_7b_ifb(
170170
@pytest.mark.parametrize("KV_CACHE_FREE_GPU_MEM_FRACTION", [""])
171171
@pytest.mark.parametrize("ENABLE_TRT_OVERLAP", ["False"],
172172
ids=["disableTrtOverlap"])
173-
@pytest.mark.parametrize("BATCHING_STRATEGY", ["inflight_fused_batching", "V1"])
173+
@pytest.mark.parametrize("BATCHING_STRATEGY", ["inflight_fused_batching"])
174174
@pytest.mark.parametrize("DECOUPLED_MODE", ["True", "False"],
175175
ids=["enableDecoupleMode", "disableDecoupleMode"])
176176
@pytest.mark.parametrize("TRITON_MAX_BATCH_SIZE", ["128"])
@@ -287,7 +287,7 @@ def test_mistral_v1_7b_ifb(
287287
@pytest.mark.parametrize("KV_CACHE_FREE_GPU_MEM_FRACTION", [""])
288288
@pytest.mark.parametrize("ENABLE_TRT_OVERLAP", ["False"],
289289
ids=["disableTrtOverlap"])
290-
@pytest.mark.parametrize("BATCHING_STRATEGY", ["inflight_fused_batching", "V1"])
290+
@pytest.mark.parametrize("BATCHING_STRATEGY", ["inflight_fused_batching"])
291291
@pytest.mark.parametrize("DECOUPLED_MODE", ["True", "False"],
292292
ids=["enableDecoupleMode", "disableDecoupleMode"])
293293
@pytest.mark.parametrize("TRITON_MAX_BATCH_SIZE", ["128"])
@@ -474,7 +474,7 @@ def test_mistral_v1_7b_python_backend(
474474
@pytest.mark.parametrize("KV_CACHE_FREE_GPU_MEM_FRACTION", [""])
475475
@pytest.mark.parametrize("ENABLE_TRT_OVERLAP", ["False"],
476476
ids=["disableTrtOverlap"])
477-
@pytest.mark.parametrize("BATCHING_STRATEGY", ["inflight_fused_batching", "V1"])
477+
@pytest.mark.parametrize("BATCHING_STRATEGY", ["inflight_fused_batching"])
478478
@pytest.mark.parametrize("DECOUPLED_MODE", ["True", "False"],
479479
ids=["enableDecoupleMode", "disableDecoupleMode"])
480480
@pytest.mark.parametrize("TRITON_MAX_BATCH_SIZE", ["128"])
@@ -592,7 +592,7 @@ def test_llama_v2_70b_ifb(
592592
@pytest.mark.parametrize("KV_CACHE_FREE_GPU_MEM_FRACTION", [""])
593593
@pytest.mark.parametrize("ENABLE_TRT_OVERLAP", ["False"],
594594
ids=["disableTrtOverlap"])
595-
@pytest.mark.parametrize("BATCHING_STRATEGY", ["inflight_fused_batching", "V1"])
595+
@pytest.mark.parametrize("BATCHING_STRATEGY", ["inflight_fused_batching"])
596596
@pytest.mark.parametrize("DECOUPLED_MODE", ["True", "False"],
597597
ids=["enableDecoupleMode", "disableDecoupleMode"])
598598
@pytest.mark.parametrize("TRITON_MAX_BATCH_SIZE", ["128"])
@@ -1043,7 +1043,7 @@ def test_gpt_350m_python_backend(
10431043
@pytest.mark.parametrize("KV_CACHE_FREE_GPU_MEM_FRACTION", [""])
10441044
@pytest.mark.parametrize("ENABLE_TRT_OVERLAP", ["False"],
10451045
ids=["disableTrtOverlap"])
1046-
@pytest.mark.parametrize("BATCHING_STRATEGY", ["inflight_fused_batching", "V1"])
1046+
@pytest.mark.parametrize("BATCHING_STRATEGY", ["inflight_fused_batching"])
10471047
@pytest.mark.parametrize("DECOUPLED_MODE", ["True", "False"],
10481048
ids=["enableDecoupleMode", "disableDecoupleMode"])
10491049
@pytest.mark.parametrize("TRITON_MAX_BATCH_SIZE", ["128"])
@@ -1445,7 +1445,7 @@ def test_whisper_large_v3_ifb(
14451445
@pytest.mark.parametrize("KV_CACHE_FREE_GPU_MEM_FRACTION", ["0.2"])
14461446
@pytest.mark.parametrize("ENABLE_TRT_OVERLAP", ["False"],
14471447
ids=["disableTrtOverlap"])
1448-
@pytest.mark.parametrize("BATCHING_STRATEGY", ["inflight_fused_batching", "V1"])
1448+
@pytest.mark.parametrize("BATCHING_STRATEGY", ["inflight_fused_batching"])
14491449
@pytest.mark.parametrize("DECOUPLED_MODE", ["False"],
14501450
ids=["disableDecoupleMode"])
14511451
@pytest.mark.parametrize("TRITON_MAX_BATCH_SIZE", ["128"])
@@ -1569,7 +1569,7 @@ def test_gpt_gather_logits_ifb(
15691569
@pytest.mark.parametrize("KV_CACHE_FREE_GPU_MEM_FRACTION", ["0.2"])
15701570
@pytest.mark.parametrize("ENABLE_TRT_OVERLAP", ["False"],
15711571
ids=["disableTrtOverlap"])
1572-
@pytest.mark.parametrize("BATCHING_STRATEGY", ["inflight_fused_batching", "V1"])
1572+
@pytest.mark.parametrize("BATCHING_STRATEGY", ["inflight_fused_batching"])
15731573
@pytest.mark.parametrize("DECOUPLED_MODE", ["False"],
15741574
ids=["disableDecoupleMode"])
15751575
@pytest.mark.parametrize("TRITON_MAX_BATCH_SIZE", ["128"])
@@ -1759,7 +1759,7 @@ def test_gpt_350m_speculative_decoding(
17591759
@pytest.mark.parametrize("KV_CACHE_FREE_GPU_MEM_FRACTION", ["0.2"])
17601760
@pytest.mark.parametrize("ENABLE_TRT_OVERLAP", ["False"],
17611761
ids=["disableTrtOverlap"])
1762-
@pytest.mark.parametrize("BATCHING_STRATEGY", ["inflight_fused_batching", "V1"])
1762+
@pytest.mark.parametrize("BATCHING_STRATEGY", ["inflight_fused_batching"])
17631763
@pytest.mark.parametrize("DECOUPLED_MODE", ["False"],
17641764
ids=["disableDecoupleMode"])
17651765
@pytest.mark.parametrize("TRITON_MAX_BATCH_SIZE", ["128"])
@@ -1950,7 +1950,7 @@ def test_gpt_350m_speculative_decoding_return_logits(
19501950
@pytest.mark.parametrize("KV_CACHE_FREE_GPU_MEM_FRACTION", ["0.2"])
19511951
@pytest.mark.parametrize("ENABLE_TRT_OVERLAP", ["False"],
19521952
ids=["disableTrtOverlap"])
1953-
@pytest.mark.parametrize("BATCHING_STRATEGY", ["inflight_fused_batching", "V1"])
1953+
@pytest.mark.parametrize("BATCHING_STRATEGY", ["inflight_fused_batching"])
19541954
@pytest.mark.parametrize("DECOUPLED_MODE", ["False"],
19551955
ids=["disableDecoupleMode"])
19561956
@pytest.mark.parametrize("TRITON_MAX_BATCH_SIZE", ["128"])
@@ -2104,7 +2104,7 @@ def test_gpt_speculative_decoding_bls(
21042104
@pytest.mark.parametrize("KV_CACHE_FREE_GPU_MEM_FRACTION", ["0.2"])
21052105
@pytest.mark.parametrize("ENABLE_TRT_OVERLAP", ["False"],
21062106
ids=["disableTrtOverlap"])
2107-
@pytest.mark.parametrize("BATCHING_STRATEGY", ["inflight_fused_batching", "V1"])
2107+
@pytest.mark.parametrize("BATCHING_STRATEGY", ["inflight_fused_batching"])
21082108
@pytest.mark.parametrize("DECOUPLED_MODE", ["False"],
21092109
ids=["disableDecoupleMode"])
21102110
@pytest.mark.parametrize("TRITON_MAX_BATCH_SIZE", ["128"])
@@ -2278,7 +2278,7 @@ def test_llama_v3_speculative_decoding_bls(
22782278
@pytest.mark.parametrize("KV_CACHE_FREE_GPU_MEM_FRACTION", [""])
22792279
@pytest.mark.parametrize("ENABLE_TRT_OVERLAP", ["False"],
22802280
ids=["disableTrtOverlap"])
2281-
@pytest.mark.parametrize("BATCHING_STRATEGY", ["inflight_fused_batching", "V1"])
2281+
@pytest.mark.parametrize("BATCHING_STRATEGY", ["inflight_fused_batching"])
22822282
@pytest.mark.parametrize("DECOUPLED_MODE", ["True", "False"],
22832283
ids=["enableDecoupleMode", "disableDecoupleMode"])
22842284
@pytest.mark.parametrize("TRITON_MAX_BATCH_SIZE", ["128"])
@@ -2394,7 +2394,7 @@ def test_gpt_175b_dummyWeights_ifb(
23942394
@pytest.mark.parametrize("KV_CACHE_FREE_GPU_MEM_FRACTION", ["0.7"])
23952395
@pytest.mark.parametrize("ENABLE_TRT_OVERLAP", ["False"],
23962396
ids=["disableTrtOverlap"])
2397-
@pytest.mark.parametrize("BATCHING_STRATEGY", ["inflight_fused_batching", "V1"])
2397+
@pytest.mark.parametrize("BATCHING_STRATEGY", ["inflight_fused_batching"])
23982398
@pytest.mark.parametrize("DECOUPLED_MODE", ["True", "False"],
23992399
ids=["enableDecoupleMode", "disableDecoupleMode"])
24002400
@pytest.mark.parametrize("TRITON_MAX_BATCH_SIZE", ["128"])

0 commit comments

Comments (0)