
Commit 2913977

skip oom test on GH200
Signed-off-by: xinhe-nv <[email protected]>
Parent: de0613b

3 files changed: 11 additions (+), 4 deletions (-)

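This commit replaces two GH200-scoped waives.txt entries with skips inside the tests themselves. Below is a minimal, self-contained sketch of the guard pattern, assuming get_gpu_device_list() returns one human-readable name string per visible GPU; the stub and the exact device string are assumptions here (the real helper lives in defs.conftest):

import pytest


# Hypothetical stand-in for defs.conftest.get_gpu_device_list; assumed to
# return one name string per visible GPU, e.g. "NVIDIA GH200 480GB".
def get_gpu_device_list():
    return ["NVIDIA GH200 480GB"]


def test_oom_prone_case():
    # The guard pattern this commit adds: skip before building the engine
    # when the first visible device is a GH200, citing the tracking bug.
    if "GH200" in get_gpu_device_list()[0]:
        pytest.skip("OOM on GH200. https://nvbugs/5250460")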

tests/integration/defs/examples/test_commandr.py

Lines changed: 4 additions & 0 deletions
@@ -18,6 +18,7 @@
 import pytest
 from defs.common import (convert_weights, generate_summary_cmd, venv_check_call,
                          venv_mpi_check_call)
+from defs.conftest import get_gpu_device_list
 from defs.trt_test_alternative import check_call


@@ -30,6 +31,9 @@ def test_llm_commandr_v01_single_gpu_summary(commandr_example_root,
                                              llm_venv, cmodel_dir, engine_dir,
                                              use_weight_only):
     "Build & run commandr_v01 on single gpu."
+    if "GH200" in get_gpu_device_list()[0] and not use_weight_only:
+        pytest.skip("OOM on GH200. https://nvbugs/5250460")
+
     print("Converting checkpoint...")
     dtype = 'float16'
     model_name = os.path.basename(llm_commandr_v01_model_root)
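Note the guard fires only when use_weight_only is false, matching the [disable_weight_only] id in the waive entry removed below. A sketch of how the parametrization lines up with the condition; the parametrize ids are an assumption inferred from that entry, and the GPU stub from the sketch above is reused:

import pytest


def get_gpu_device_list():
    return ["NVIDIA GH200 480GB"]  # assumed GH200 name string


@pytest.mark.parametrize("use_weight_only", [True, False],
                         ids=["enable_weight_only", "disable_weight_only"])
def test_guard_sketch(use_weight_only):
    # Only the disable_weight_only variant skips; the weight-only build is
    # presumably small enough to fit in GH200 memory, so it still runs.
    if "GH200" in get_gpu_device_list()[0] and not use_weight_only:
        pytest.skip("OOM on GH200. https://nvbugs/5250460")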

tests/integration/defs/examples/test_gemma.py

Lines changed: 7 additions & 2 deletions
@@ -17,8 +17,9 @@
 import pytest
 from defs.common import (generate_summary_cmd, test_multi_lora_support,
                          venv_check_call)
-from defs.conftest import (get_device_memory, skip_fp8_pre_ada,
-                           skip_post_blackwell, skip_pre_hopper)
+from defs.conftest import (get_device_memory, get_gpu_device_list,
+                           skip_fp8_pre_ada, skip_post_blackwell,
+                           skip_pre_hopper)
 from defs.trt_test_alternative import check_call


@@ -224,6 +225,10 @@ def test_llm_gemma_1gpu_summary(batch_size, data_type, gemma_model_root,
                                 llm_venv, cmodel_dir, engine_dir,
                                 gemma_example_root, llm_datasets_root,
                                 llm_rouge_root, test_case):
+    if "27b" in gemma_model_root and "GH200" in get_gpu_device_list(
+    )[0] and "other" in test_case:
+        pytest.skip("OOM on GH200. https://nvbugs/5250460")
+
     gemma_1gpu_summary(batch_size, data_type, gemma_model_root, llm_venv,
                        cmodel_dir, engine_dir, gemma_example_root,
                        llm_datasets_root, llm_rouge_root, test_case)
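The gemma guard is narrower: it matches only the 27B checkpoint, only on GH200, and only the "other" test case. A worked check of the condition against example values reconstructed from the removed waive id gemma-2-27b-it-other-bfloat16-8; the values are illustrative, not pulled from the suite:

# Illustrative values; the real ones come from the test's parameters.
gemma_model_root = "gemma-2-27b-it"   # model directory name
test_case = "other"                   # summary-test variant
device = "NVIDIA GH200 480GB"         # assumed GH200 device string

# Mirrors the commit's condition: smaller gemma variants, other devices,
# and other test cases all keep running.
should_skip = ("27b" in gemma_model_root and "GH200" in device
               and "other" in test_case)
assert should_skip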

tests/integration/test_lists/waives.txt

Lines changed: 0 additions & 2 deletions
@@ -418,8 +418,6 @@ accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales_4gpu
 accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales_4gpus[ep4-mtp_nextn=2-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False] SKIP (https://nvbugs/5239087)
 accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[-] SKIP (https://nvbugs/5234002)
 examples/test_gemma.py::test_llm_hf_gemma_quantization_1gpu[gemma-2-27b-it-fp8-bfloat16-8] SKIP (https://nvbugs/5234164)
-full::GH200/examples/test_commandr.py::test_llm_commandr_v01_single_gpu_summary[disable_weight_only] SKIP (https://nvbugs/5250460)
-full::GH200/examples/test_gemma.py::test_llm_gemma_1gpu_summary[gemma-2-27b-it-other-bfloat16-8] SKIP (https://nvbugs/5250460)
 examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-disable_attention_plugin-disable_context_fmha-tp:1-pp:1-float16-RobertaForSequenceClassification-bert/twitter-roberta-base-emotion] SKIP (https://nvbugs/5234058)
 examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-disable_attention_plugin-disable_context_fmha-tp:2-pp:1-float16-RobertaForSequenceClassification-bert/twitter-roberta-base-emotion] SKIP (https://nvbugs/5234058)
 examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:2-pp:1-float16-BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity] SKIP (https://nvbugs/5234058)
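For reference, the removed entries follow the waives.txt pattern

<scope>/<file>::<test>[<param id>] SKIP (<bug URL>)

where the full::GH200/ prefix appears to restrict a waive to GH200 runs (an inference from the naming). With the skip condition now expressed inside the two tests, these list entries became redundant and are dropped.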
