We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent f6721e8 · commit 266af2d (Copy full SHA for 266af2d)
tests/basic_correctness/test_cpu_offload.py
@@ -1,8 +1,13 @@
+from vllm.utils import is_hip
+
 from ..utils import compare_two_settings
 
 
 def test_cpu_offload():
     compare_two_settings("meta-llama/Llama-2-7b-hf", [],
                          ["--cpu-offload-gb", "4"])
-    compare_two_settings("nm-testing/llama7b-one-shot-2_4-w4a16-marlin24-t",
-                         [], ["--cpu-offload-gb", "1"])
+    if not is_hip():
+        # compressed-tensors quantization is currently not supported in ROCm.
+        compare_two_settings(
+            "nm-testing/llama7b-one-shot-2_4-w4a16-marlin24-t", [],
+            ["--cpu-offload-gb", "1"])
0 commit comments