4 changes: 4 additions & 0 deletions tests/e2e/vLLM/configs/fp8_dynamic_per_token_qwen.yaml
@@ -0,0 +1,4 @@
cadence: "nightly"
test_type: "regression"
model: Qwen/Qwen2.5-0.5B
scheme: FP8_DYNAMIC
@@ -0,0 +1,7 @@
cadence: "nightly"
test_type: "regression"
model: Qwen/Qwen2.5-0.5B
recipe: tests/e2e/vLLM/recipes/INT8/recipe_int8_tensor_weight_static_per_tensor_act.yaml
dataset_id: garage-bAInd/Open-Platypus
dataset_split: train
scheme: W8A8_tensor_weight_static_per_tensor_act
8 changes: 8 additions & 0 deletions tests/e2e/vLLM/configs/w4a16_actorder_group_qwen.yaml
@@ -0,0 +1,8 @@
cadence: "nightly"
test_type: "regression"
model: Qwen/Qwen2.5-0.5B
recipe: tests/e2e/vLLM/recipes/actorder/recipe_w4a16_actorder_group.yaml
dataset_id: neuralmagic/LLM_compression_calibration
dataset_split: train
scheme: W4A16_actorder_group
save_dir: Qwen2.5-0.5B-actorder-group
8 changes: 8 additions & 0 deletions tests/e2e/vLLM/configs/w4a16_actorder_weight_qwen.yaml
@@ -0,0 +1,8 @@
cadence: "nightly"
test_type: "regression"
model: Qwen/Qwen2.5-0.5B
recipe: tests/e2e/vLLM/recipes/actorder/recipe_w4a16_actorder_weight.yaml
dataset_id: Open-Orca/slimorca-deduped-cleaned-corrected
dataset_split: train
scheme: W4A16_actorder_weight
save_dir: Qwen2.5-0.5B-actorder-weight
7 changes: 7 additions & 0 deletions tests/e2e/vLLM/configs/w4a16_channel_quant_qwen.yaml
@@ -0,0 +1,7 @@
cadence: "nightly"
test_type: "regression"
model: Qwen/Qwen2.5-0.5B
scheme: W4A16_channel
dataset_id: Open-Orca/slimorca-deduped-cleaned-corrected
dataset_split: train
recipe: tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_channel_quant.yaml
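Sketch (assumed usage, not part of this PR): test_vllm.py below reads a TEST_DATA_FILE environment variable, so each of these new Qwen configs is presumably exercised by pointing that variable at the YAML before running the e2e test. The pytest flags here are assumptions.

# Hypothetical runner sketch: the env-var name comes from test_vllm.py,
# everything else (flags, single-test selection) is assumed.
import os
import subprocess

env = dict(os.environ)
env["TEST_DATA_FILE"] = "tests/e2e/vLLM/configs/fp8_dynamic_per_token_qwen.yaml"

# Run the vLLM e2e test against the selected config.
subprocess.run(
    ["pytest", "-s", "tests/e2e/vLLM/test_vllm.py"],
    env=env,
    check=True,
)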
27 changes: 15 additions & 12 deletions tests/e2e/vLLM/test_vllm.py
@@ -25,6 +25,7 @@
 HF_MODEL_HUB_NAME = "nm-testing"
 
 TEST_DATA_FILE = os.environ.get("TEST_DATA_FILE", "")
+SKIP_HF_UPLOAD = os.environ.get("SKIP_HF_UPLOAD", "")
 
 EXPECTED_SAVED_FILES = [
     "config.json",
@@ -128,21 +129,23 @@ def test_vllm(self):
             fp.write(recipe_yaml_str)
         session.reset()
 
-        logger.info("================= UPLOADING TO HUB ======================")
+        if SKIP_HF_UPLOAD.lower() != "yes":
 
-        stub = f"{HF_MODEL_HUB_NAME}/{self.save_dir}-e2e"
+            logger.info("================= UPLOADING TO HUB ======================")
 
-        self.api.create_repo(
-            repo_id=stub,
-            exist_ok=True,
-            repo_type="model",
-            private=False,
-        )
+            stub = f"{HF_MODEL_HUB_NAME}/{self.save_dir}-e2e"
 
-        self.api.upload_folder(
-            repo_id=stub,
-            folder_path=self.save_dir,
-        )
+            self.api.create_repo(
+                repo_id=stub,
+                exist_ok=True,
+                repo_type="model",
+                private=False,
+            )
+
+            self.api.upload_folder(
+                repo_id=stub,
+                folder_path=self.save_dir,
+            )
 
         logger.info("================= RUNNING vLLM =========================")
 
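Sketch (not part of the diff): the new SKIP_HF_UPLOAD flag is read from the environment at module import and compared case-insensitively, so only a value of "yes" (in any casing) disables the Hugging Face upload; the default empty string keeps the previous always-upload behavior. A standalone sketch of the gate, with prints standing in for the real create_repo/upload_folder calls:

# Minimal sketch of the gating logic added above; prints replace the
# actual Hugging Face Hub API calls used in the test.
import os

SKIP_HF_UPLOAD = os.environ.get("SKIP_HF_UPLOAD", "")

if SKIP_HF_UPLOAD.lower() != "yes":
    print("uploading compressed model to the Hugging Face Hub")
else:
    print("SKIP_HF_UPLOAD=yes -> skipping Hub upload")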
69 changes: 67 additions & 2 deletions tests/testing_utils.py
@@ -135,7 +135,8 @@ def preprocess_tokenize_dataset(
     :param tokenizer: tokenizer to be used for tokenization
     :param max_seq_length: maximum sequence length of samples
     """
-    if ds.info.dataset_name == "gsm8k":
+    ds_name = ds.info.dataset_name.lower()
+    if ds_name == "gsm8k":
 
         def preprocess(example):
             return example
@@ -148,7 +149,8 @@ def tokenize(sample):
                 truncation=True,
                 add_special_tokens=False,
             )
-    elif ds.info.dataset_name == "ultrachat_200k":
+
+    elif ds_name == "ultrachat_200k":
 
         def preprocess(example):
             return {
@@ -166,6 +168,69 @@ def tokenize(sample):
                 truncation=True,
                 add_special_tokens=False,
             )
+
+    elif ds_name == "llm_compression_calibration":
+
+        def preprocess(example):
+            return {
+                "text": tokenizer.apply_chat_template(
+                    example["text"],
+                    tokenize=False,
+                )
+            }
+
+        def tokenize(sample):
+            return tokenizer(
+                sample["text"],
+                padding=False,
+                max_length=max_seq_length,
+                truncation=True,
+                add_special_tokens=False,
+            )
+
+    elif ds_name == "open-platypus":
+        # use the output rather than the instruction
+        def preprocess(example):
+            return {
+                "text": tokenizer.apply_chat_template(
+                    example["output"],
+                    tokenize=False,
+                )
+            }
+
+        def tokenize(sample):
+            return tokenizer(
+                sample["text"],
+                padding=False,
+                max_length=max_seq_length,
+                truncation=True,
+                add_special_tokens=False,
+            )
+
+    elif ds_name == "slimorca-deduped-cleaned-corrected":
+        # find the first element corresponding to a message from a human
+        def preprocess(example):
+            conversation_idx = 0
+            for idx, conversation in enumerate(example["conversations"]):
+                if conversation["from"] == "human":
+                    conversation_idx = idx
+                    break
+            return {
+                "text": tokenizer.apply_chat_template(
+                    example["conversations"][conversation_idx]["value"],
+                    tokenize=False,
+                )
+            }
+
+        def tokenize(sample):
+            return tokenizer(
+                sample["text"],
+                padding=False,
+                max_length=max_seq_length,
+                truncation=True,
+                add_special_tokens=False,
+            )
+
     else:
         raise NotImplementedError(f"Cannot preprocess dataset {ds.info.dataset_name}")

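Sketch (not part of the diff): the slimorca-deduped-cleaned-corrected branch keeps only the first turn whose "from" field is "human" before applying the chat template. A toy walkthrough of that selection logic with a made-up conversation (no tokenizer involved):

# Made-up example record in the slimorca "conversations" format.
example = {
    "conversations": [
        {"from": "system", "value": "You are a helpful assistant."},
        {"from": "human", "value": "Summarize vLLM in one sentence."},
        {"from": "gpt", "value": "vLLM is a fast LLM inference and serving engine."},
    ]
}

# Same scan as in preprocess() above: stop at the first message from a human.
conversation_idx = 0
for idx, conversation in enumerate(example["conversations"]):
    if conversation["from"] == "human":
        conversation_idx = idx
        break

print(example["conversations"][conversation_idx]["value"])
# -> Summarize vLLM in one sentence.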