Commit 354791d

Add the export model process in mlperf codes (#1602)
Signed-off-by: YIYANGCAI <[email protected]>
1 parent e22c61e

2 files changed: 11 insertions(+), 9 deletions(-)

examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_gptj_mlperf_int4.py

Lines changed: 5 additions & 3 deletions

@@ -260,6 +260,7 @@ def forward(self, *inp, **kwargs):
 parser.add_argument('--use_max_length', action='store_true',
                     help='Only select data whose length equals or more than model.seqlen, please refer to GPTQ original implementation'
                     )
+parser.add_argument('--benchmark', action='store_true', help='Whether to do benchmark on CNN datasets.')

 # load the gptj model
 args = parser.parse_args()
@@ -324,12 +325,13 @@ def forward(self, *inp, **kwargs):

 q_model = quantization.fit(model, conf, calib_dataloader=dataloader,)

-q_model.save("./gptj-gptq-gs128-calib128-calibration-fp16/")
+# q_model.save("./gptj-gptq-gs128-calib128-calibration-fp16/")
 # q_model.float()
 # q_model.save("./gptj-gptq-gs128-calib128-calibration-fp32/")
+compressed_model = q_model.export_compressed_model()
+torch.save(compressed_model.state_dict(), "gptj_w3g128_compressed_model.pt")
 # benchmarking first 100 examples
-# if args.benchmark:
-if True:
+if args.benchmark:
     # use half to accerlerate inference
     model.half()
     model = model.to(DEV)

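Note on the export step above: a minimal sketch of how the saved checkpoint could be inspected afterwards. The file name gptj_w3g128_compressed_model.pt is taken from the diff; the only assumption is that it is an ordinary state_dict written with torch.save, exactly as the added lines do.

import torch

# Load the checkpoint written by the added torch.save(...) call above.
state_dict = torch.load("gptj_w3g128_compressed_model.pt", map_location="cpu")

# List the exported tensors (packed low-bit weights, scales, etc.) to confirm
# the compressed export succeeded before wiring it into the MLPerf harness.
for name, tensor in state_dict.items():
    print(name, tuple(tensor.shape), tensor.dtype)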
examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_gptj_mlperf_int4.sh

Lines changed: 6 additions & 6 deletions

@@ -2,15 +2,15 @@ CALIBRATION_DATA=/your/data/calibration-data/cnn_dailymail_calibration.json
 VALIDATION_DATA=/your/data/validation-data/cnn_dailymail_validation.json
 MODEL_DIR=/your/gptj/

-python -u examples/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_weight_only/run_gptj_mlperf_int4.py \
+python -u examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_gptj_mlperf_int4.py \
     --model_name_or_path ${MODEL_DIR} \
-    --wbits 4 \
+    --wbits 3 \
     --sym \
-    --group_size -1 \
-    --nsamples 128 \
+    --group_size 128 \
+    --nsamples 256 \
     --calib-data-path ${CALIBRATION_DATA} \
     --val-data-path ${VALIDATION_DATA} \
-    --calib-iters 128 \
+    --calib-iters 256 \
     --use_max_length \
     --pad_max_length 2048 \
-    --use_gpu
+    --use_gpu

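For reference, a rough sketch of how the updated shell flags (--wbits 3, --sym, --group_size 128) typically map onto a neural-compressor weight-only GPTQ configuration. The diff does not show how run_gptj_mlperf_int4.py builds its config, so the keys below follow the documented INC 2.x weight-only API and are an approximation, not the script's actual code; the model path and placeholder dataloader are likewise assumptions.

from neural_compressor import PostTrainingQuantConfig, quantization
from transformers import AutoModelForCausalLM

# Assumed model path, mirroring MODEL_DIR in the shell script.
model = AutoModelForCausalLM.from_pretrained("/your/gptj/")

# Placeholder: the real script builds a CNN/DailyMail calibration dataloader.
dataloader = ...

# Approximate weight-only GPTQ config matching --wbits 3, --sym, --group_size 128
# (assumed INC 2.x API; the script's real config may differ in detail).
conf = PostTrainingQuantConfig(
    approach="weight_only",
    op_type_dict={
        ".*": {  # apply to all supported op types
            "weight": {
                "bits": 3,
                "group_size": 128,
                "scheme": "sym",
                "algorithm": "GPTQ",
            },
        },
    },
)

# Same calls as in the updated script: quantize, then export the packed model.
q_model = quantization.fit(model, conf, calib_dataloader=dataloader)
compressed_model = q_model.export_compressed_model()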