@@ -279,6 +279,20 @@ jobs:
279279 python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et
280280 cat ./output_et
281281
282+ echo "******************************************"  # Section: export with 4-bit embedding quantization (groupsize 0), then run the exported .pte
283+ echo "**** Emb 4bit: channel-wise quantized ****"
284+ echo "******************************************"
285+ python export.py --quant '{"embedding" : {"bitwidth": 4, "groupsize": 0}}' --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte  # bitwidth was 8, contradicting the "Emb 4bit" banner; set to 4 so the labelled configuration is what actually runs
286+ python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et  # generate from the freshly exported .pte; output captured to ./output_et
287+ cat ./output_et
288+
289+ echo "******************************************"  # Section: export with 4-bit embedding quantization (groupsize 8), then run the exported .pte
290+ echo "****** Emb 4bit: group-wise quantized ****"
291+ echo "******************************************"
292+ python export.py --quant '{"embedding" : {"bitwidth": 4, "groupsize": 8}}' --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte  # bitwidth was 8, contradicting the "Emb 4bit" banner; set to 4 so the labelled configuration is what actually runs
293+ python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et  # generate from the freshly exported .pte; output captured to ./output_et
294+ cat ./output_et
295+
282296 echo "******************************************"
283297 echo "******* INT8 channel-wise quantized ******"
284298 echo "******************************************"
@@ -300,6 +314,20 @@ jobs:
300314 python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et
301315 # cat ./output_et
302316
317+ echo "******************************************"  # NOTE(review): the export/generate/cat commands for this section are commented out below, so this banner is printed but no INT4 test runs
318+ echo "******** INT4 group-wise quantized *******"
319+ echo "******************************************"
320+ # python export.py --quant '{"linear:int4" : {"groupsize": 32}}' --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
321+ # python generate.py --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et
322+ # cat ./output_et
323+
324+ echo "******************************************"  # NOTE(review): the export/generate/cat commands for this section are commented out below, so this banner is printed but no HQQ test runs
325+ echo "******** HQQ group-wise quantized *******"  # NOTE(review): this title row appears one character narrower than the border rows — confirm and pad if alignment matters
326+ echo "******************************************"
327+ # python export.py --quant '{"linear:hqq" : {"groupsize": 32}}' --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
328+ # python generate.py --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte > ./output_et
329+ # cat ./output_et
330+
303331 echo "tests complete"
304332 echo "******************************************"
305333
0 commit comments