@@ -1432,17 +1432,18 @@ def load_vocab(
     return vocab, special_vocab
 
 
-def default_outfile(model_paths: list[Path], file_type: GGMLFileType) -> Path:
+def default_output_file(model_paths: list[Path], file_type: GGMLFileType) -> Path:
     namestr = {
-        GGMLFileType.AllF32:    "f32",
+        GGMLFileType.AllF32: "f32",
         GGMLFileType.MostlyF16: "f16",
-        GGMLFileType.MostlyQ8_0:"q8_0",
+        GGMLFileType.MostlyQ8_0: "q8_0",
     }[file_type]
     ret = model_paths[0].parent / f"ggml-model-{namestr}.gguf"
     if ret in model_paths:
         sys.stderr.write(
             f"Error: Default output path ({ret}) would overwrite the input. "
-            "Please explicitly specify a path using --outfile.\n")
+            "Please explicitly specify a path using --out-file.\n"
+        )
         sys.exit(1)
     return ret
 
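For context, the renamed helper derives a default .gguf path next to the first input file and refuses to overwrite an input. Below is a self-contained sketch of that behavior as it stands after this hunk; the GGMLFileType stub and the example path are assumptions for illustration only, since the real enum is defined elsewhere in convert.py:

    # Sketch only: GGMLFileType is stubbed here; the real enum lives in convert.py.
    import sys
    from enum import IntEnum
    from pathlib import Path

    class GGMLFileType(IntEnum):
        AllF32 = 0
        MostlyF16 = 1
        MostlyQ8_0 = 7

    def default_output_file(model_paths: list[Path], file_type: GGMLFileType) -> Path:
        # Map the output type to the suffix used in the default file name.
        namestr = {
            GGMLFileType.AllF32: "f32",
            GGMLFileType.MostlyF16: "f16",
            GGMLFileType.MostlyQ8_0: "q8_0",
        }[file_type]
        ret = model_paths[0].parent / f"ggml-model-{namestr}.gguf"
        # Refuse to silently clobber one of the inputs.
        if ret in model_paths:
            sys.stderr.write(
                f"Error: Default output path ({ret}) would overwrite the input. "
                "Please explicitly specify a path using --out-file.\n"
            )
            sys.exit(1)
        return ret

    # Hypothetical example: an f16 conversion of models/7B/consolidated.00.pth
    # defaults to models/7B/ggml-model-f16.gguf.
    print(default_output_file([Path("models/7B/consolidated.00.pth")], GGMLFileType.MostlyF16))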
@@ -1452,29 +1453,107 @@ def do_dump_model(model_plus: ModelPlus) -> None:
     print(f"model_plus.format = {model_plus.format!r}")
     print(f"model_plus.vocab = {model_plus.vocab!r}")
     for name, lazy_tensor in model_plus.model.items():
-        print(f"{name}: shape={lazy_tensor.shape} type={lazy_tensor.data_type}; {lazy_tensor.description}")
+        print(
+            f"{name}: shape={lazy_tensor.shape} type={lazy_tensor.data_type}; {lazy_tensor.description}"
+        )
 
 
-def main(args_in: list[str] | None = None) -> None:
+def get_argument_parser() -> ArgumentParser:
     output_choices = ["f32", "f16"]
     if np.uint32(1) == np.uint32(1).newbyteorder("<"):
         # We currently only support Q8_0 output on little endian systems.
         output_choices.append("q8_0")
-    parser = argparse.ArgumentParser(description="Convert a LLaMa model to a GGML compatible file")
-    parser.add_argument("--awq-path", type=Path, help="Path to scale awq cache file", default=None)
-    parser.add_argument("--dump", action="store_true", help="don't convert, just show what's in the model")
-    parser.add_argument("--dump-single", action="store_true", help="don't convert, just show what's in a single model file")
-    parser.add_argument("--vocab-only", action="store_true", help="extract only the vocab")
-    parser.add_argument("--outtype", choices=output_choices, help="output format - note: q8_0 may be very slow (default: f16 or f32 based on input)")
-    parser.add_argument("--vocab-dir", type=Path, help="directory containing tokenizer.model, if separate from model file")
-    parser.add_argument("--outfile", type=Path, help="path to write to; default: based on input")
-    parser.add_argument("model", type=Path, help="directory containing model file, or model file itself (*.pth, *.pt, *.bin)")
-    parser.add_argument("--ctx", type=int, help="model training context (default: based on input)")
-    parser.add_argument("--concurrency", type=int, help=f"concurrency used for conversion (default: {DEFAULT_CONCURRENCY})", default=DEFAULT_CONCURRENCY)
-    parser.add_argument("--bigendian", action="store_true", help="model is executed on big endian machine")
-    parser.add_argument("--padvocab", action="store_true", help="add pad tokens when model vocab expects more than tokenizer metadata provides")
-
-    args = parser.parse_args(args_in)
+
+    parser = argparse.ArgumentParser(
+        description="Convert a LLaMa model to a GGML compatible file"
+    )
+
+    parser.add_argument(
+        "model",
+        type=Path,
+        help="Directory containing the model file or the model file itself (*.pth, *.pt, *.bin)",
+    )
+
+    parser.add_argument(
+        "--awq-path",
+        type=Path,
+        help="Path to the Activation-aware Weight Quantization cache file",
+        default=None,
+    )
+
+    parser.add_argument(
+        "--dump",
+        action="store_true",
+        help="Display the model content without converting it",
+    )
+
+    parser.add_argument(
+        "--dump-single",
+        action="store_true",
+        help="Display the content of a single model file without conversion",
+    )
+
+    parser.add_argument(
+        "--vocab-only",
+        action="store_true",
+        help="Extract and output only the vocabulary",
+    )
+
+    parser.add_argument(
+        "--out-type",
+        choices=output_choices,
+        help="Output format - note: q8_0 may be very slow (default: f16 or f32 based on input)",
+    )
+
+    parser.add_argument(
+        "--vocab-dir",
+        type=Path,
+        help="Directory containing the tokenizer.model, if separate from the model file",
+    )
+
+    parser.add_argument(
+        "--vocab-type",
+        choices=["spm", "bpe", "hfft"],  # hfft: Hugging Face Fast Tokenizer
+        default="spm",
+        help="The vocabulary format used to define the tokenizer model (default: spm)",
+    )
+
+    parser.add_argument(
+        "--pad-vocab",
+        action="store_true",
+        help="Add padding tokens when the model's vocabulary size exceeds the tokenizer metadata",
+    )
+
+    parser.add_argument(
+        "--out-file",
+        type=Path,
+        help="Specify the path for the output file (default is based on input)",
+    )
+
+    parser.add_argument(
+        "--ctx", type=int, help="Model training context (default is based on input)"
+    )
+
+    parser.add_argument(
+        "--concurrency",
+        type=int,
+        help=f"Concurrency used for conversion (default: {DEFAULT_CONCURRENCY})",
+        default=DEFAULT_CONCURRENCY,
+    )
+
+    parser.add_argument(
+        "--big-endian",
+        action="store_true",
+        help="Indicate that the model is executed on a big-endian machine",
+    )
+
+    return parser
+
+
+def main(argv: Optional[list[str]] = None) -> None:
+    parser = get_argument_parser()
+    args = parser.parse_args(argv)
+
     if args.awq_path:
         sys.path.insert(1, str(Path(__file__).parent / 'awq-py'))
         from awq.apply_awq import add_scale_weights
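One payoff of splitting get_argument_parser() out of main() is that flag parsing can now be exercised in isolation, and main() takes an explicit argv. A hypothetical usage sketch, assuming the script is importable as a module named convert:

    from pathlib import Path

    from convert import get_argument_parser  # assumes convert.py is on sys.path

    # Exercise the renamed flags (--out-type, --out-file, --pad-vocab) without
    # running a conversion; argparse maps them to out_type, out_file, pad_vocab.
    parser = get_argument_parser()
    args = parser.parse_args(
        ["models/7B", "--out-type", "f16", "--out-file", "ggml-model-f16.gguf", "--pad-vocab"]
    )
    assert args.model == Path("models/7B")
    assert args.out_type == "f16"
    assert args.out_file == Path("ggml-model-f16.gguf")
    assert args.pad_vocab is True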