@@ -19,9 +19,10 @@
 #
 
 GGUF_MAGIC             = 0x46554747
-GGUF_VERSION           = 2
+GGUF_VERSION           = 3
 GGUF_DEFAULT_ALIGNMENT = 32
 
+
 # general
 KEY_GENERAL_ARCHITECTURE         = "general.architecture"
 KEY_GENERAL_QUANTIZATION_VERSION = "general.quantization_version"
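The version bump from 2 to 3 goes along with the endianness support added below. One detail worth making explicit: the magic constant is the ASCII string "GGUF" when packed little-endian, which is presumably why the writer keeps emitting it with a fixed "<I" format while everything else switches on the file's byte order. A standalone check:

import struct

# 0x46554747 packed little-endian yields the four ASCII bytes "GGUF"
# ('G' = 0x47, 'U' = 0x55, 'F' = 0x46); packed big-endian it would
# come out as b"FUGG", so the magic doubles as a fixed signature.
assert struct.pack("<I", 0x46554747) == b"GGUF"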
@@ -491,6 +492,10 @@ class GGMLQuantizationType(IntEnum):
     Q6_K = 14
     Q8_K = 15
 
+class GGUFEndian(IntEnum):
+    LITTLE = 0
+    BIG = 1
+
 
 class GGUFValueType(IntEnum):
     UINT8 = 0
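A minimal sketch of what the new enum controls: each member selects the struct byte-order prefix ("<" or ">") used for every multi-byte field the writer emits, via the pack_prefix property further down. The same value serializes to mirrored byte sequences:

import struct

# "<" = little endian (least significant byte first), ">" = big endian.
assert struct.pack("<I", 0x01020304) == b"\x04\x03\x02\x01"
assert struct.pack(">I", 0x01020304) == b"\x01\x02\x03\x04"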
@@ -538,18 +543,41 @@ class GGUFWriter:
     temp_file: tempfile.SpooledTemporaryFile[bytes] | None = None
     tensors: list[tuple[np.ndarray[Any, Any], int]]
 
-    def __init__(self, path: os.PathLike[str] | str, arch: str, use_temp_file = True):
+    @property
+    def pack_prefix(self):
+        if self.endianess == GGUFEndian.LITTLE:
+            return "<"
+        else:
+            return ">"
+
+    def __init__(self, path: os.PathLike[str] | str, arch: str, use_temp_file = True, endianess = GGUFEndian.LITTLE):
         self.fout = open(path, "wb")
         self.arch = arch
+        self.endianess = endianess
+        self._simple_value_packing = {
+            GGUFValueType.UINT8:   f"{self.pack_prefix}B",
+            GGUFValueType.INT8:    f"{self.pack_prefix}b",
+            GGUFValueType.UINT16:  f"{self.pack_prefix}H",
+            GGUFValueType.INT16:   f"{self.pack_prefix}h",
+            GGUFValueType.UINT32:  f"{self.pack_prefix}I",
+            GGUFValueType.INT32:   f"{self.pack_prefix}i",
+            GGUFValueType.FLOAT32: f"{self.pack_prefix}f",
+            GGUFValueType.UINT64:  f"{self.pack_prefix}Q",
+            GGUFValueType.INT64:   f"{self.pack_prefix}q",
+            GGUFValueType.FLOAT64: f"{self.pack_prefix}d",
+            GGUFValueType.BOOL:    "?",
+        }
         self.add_architecture()
         self.use_temp_file = use_temp_file
         self.tensors = []
+        endianess_str = "Big Endian" if self.endianess == GGUFEndian.BIG else "Little Endian"
+        print(f"This gguf file is for {endianess_str} only")
 
     def write_header_to_file(self):
         self.fout.write(struct.pack("<I", GGUF_MAGIC))
-        self.fout.write(struct.pack("<I", GGUF_VERSION))
-        self.fout.write(struct.pack("<Q", self.ti_data_count))
-        self.fout.write(struct.pack("<Q", self.kv_data_count))
+        self.fout.write(struct.pack(f"{self.pack_prefix}I", GGUF_VERSION))
+        self.fout.write(struct.pack(f"{self.pack_prefix}Q", self.ti_data_count))
+        self.fout.write(struct.pack(f"{self.pack_prefix}Q", self.kv_data_count))
         self.flush()
 #        print("tensors " + str(self.ti_data_count) + " kv " + str(self.kv_data_count))
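For context, a minimal usage sketch of the new constructor parameter. The file name and tensor contents are illustrative, and the write/close calls assume the GGUFWriter methods defined elsewhere in this file (write_kv_data_to_file, write_tensors_to_file, close):

import numpy as np

# Hypothetical example; only the `endianess` argument is new in this change.
writer = GGUFWriter("model-be.gguf", arch="llama", endianess=GGUFEndian.BIG)
writer.add_tensor("tok_embd.weight", np.zeros((8, 4), dtype=np.float32))
writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.write_tensors_to_file()
writer.close()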
@@ -621,40 +649,27 @@ def add_array(self, key: str, val: Sequence[Any]):
         self.add_key(key)
         self.add_val(val, GGUFValueType.ARRAY)
 
-    _simple_value_packing = {
-        GGUFValueType.UINT8:   "<B",
-        GGUFValueType.INT8:    "<b",
-        GGUFValueType.UINT16:  "<H",
-        GGUFValueType.INT16:   "<h",
-        GGUFValueType.UINT32:  "<I",
-        GGUFValueType.INT32:   "<i",
-        GGUFValueType.FLOAT32: "<f",
-        GGUFValueType.UINT64:  "<Q",
-        GGUFValueType.INT64:   "<q",
-        GGUFValueType.FLOAT64: "<d",
-        GGUFValueType.BOOL:    "?",
-    }
     def add_val(self, val: Any, vtype: GGUFValueType | None = None, add_vtype: bool = True):
         if vtype is None:
             vtype = GGUFValueType.get_type(val)
 
         if add_vtype:
-            self.kv_data += struct.pack("<I", vtype)
+            self.kv_data += struct.pack(f"{self.pack_prefix}I", vtype)
             self.kv_data_count += 1
 
         pack_fmt = self._simple_value_packing.get(vtype)
         if pack_fmt is not None:
             self.kv_data += struct.pack(pack_fmt, val)
         elif vtype == GGUFValueType.STRING:
             encoded_val = val.encode("utf8") if isinstance(val, str) else val
-            self.kv_data += struct.pack("<Q", len(encoded_val))
+            self.kv_data += struct.pack(f"{self.pack_prefix}Q", len(encoded_val))
             self.kv_data += encoded_val
         elif vtype == GGUFValueType.ARRAY and isinstance(val, Sequence) and len(val) > 0:
             ltype = GGUFValueType.get_type(val[0])
             if not all(GGUFValueType.get_type(i) is ltype for i in val[1:]):
                 raise ValueError("All items in a GGUF array should be of the same type")
-            self.kv_data += struct.pack("<I", ltype)
-            self.kv_data += struct.pack("<Q", len(val))
+            self.kv_data += struct.pack(f"{self.pack_prefix}I", ltype)
+            self.kv_data += struct.pack(f"{self.pack_prefix}Q", len(val))
             for item in val:
                 self.add_val(item, add_vtype=False)
         else:
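To make the resulting byte layout concrete, here is a standalone sketch of the STRING branch above; pack_string is a hypothetical helper, not part of the file:

import struct

def pack_string(s: str, prefix: str) -> bytes:
    # Mirrors add_val's STRING branch: a uint64 length in the file's
    # byte order, then the raw UTF-8 bytes (which have no byte order).
    data = s.encode("utf8")
    return struct.pack(f"{prefix}Q", len(data)) + data

assert pack_string("llama", "<") == b"\x05\x00\x00\x00\x00\x00\x00\x00llama"
assert pack_string("llama", ">") == b"\x00\x00\x00\x00\x00\x00\x00\x05llama"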
@@ -668,22 +683,24 @@ def add_tensor_info(self, name: str, tensor_shape: Sequence[int], tensor_dtype:
         assert raw_dtype is not None or tensor_dtype in (np.float32, np.float16), "Only F32 and F16 tensors are supported for now"
 
         encoded_name = name.encode("utf8")
-        self.ti_data += struct.pack("<Q", len(encoded_name))
+        self.ti_data += struct.pack(f"{self.pack_prefix}Q", len(encoded_name))
         self.ti_data += encoded_name
         n_dims = len(tensor_shape)
-        self.ti_data += struct.pack("<I", n_dims)
+        self.ti_data += struct.pack(f"{self.pack_prefix}I", n_dims)
         for i in range(n_dims):
-            self.ti_data += struct.pack("<Q", tensor_shape[n_dims - 1 - i])
+            self.ti_data += struct.pack(f"{self.pack_prefix}Q", tensor_shape[n_dims - 1 - i])
         if raw_dtype is None:
             dtype = GGMLQuantizationType.F32 if tensor_dtype == np.float32 else GGMLQuantizationType.F16
         else:
             dtype = raw_dtype
-        self.ti_data += struct.pack("<I", dtype)
-        self.ti_data += struct.pack("<Q", self.offset_tensor)
+        self.ti_data += struct.pack(f"{self.pack_prefix}I", dtype)
+        self.ti_data += struct.pack(f"{self.pack_prefix}Q", self.offset_tensor)
         self.offset_tensor += GGUFWriter.ggml_pad(tensor_nbytes, self.data_alignment)
         self.ti_data_count += 1
 
     def add_tensor(self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequence[int] | None = None, raw_dtype: GGMLQuantizationType | None = None):
+        if self.endianess == GGUFEndian.BIG:
+            tensor.byteswap(inplace=True)
         if self.use_temp_file and self.temp_file is None:
             fp = tempfile.SpooledTemporaryFile(mode="w+b", max_size=256*1024*1024)
             fp.seek(0)
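Tensor payloads are raw numpy memory, so big-endian output is produced by swapping bytes in place before the data is buffered; note that byteswap(inplace=True) mutates the caller's array. A quick illustration of the semantics:

import numpy as np

t = np.array([1.0], dtype=np.float32)
le = t.tobytes()          # b"\x00\x00\x80\x3f" on a little-endian host
t.byteswap(inplace=True)  # swaps the raw bytes; the dtype is unchanged
assert t.tobytes() == le[::-1]  # one 4-byte element: order fully reversed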
@@ -709,6 +726,8 @@ def write_padding(self, fp: BinaryIO, n: int, align: int | None = None):
             fp.write(bytes([0] * pad))
 
     def write_tensor_data(self, tensor: np.ndarray[Any, Any]):
+        if self.endianess == GGUFEndian.BIG:
+            tensor.byteswap(inplace=True)
         self.write_padding(self.fout, self.fout.tell())
         tensor.tofile(self.fout)
         self.write_padding(self.fout, tensor.nbytes)
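Because the magic is the same four bytes in either layout, a reader can tell the two apart from the version field that follows it. A hypothetical sketch (detect_endianness is not part of this change):

import struct

def detect_endianness(path: str) -> str:
    # b"GGUF" is identical in both layouts; the version field is stored
    # in the file's own byte order, so unpacking it both ways tells
    # little-endian and big-endian files apart.
    with open(path, "rb") as f:
        magic, version_raw = f.read(4), f.read(4)
    if magic != b"GGUF":
        raise ValueError("not a GGUF file")
    if struct.unpack("<I", version_raw)[0] == GGUF_VERSION:
        return "little"
    if struct.unpack(">I", version_raw)[0] == GGUF_VERSION:
        return "big"
    raise ValueError("unrecognized GGUF version field")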