@@ -1208,7 +1208,6 @@ def set_vocab(self):
             self._set_vocab_qwen()

     def set_gguf_parameters(self):
-        super().set_gguf_parameters()
         hparams = self.hparams
         block_count = hparams["num_hidden_layers"]

@@ -1224,6 +1223,107 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_parallel_residual(hparams["use_parallel_residual"] if "use_parallel_residual" in hparams else True)
         self.gguf_writer.add_layer_norm_eps(self.find_hparam(["layer_norm_eps", "norm_eps"]))

+    def write_tensors(self):
+        block_count = self.hparams.get("n_layers", self.hparams.get("num_hidden_layers", self.hparams.get("n_layer")))
+        tensor_map = gguf.get_tensor_name_map(self.model_arch, block_count)
+        n_head = self.hparams.get("num_attention_heads")
+        n_kv_head = self.hparams.get("num_key_value_heads")
+        q_norms = dict()
+        k_norms = dict()
+        for name, data_torch in self.get_tensors():
+            # we don't need these
+            if name.endswith((".attention.masked_bias", ".attention.bias", ".attention.rotary_emb.inv_freq")):
+                continue
+
+            old_dtype = data_torch.dtype
+
+            # convert any unsupported data types to float32
+            if data_torch.dtype not in (torch.float16, torch.float32):
+                data_torch = data_torch.to(torch.float32)
+
+            data = data_torch.squeeze().numpy()
+            n_dims = len(data.shape)
+            # collect the per-head q_layernorm weights; once all heads of all
+            # blocks have been seen, stack them into one tensor per block
+            if name.find("q_layernorm.norms") != -1:
+                q_norms[name] = data
+                if len(q_norms) >= (block_count * n_head):
+                    for bid in range(block_count):
+                        datas = []
+                        for xid in range(n_head):
+                            ename = f"model.layers.{bid}.self_attn.q_layernorm.norms.{xid}.weight"
+                            datas.append(q_norms[ename])
+                            del q_norms[ename]
+                        data = np.stack(datas, axis=0)
+                        data_dtype = data.dtype
+                        merged_name = f"model.layers.{bid}.self_attn.q_layernorm.weight"
+                        new_name = tensor_map.get_name(merged_name, try_suffixes=(".weight", ".bias"))
+                        if new_name is None:
+                            print(f"Can not map tensor {merged_name!r}")
+                            sys.exit()
+
+                        if self.ftype == 1 and data_dtype == np.float16 and (n_dims == 1 or new_name.endswith("_norm.weight")):
+                            data = data.astype(np.float32)
+
+                        # if f16 desired, convert any float32 2-dim weight tensors to float16
+                        if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and not new_name.endswith("_norm.weight") and n_dims == 2:
+                            data = data.astype(np.float16)
+
+                        print(f"{new_name}, n_dims = {len(data.shape)}, shape = {data.shape} --> {data.dtype}")
+
+                        self.gguf_writer.add_tensor(new_name, data)
+                continue
+            # same for the per-head k_layernorm weights, sized by the number of KV heads
+            if name.find("k_layernorm.norms") != -1:
+                k_norms[name] = data
+                if len(k_norms) >= (block_count * n_kv_head):
+                    for bid in range(block_count):
+                        datas = []
+                        for xid in range(n_kv_head):
+                            ename = f"model.layers.{bid}.self_attn.k_layernorm.norms.{xid}.weight"
+                            datas.append(k_norms[ename])
+                            del k_norms[ename]
+                        data = np.stack(datas, axis=0)
+                        data_dtype = data.dtype
+                        merged_name = f"model.layers.{bid}.self_attn.k_layernorm.weight"
+                        new_name = tensor_map.get_name(merged_name, try_suffixes=(".weight", ".bias"))
+                        if new_name is None:
+                            print(f"Can not map tensor {merged_name!r}")
+                            sys.exit()
+
+                        if self.ftype == 1 and data_dtype == np.float16 and (n_dims == 1 or new_name.endswith("_norm.weight")):
+                            data = data.astype(np.float32)
+
+                        # if f16 desired, convert any float32 2-dim weight tensors to float16
+                        if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and not new_name.endswith("_norm.weight") and n_dims == 2:
+                            data = data.astype(np.float16)
+
+                        print(f"{new_name}, n_dims = {len(data.shape)}, shape = {data.shape} --> {data.dtype}")
+
+                        self.gguf_writer.add_tensor(new_name, data)
+                continue
+
+            # map tensor names
+            new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
+            if new_name is None:
+                print(f"Can not map tensor {name!r}")
+                sys.exit()
+
+            n_dims = len(data.shape)
+            data_dtype = data.dtype
+
+            # if f32 desired, convert any float16 to float32
+            if self.ftype == 0 and data_dtype == np.float16:
+                data = data.astype(np.float32)
+
+            # TODO: Why can't we use these float16 as-is? There should be no reason to store float16 as float32
+            if self.ftype == 1 and data_dtype == np.float16 and (n_dims == 1 or new_name.endswith("_norm.weight")):
+                data = data.astype(np.float32)
+
+            # if f16 desired, convert any float32 2-dim weight tensors to float16
+            if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and not new_name.endswith("_norm.weight") and n_dims == 2:
+                data = data.astype(np.float16)
+
+            print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}")
+
+            self.gguf_writer.add_tensor(new_name, data)

 @Model.register("LlamaForCausalLM", "MistralForCausalLM", "MixtralForCausalLM")
 class LlamaModel(Model):
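
For reference, the core of the new write_tensors() override is stacking the model's per-head q/k layernorm weights into one 2-D tensor per block so that each block maps to a single GGUF tensor name. Below is a minimal, self-contained sketch of just that stacking step, using made-up sizes (2 blocks, 2 heads, head_dim of 4) and dummy weights; in the converter the counts come from the model's hparams and the data from get_tensors().

    import numpy as np

    block_count, n_head, head_dim = 2, 2, 4  # hypothetical sizes for illustration

    # dummy per-head 1-D norm weights, keyed the way the HF checkpoint names them
    q_norms = {
        f"model.layers.{bid}.self_attn.q_layernorm.norms.{xid}.weight": np.ones(head_dim, dtype=np.float32)
        for bid in range(block_count)
        for xid in range(n_head)
    }

    for bid in range(block_count):
        datas = [q_norms[f"model.layers.{bid}.self_attn.q_layernorm.norms.{xid}.weight"]
                 for xid in range(n_head)]
        merged = np.stack(datas, axis=0)  # one (n_head, head_dim) tensor per block
        print(f"model.layers.{bid}.self_attn.q_layernorm.weight --> shape {merged.shape}")

The k_layernorm branch works the same way, only iterating over num_key_value_heads instead of num_attention_heads.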