@@ -772,22 +772,37 @@ struct LLM_TN {
772772 llm_arch arch;
773773
774774 std::string operator ()(llm_tensor tensor) const {
775+ if (LLM_TENSOR_NAMES[arch].find (tensor) == LLM_TENSOR_NAMES[arch].end ()) {
776+ return " __missing__" ;
777+ }
775778 return LLM_TENSOR_NAMES[arch].at (tensor);
776779 }
777780
778781 std::string operator ()(llm_tensor tensor, const std::string & suffix) const {
782+ if (LLM_TENSOR_NAMES[arch].find (tensor) == LLM_TENSOR_NAMES[arch].end ()) {
783+ return " __missing__" ;
784+ }
779785 return LLM_TENSOR_NAMES[arch].at (tensor) + " ." + suffix;
780786 }
781787
782788 std::string operator ()(llm_tensor tensor, int bid) const {
789+ if (LLM_TENSOR_NAMES[arch].find (tensor) == LLM_TENSOR_NAMES[arch].end ()) {
790+ return " __missing__" ;
791+ }
783792 return ::format (LLM_TENSOR_NAMES[arch].at (tensor).c_str (), bid);
784793 }
785794
786795 std::string operator ()(llm_tensor tensor, const std::string & suffix, int bid) const {
796+ if (LLM_TENSOR_NAMES[arch].find (tensor) == LLM_TENSOR_NAMES[arch].end ()) {
797+ return " __missing__" ;
798+ }
787799 return ::format (LLM_TENSOR_NAMES[arch].at (tensor).c_str (), bid) + " ." + suffix;
788800 }
789801
790802 std::string operator ()(llm_tensor tensor, const std::string & suffix, int bid, int xid) const {
803+ if (LLM_TENSOR_NAMES[arch].find (tensor) == LLM_TENSOR_NAMES[arch].end ()) {
804+ return " __missing__" ;
805+ }
791806 return ::format (LLM_TENSOR_NAMES[arch].at (tensor).c_str (), bid, xid) + " ." + suffix;
792807 }
793808};
@@ -10227,6 +10242,7 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
1022710242 }
1022810243 ++qs.i_ffn_up ;
1022910244 }
10245+
1023010246 // if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K) new_type = GGML_TYPE_Q3_K;
1023110247 // }
1023210248 // IK: let's remove this, else Q2_K is almost the same as Q3_K_S
@@ -10286,19 +10302,19 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
1028610302
1028710303 // K-quants
1028810304 case LLAMA_FTYPE_MOSTLY_Q2_K_S:
10289- case LLAMA_FTYPE_MOSTLY_Q2_K: quantized_type = GGML_TYPE_Q2_K; break ;
10305+ case LLAMA_FTYPE_MOSTLY_Q2_K: quantized_type = GGML_TYPE_Q2_K; break ;
1029010306 case LLAMA_FTYPE_MOSTLY_Q3_K_XS:
1029110307 case LLAMA_FTYPE_MOSTLY_Q3_K_S:
1029210308 case LLAMA_FTYPE_MOSTLY_Q3_K_M:
10293- case LLAMA_FTYPE_MOSTLY_Q3_K_L: quantized_type = GGML_TYPE_Q3_K; break ;
10309+ case LLAMA_FTYPE_MOSTLY_Q3_K_L: quantized_type = GGML_TYPE_Q3_K; break ;
1029410310 case LLAMA_FTYPE_MOSTLY_Q4_K_S:
10295- case LLAMA_FTYPE_MOSTLY_Q4_K_M: quantized_type = GGML_TYPE_Q4_K; break ;
10311+ case LLAMA_FTYPE_MOSTLY_Q4_K_M: quantized_type = GGML_TYPE_Q4_K; break ;
1029610312 case LLAMA_FTYPE_MOSTLY_Q5_K_S:
10297- case LLAMA_FTYPE_MOSTLY_Q5_K_M: quantized_type = GGML_TYPE_Q5_K; break ;
10298- case LLAMA_FTYPE_MOSTLY_Q6_K: quantized_type = GGML_TYPE_Q6_K; break ;
10299- case LLAMA_FTYPE_MOSTLY_IQ2_XXS:quantized_type = GGML_TYPE_IQ2_XXS; break ;
10300- case LLAMA_FTYPE_MOSTLY_IQ2_XS : quantized_type = GGML_TYPE_IQ2_XS; break ;
10301- case LLAMA_FTYPE_MOSTLY_IQ3_XXS:quantized_type = GGML_TYPE_IQ3_XXS; break ;
10313+ case LLAMA_FTYPE_MOSTLY_Q5_K_M: quantized_type = GGML_TYPE_Q5_K; break ;
10314+ case LLAMA_FTYPE_MOSTLY_Q6_K: quantized_type = GGML_TYPE_Q6_K; break ;
10315+ case LLAMA_FTYPE_MOSTLY_IQ2_XXS: quantized_type = GGML_TYPE_IQ2_XXS; break ;
10316+ case LLAMA_FTYPE_MOSTLY_IQ2_XS: quantized_type = GGML_TYPE_IQ2_XS; break ;
10317+ case LLAMA_FTYPE_MOSTLY_IQ3_XXS: quantized_type = GGML_TYPE_IQ3_XXS; break ;
1030210318
1030310319 default : throw std::runtime_error (format (" invalid output file type %d\n " , ftype));
1030410320 }
0 commit comments