|  | 
|  | 1 | +#include "ggml.h" | 
|  | 2 | +#include "gguf.h" | 
|  | 3 | + | 
|  | 4 | +#include <climits> | 
|  | 5 | +#include <cstdarg> | 
|  | 6 | +#include <string> | 
|  | 7 | +#include <map> | 
|  | 8 | +#include <sstream> | 
|  | 9 | +#include <vector> | 
|  | 10 | + | 
|  | 11 | +// Internal header for clip.cpp | 
|  | 12 | + | 
// Metadata key strings used by clip.cpp.
// NOTE(review): presumably these are GGUF key/value names (this header includes gguf.h) - confirm against the loader.
// Keys that contain "%s" are printf-style templates and must be formatted before use
// (presumably with a sub-model name such as "text" or "vision" - TODO confirm).
#define KEY_FTYPE               "general.file_type"
#define KEY_NAME                "general.name"
#define KEY_DESCRIPTION         "general.description"
#define KEY_HAS_TEXT_ENC        "clip.has_text_encoder"
#define KEY_HAS_VIS_ENC         "clip.has_vision_encoder"
#define KEY_HAS_LLAVA_PROJ      "clip.has_llava_projector"
#define KEY_HAS_MINICPMV_PROJ   "clip.has_minicpmv_projector"
#define KEY_HAS_GLM_PROJ        "clip.has_glm_projector"
#define KEY_MINICPMV_VERSION    "clip.minicpmv_version"
#define KEY_HAS_QWEN2VL_MERGER  "clip.has_qwen2vl_merger"
#define KEY_USE_GELU            "clip.use_gelu"
#define KEY_USE_SILU            "clip.use_silu"
#define KEY_N_EMBD              "clip.%s.embedding_length"
#define KEY_N_FF                "clip.%s.feed_forward_length"
#define KEY_N_BLOCK             "clip.%s.block_count"
#define KEY_N_HEAD              "clip.%s.attention.head_count"
#define KEY_LAYER_NORM_EPS      "clip.%s.attention.layer_norm_epsilon"
#define KEY_PROJ_DIM            "clip.%s.projection_dim"
#define KEY_TOKENS              "tokenizer.ggml.tokens"
#define KEY_N_POSITIONS         "clip.text.context_length"
#define KEY_IMAGE_SIZE          "clip.vision.image_size"
#define KEY_PATCH_SIZE          "clip.vision.patch_size"
#define KEY_IMAGE_MEAN          "clip.vision.image_mean"
#define KEY_IMAGE_STD           "clip.vision.image_std"
#define KEY_PROJ_TYPE           "clip.projector_type"
#define KEY_FEATURE_LAYER       "clip.vision.feature_layer"

// Additional vision-related keys.
#define KEY_MM_PATCH_MERGE_TYPE   "clip.vision.mm_patch_merge_type"
#define KEY_IMAGE_GRID_PINPOINTS  "clip.vision.image_grid_pinpoints"
#define KEY_IMAGE_CROP_RESOLUTION "clip.vision.image_crop_resolution"
|  | 43 | + | 
|  | 44 | + | 
|  | 45 | +// | 
|  | 46 | +// tensor name constants | 
|  | 47 | +// | 
|  | 48 | + | 
// Names containing "%s" / "%d" are printf-style templates that must be formatted before use.
// NOTE(review): presumably the leading %s is a model prefix (the fixed names below use "v"),
// %d is a block/layer index and the trailing %s is "weight" or "bias" - confirm against the callers.
#define TN_TOKEN_EMBD      "%s.token_embd.weight"
#define TN_POS_EMBD        "%s.position_embd.weight"
#define TN_CLASS_EMBD      "v.class_embd"
#define TN_PATCH_EMBD      "v.patch_embd.weight"  // not renamed with a ".0" postfix, for backward compatibility
#define TN_PATCH_EMBD_1    "v.patch_embd.weight.1"
#define TN_PATCH_BIAS      "v.patch_embd.bias"
#define TN_ATTN_K          "%s.blk.%d.attn_k.%s"
#define TN_ATTN_Q          "%s.blk.%d.attn_q.%s"
#define TN_ATTN_V          "%s.blk.%d.attn_v.%s"
#define TN_ATTN_OUTPUT     "%s.blk.%d.attn_out.%s"
#define TN_FFN_DOWN        "%s.blk.%d.ffn_down.%s"
#define TN_FFN_UP          "%s.blk.%d.ffn_up.%s"
#define TN_LN_1            "%s.blk.%d.ln1.%s"
#define TN_LN_2            "%s.blk.%d.ln2.%s"
#define TN_LN_PRE          "%s.pre_ln.%s"
#define TN_LN_POST         "%s.post_ln.%s"
#define TN_TEXT_PROJ       "text_projection.weight"
#define TN_VIS_PROJ        "visual_projection.weight"
#define TN_LLAVA_PROJ      "mm.%d.%s"
#define TN_MVLM_PROJ_MLP   "mm.model.mlp.%d.%s"
#define TN_MVLM_PROJ_BLOCK "mm.model.mb_block.%d.block.%d.%s"
#define TN_MVLM_PROJ_PEG   "mm.model.peg.%d.%s"
#define TN_IMAGE_NEWLINE   "model.image_newline"
#define TN_MM_INP_PROJ     "mm.input_projection.weight" // gemma3
#define TN_MM_SOFT_EMB_N   "mm.soft_emb_norm.weight"    // gemma3
|  | 74 | + | 
// minicpmv: tensor names under the "resampler." prefix.
// The two names ending in "%s.%s" / "_%s.%s" are printf-style templates taking two string arguments.
#define TN_MINICPMV_POS_EMBD_K "resampler.pos_embed_k"
#define TN_MINICPMV_QUERY      "resampler.query"
#define TN_MINICPMV_PROJ       "resampler.proj.weight"
#define TN_MINICPMV_KV_PROJ    "resampler.kv.weight"
#define TN_MINICPMV_ATTN       "resampler.attn.%s.%s"
#define TN_MINICPMV_LN         "resampler.ln_%s.%s"
|  | 82 | + | 
// GLM adapter: tensor names under the "adapter." prefix.
// Names ending in "%s" are printf-style templates taking one string argument
// (presumably "weight" or "bias" - TODO confirm).
// NOTE(review): TN_GLM_ADAPER_CONV is spelled "ADAPER" (not "ADAPTER"); the name is kept
// as-is because code outside this header may reference it.
#define TN_GLM_ADAPER_CONV      "adapter.conv.%s"
#define TN_GLM_ADAPTER_LINEAR   "adapter.linear.linear.%s"
#define TN_GLM_ADAPTER_NORM_1   "adapter.linear.norm1.%s"
#define TN_GLM_ADAPTER_D_H_2_4H "adapter.linear.dense_h_to_4h.%s"
#define TN_GLM_ADAPTER_GATE     "adapter.linear.gate.%s"
#define TN_GLM_ADAPTER_D_4H_2_H "adapter.linear.dense_4h_to_h.%s"
#define TN_GLM_BOI_W            "adapter.boi"
#define TN_GLM_EOI_W            "adapter.eoi"
|  | 91 | + | 
// Identifiers for the projector variants referenced by this header.
// PROJECTOR_TYPE_UNKNOWN is the value clip_projector_type_from_string() returns
// when a name does not match any entry of PROJECTOR_TYPE_NAMES.
enum projector_type {
    PROJECTOR_TYPE_MLP,
    PROJECTOR_TYPE_MLP_NORM, // has no entry in PROJECTOR_TYPE_NAMES, so it is never produced by name lookup
    PROJECTOR_TYPE_LDP,
    PROJECTOR_TYPE_LDPV2,
    PROJECTOR_TYPE_RESAMPLER,
    PROJECTOR_TYPE_GLM_EDGE,
    PROJECTOR_TYPE_MERGER,
    PROJECTOR_TYPE_GEMMA3,
    PROJECTOR_TYPE_UNKNOWN,
};
|  | 103 | + | 
// Maps a projector_type to its string name; clip_projector_type_from_string() performs
// the reverse lookup over this table.
// PROJECTOR_TYPE_MLP_NORM and PROJECTOR_TYPE_UNKNOWN intentionally or otherwise have no entry here.
// NOTE(review): the map is non-const and has internal linkage, so every translation unit that
// includes this header gets its own mutable copy; operator[] on a missing key would insert an
// empty name - confirm whether callers rely on operator[] before making it const.
static std::map<projector_type, std::string> PROJECTOR_TYPE_NAMES = {
    { PROJECTOR_TYPE_MLP,       "mlp" },
    { PROJECTOR_TYPE_LDP,       "ldp" },
    { PROJECTOR_TYPE_LDPV2,     "ldpv2"},
    { PROJECTOR_TYPE_RESAMPLER, "resampler"},
    { PROJECTOR_TYPE_GLM_EDGE,  "adapter"},
    { PROJECTOR_TYPE_MERGER,    "qwen2vl_merger"},
    { PROJECTOR_TYPE_GEMMA3,    "gemma3"},
};
|  | 113 | + | 
|  | 114 | +static projector_type clip_projector_type_from_string(const std::string & str) { | 
|  | 115 | +    for (const auto & pair : PROJECTOR_TYPE_NAMES) { | 
|  | 116 | +        if (pair.second == str) { | 
|  | 117 | +            return pair.first; | 
|  | 118 | +        } | 
|  | 119 | +    } | 
|  | 120 | +    return PROJECTOR_TYPE_UNKNOWN; | 
|  | 121 | +} | 
|  | 122 | + | 
|  | 123 | +// | 
|  | 124 | +// logging | 
|  | 125 | +// | 
|  | 126 | + | 
|  | 127 | +static void clip_log_callback_default(enum ggml_log_level level, const char * text, void * user_data) { | 
|  | 128 | +    (void) level; | 
|  | 129 | +    (void) user_data; | 
|  | 130 | +    fputs(text, stderr); | 
|  | 131 | +    fflush(stderr); | 
|  | 132 | +} | 
|  | 133 | + | 
// Logger configuration consulted by the LOG_* macros and clip_log_internal_v().
struct clip_logger_state {
    ggml_log_level verbosity_thold;   // LOG_TMPL only emits messages whose level is >= this value
    ggml_log_callback log_callback;   // sink invoked with (level, formatted text, log_callback_user_data)
    void * log_callback_user_data;    // opaque pointer passed through to log_callback unchanged
};

// Single shared logger state; declared here, defined in another translation unit
// (presumably clip.cpp - TODO confirm).
extern struct clip_logger_state g_logger_state;
|  | 141 | + | 
|  | 142 | +static void clip_log_internal_v(enum ggml_log_level level, const char * format, va_list args) { | 
|  | 143 | +    if (format == NULL) { | 
|  | 144 | +        return; | 
|  | 145 | +    } | 
|  | 146 | +    va_list args_copy; | 
|  | 147 | +    va_copy(args_copy, args); | 
|  | 148 | +    char buffer[128]; | 
|  | 149 | +    int len = vsnprintf(buffer, 128, format, args); | 
|  | 150 | +    if (len < 128) { | 
|  | 151 | +        g_logger_state.log_callback(level, buffer, g_logger_state.log_callback_user_data); | 
|  | 152 | +    } else { | 
|  | 153 | +        char * buffer2 = (char *) calloc(len + 1, sizeof(char)); | 
|  | 154 | +        vsnprintf(buffer2, len + 1, format, args_copy); | 
|  | 155 | +        buffer2[len] = 0; | 
|  | 156 | +        g_logger_state.log_callback(level, buffer2, g_logger_state.log_callback_user_data); | 
|  | 157 | +        free(buffer2); | 
|  | 158 | +    } | 
|  | 159 | +    va_end(args_copy); | 
|  | 160 | +} | 
|  | 161 | + | 
|  | 162 | +static void clip_log_internal(enum ggml_log_level level, const char * format, ...) { | 
|  | 163 | +    va_list args; | 
|  | 164 | +    va_start(args, format); | 
|  | 165 | +    clip_log_internal_v(level, format, args); | 
|  | 166 | +    va_end(args); | 
|  | 167 | +} | 
|  | 168 | + | 
// LOG_TMPL(level, fmt, ...): forwards a printf-style message to clip_log_internal() only when
// `level` is >= g_logger_state.verbosity_thold. Because the check wraps the call, the format
// arguments are not evaluated for messages that are filtered out.
// The do { } while (0) wrapper makes the macro behave as a single statement.
#define LOG_TMPL(level, ...) \
    do { \
        if ((level) >= g_logger_state.verbosity_thold) { \
            clip_log_internal((level), __VA_ARGS__); \
        } \
    } while (0)
// Convenience wrappers that fix the log level.
#define LOG_INF(...) LOG_TMPL(GGML_LOG_LEVEL_INFO,  __VA_ARGS__)
#define LOG_WRN(...) LOG_TMPL(GGML_LOG_LEVEL_WARN,  __VA_ARGS__)
#define LOG_ERR(...) LOG_TMPL(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
#define LOG_DBG(...) LOG_TMPL(GGML_LOG_LEVEL_DEBUG, __VA_ARGS__)
#define LOG_CNT(...) LOG_TMPL(GGML_LOG_LEVEL_CONT,  __VA_ARGS__)
|  | 180 | + | 
|  | 181 | +// | 
|  | 182 | +// common utils | 
|  | 183 | +// | 
|  | 184 | + | 
// printf-style formatting into a std::string.
//
//   fmt - printf-style format string, followed by its arguments
//
// Returns the formatted text. The result holds exactly the formatted characters; the
// NUL terminator written by vsnprintf is not part of the returned string, so the result
// compares and concatenates like any other std::string.
// Asserts (GGML_ASSERT) if formatting fails or the two formatting passes disagree.
static std::string string_format(const char * fmt, ...) {
    va_list ap;
    va_list ap2;
    va_start(ap, fmt);
    va_copy(ap2, ap); // a va_list can only be traversed once; the second pass needs its own copy
    // first pass: measure the required length (excluding the terminator)
    const int size = vsnprintf(NULL, 0, fmt, ap);
    GGML_ASSERT(size >= 0 && size < INT_MAX); // NOLINT
    // second pass: format into a buffer with room for the terminator
    std::vector<char> buf(size + 1);
    const int size2 = vsnprintf(buf.data(), buf.size(), fmt, ap2);
    GGML_ASSERT(size2 == size);
    va_end(ap2);
    va_end(ap);
    // copy `size` characters only, leaving out the trailing NUL stored in buf[size]
    return std::string(buf.data(), size);
}
|  | 199 | + | 
// Replaces, in place, every non-overlapping occurrence of `search` in `s` with `replace`.
// Matching proceeds left to right and resumes right after each match, so text inserted by
// a replacement is never re-scanned. An empty `search` leaves `s` untouched.
static void string_replace_all(std::string & s, const std::string & search, const std::string & replace) {
    if (search.empty()) {
        return;
    }
    std::string result;
    result.reserve(s.length());
    size_t cursor = 0; // start of the not-yet-copied tail of `s`
    for (size_t hit = s.find(search, cursor); hit != std::string::npos; hit = s.find(search, cursor)) {
        // copy the untouched text before the match, then the replacement
        result.append(s, cursor, hit - cursor);
        result.append(replace);
        cursor = hit + search.length();
    }
    // copy whatever follows the last match
    result.append(s, cursor, std::string::npos);
    s = std::move(result);
}
|  | 216 | + | 
|  | 217 | +// | 
|  | 218 | +// gguf utils | 
|  | 219 | +// | 
|  | 220 | + | 
|  | 221 | +static std::string gguf_data_to_str(enum gguf_type type, const void * data, int i) { | 
|  | 222 | +    switch (type) { | 
|  | 223 | +        case GGUF_TYPE_UINT8:   return std::to_string(((const uint8_t  *)data)[i]); | 
|  | 224 | +        case GGUF_TYPE_INT8:    return std::to_string(((const int8_t   *)data)[i]); | 
|  | 225 | +        case GGUF_TYPE_UINT16:  return std::to_string(((const uint16_t *)data)[i]); | 
|  | 226 | +        case GGUF_TYPE_INT16:   return std::to_string(((const int16_t  *)data)[i]); | 
|  | 227 | +        case GGUF_TYPE_UINT32:  return std::to_string(((const uint32_t *)data)[i]); | 
|  | 228 | +        case GGUF_TYPE_INT32:   return std::to_string(((const int32_t  *)data)[i]); | 
|  | 229 | +        case GGUF_TYPE_UINT64:  return std::to_string(((const uint64_t *)data)[i]); | 
|  | 230 | +        case GGUF_TYPE_INT64:   return std::to_string(((const int64_t  *)data)[i]); | 
|  | 231 | +        case GGUF_TYPE_FLOAT32: return std::to_string(((const float    *)data)[i]); | 
|  | 232 | +        case GGUF_TYPE_FLOAT64: return std::to_string(((const double   *)data)[i]); | 
|  | 233 | +        case GGUF_TYPE_BOOL:    return ((const bool *)data)[i] ? "true" : "false"; | 
|  | 234 | +        default:                return string_format("unknown type %d", type); | 
|  | 235 | +    } | 
|  | 236 | +} | 
|  | 237 | + | 
|  | 238 | +static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) { | 
|  | 239 | +    const enum gguf_type type = gguf_get_kv_type(ctx_gguf, i); | 
|  | 240 | + | 
|  | 241 | +    switch (type) { | 
|  | 242 | +        case GGUF_TYPE_STRING: | 
|  | 243 | +            return gguf_get_val_str(ctx_gguf, i); | 
|  | 244 | +        case GGUF_TYPE_ARRAY: | 
|  | 245 | +            { | 
|  | 246 | +                const enum gguf_type arr_type = gguf_get_arr_type(ctx_gguf, i); | 
|  | 247 | +                int arr_n = gguf_get_arr_n(ctx_gguf, i); | 
|  | 248 | +                const void * data = arr_type == GGUF_TYPE_STRING ? nullptr : gguf_get_arr_data(ctx_gguf, i); | 
|  | 249 | +                std::stringstream ss; | 
|  | 250 | +                ss << "["; | 
|  | 251 | +                for (int j = 0; j < arr_n; j++) { | 
|  | 252 | +                    if (arr_type == GGUF_TYPE_STRING) { | 
|  | 253 | +                        std::string val = gguf_get_arr_str(ctx_gguf, i, j); | 
|  | 254 | +                        // escape quotes | 
|  | 255 | +                        string_replace_all(val, "\\", "\\\\"); | 
|  | 256 | +                        string_replace_all(val, "\"", "\\\""); | 
|  | 257 | +                        ss << '"' << val << '"'; | 
|  | 258 | +                    } else if (arr_type == GGUF_TYPE_ARRAY) { | 
|  | 259 | +                        ss << "???"; | 
|  | 260 | +                    } else { | 
|  | 261 | +                        ss << gguf_data_to_str(arr_type, data, j); | 
|  | 262 | +                    } | 
|  | 263 | +                    if (j < arr_n - 1) { | 
|  | 264 | +                        ss << ", "; | 
|  | 265 | +                    } | 
|  | 266 | +                } | 
|  | 267 | +                ss << "]"; | 
|  | 268 | +                return ss.str(); | 
|  | 269 | +            } | 
|  | 270 | +        default: | 
|  | 271 | +            return gguf_data_to_str(type, gguf_get_val_data(ctx_gguf, i), 0); | 
|  | 272 | +    } | 
|  | 273 | +} | 
0 commit comments