Skip to content

Commit a89a03e

Browse files
CharlieFRuan and jingyi-zhao-01
authored and committed
[Model] Add Phi3.5-mini (mlc-ai#555)
This PR adds the newly release Phi3.5-mini, adding the following `model_id`s to our prebuilt model list: - `Phi-3.5-mini-instruct-q4f16_1-MLC` (4k KVCache) - `Phi-3.5-mini-instruct-q4f32_1-MLC` (4k KVCache) - `Phi-3.5-mini-instruct-q4f16_1-MLC-1k` (1k KVCache) - `Phi-3.5-mini-instruct-q4f16_1-MLC-1k` (1k KVCache) See mlc-ai/binary-mlc-llm-libs#136 for on which commits of TVM and MLC-LLM this is compiled with. Note that Phi-3.5-mini comes with support up to 128K context (unlike Phi-3-mini which only has 4k) thanks to rope scaling which MLC-LLM supports, which you can take advantage of in WebLLM by increasing `ModelRecord.overrides.context_window_size` or specifying it in `ChatOptions` when loading a model, as long as there is enough VRAM.
1 parent 04b5b16 commit a89a03e

File tree

2 files changed

+67
-14
lines changed

2 files changed

+67
-14
lines changed

examples/multi-models/src/main.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ const initProgressCallback = (report: webllm.InitProgressReport) => {
2121
};
2222

2323
// Prepare request for each model, same for both methods
24-
const selectedModel1 = "Phi-3-mini-4k-instruct-q4f32_1-MLC-1k";
24+
const selectedModel1 = "Phi-3.5-mini-instruct-q4f32_1-MLC-1k";
2525
const selectedModel2 = "gemma-2-2b-it-q4f32_1-MLC-1k";
2626
const prompt1 = "Tell me about California in 3 short sentences.";
2727
const prompt2 = "Tell me about New York City in 3 short sentences.";

src/config.ts

Lines changed: 66 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -431,53 +431,53 @@ export const prebuiltAppConfig: AppConfig = {
431431
sliding_window_size: -1,
432432
},
433433
},
434-
// Phi3-mini-instruct
434+
// Phi3.5-mini-instruct
435435
{
436-
model: "https://huggingface.co/mlc-ai/Phi-3-mini-4k-instruct-q4f16_1-MLC",
437-
model_id: "Phi-3-mini-4k-instruct-q4f16_1-MLC",
436+
model: "https://huggingface.co/mlc-ai/Phi-3.5-mini-instruct-q4f16_1-MLC",
437+
model_id: "Phi-3.5-mini-instruct-q4f16_1-MLC",
438438
model_lib:
439439
modelLibURLPrefix +
440440
modelVersion +
441-
"/Phi-3-mini-4k-instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm",
441+
"/Phi-3.5-mini-instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm",
442442
vram_required_MB: 3672.07,
443443
low_resource_required: false,
444444
overrides: {
445445
context_window_size: 4096,
446446
},
447447
},
448448
{
449-
model: "https://huggingface.co/mlc-ai/Phi-3-mini-4k-instruct-q4f32_1-MLC",
450-
model_id: "Phi-3-mini-4k-instruct-q4f32_1-MLC",
449+
model: "https://huggingface.co/mlc-ai/Phi-3.5-mini-instruct-q4f32_1-MLC",
450+
model_id: "Phi-3.5-mini-instruct-q4f32_1-MLC",
451451
model_lib:
452452
modelLibURLPrefix +
453453
modelVersion +
454-
"/Phi-3-mini-4k-instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm",
454+
"/Phi-3.5-mini-instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm",
455455
vram_required_MB: 5483.12,
456456
low_resource_required: false,
457457
overrides: {
458458
context_window_size: 4096,
459459
},
460460
},
461461
{
462-
model: "https://huggingface.co/mlc-ai/Phi-3-mini-4k-instruct-q4f16_1-MLC",
463-
model_id: "Phi-3-mini-4k-instruct-q4f16_1-MLC-1k",
462+
model: "https://huggingface.co/mlc-ai/Phi-3.5-mini-instruct-q4f16_1-MLC",
463+
model_id: "Phi-3.5-mini-instruct-q4f16_1-MLC-1k",
464464
model_lib:
465465
modelLibURLPrefix +
466466
modelVersion +
467-
"/Phi-3-mini-4k-instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm",
467+
"/Phi-3.5-mini-instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm",
468468
vram_required_MB: 2520.07,
469469
low_resource_required: true,
470470
overrides: {
471471
context_window_size: 1024,
472472
},
473473
},
474474
{
475-
model: "https://huggingface.co/mlc-ai/Phi-3-mini-4k-instruct-q4f32_1-MLC",
476-
model_id: "Phi-3-mini-4k-instruct-q4f32_1-MLC-1k",
475+
model: "https://huggingface.co/mlc-ai/Phi-3.5-mini-instruct-q4f32_1-MLC",
476+
model_id: "Phi-3.5-mini-instruct-q4f32_1-MLC-1k",
477477
model_lib:
478478
modelLibURLPrefix +
479479
modelVersion +
480-
"/Phi-3-mini-4k-instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm",
480+
"/Phi-3.5-mini-instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm",
481481
vram_required_MB: 3179.12,
482482
low_resource_required: true,
483483
overrides: {
@@ -1224,6 +1224,59 @@ export const prebuiltAppConfig: AppConfig = {
12241224
context_window_size: 4096,
12251225
},
12261226
},
1227+
// Phi3-mini-instruct
1228+
{
1229+
model: "https://huggingface.co/mlc-ai/Phi-3-mini-4k-instruct-q4f16_1-MLC",
1230+
model_id: "Phi-3-mini-4k-instruct-q4f16_1-MLC",
1231+
model_lib:
1232+
modelLibURLPrefix +
1233+
modelVersion +
1234+
"/Phi-3-mini-4k-instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm",
1235+
vram_required_MB: 3672.07,
1236+
low_resource_required: false,
1237+
overrides: {
1238+
context_window_size: 4096,
1239+
},
1240+
},
1241+
{
1242+
model: "https://huggingface.co/mlc-ai/Phi-3-mini-4k-instruct-q4f32_1-MLC",
1243+
model_id: "Phi-3-mini-4k-instruct-q4f32_1-MLC",
1244+
model_lib:
1245+
modelLibURLPrefix +
1246+
modelVersion +
1247+
"/Phi-3-mini-4k-instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm",
1248+
vram_required_MB: 5483.12,
1249+
low_resource_required: false,
1250+
overrides: {
1251+
context_window_size: 4096,
1252+
},
1253+
},
1254+
{
1255+
model: "https://huggingface.co/mlc-ai/Phi-3-mini-4k-instruct-q4f16_1-MLC",
1256+
model_id: "Phi-3-mini-4k-instruct-q4f16_1-MLC-1k",
1257+
model_lib:
1258+
modelLibURLPrefix +
1259+
modelVersion +
1260+
"/Phi-3-mini-4k-instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm",
1261+
vram_required_MB: 2520.07,
1262+
low_resource_required: true,
1263+
overrides: {
1264+
context_window_size: 1024,
1265+
},
1266+
},
1267+
{
1268+
model: "https://huggingface.co/mlc-ai/Phi-3-mini-4k-instruct-q4f32_1-MLC",
1269+
model_id: "Phi-3-mini-4k-instruct-q4f32_1-MLC-1k",
1270+
model_lib:
1271+
modelLibURLPrefix +
1272+
modelVersion +
1273+
"/Phi-3-mini-4k-instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm",
1274+
vram_required_MB: 3179.12,
1275+
low_resource_required: true,
1276+
overrides: {
1277+
context_window_size: 1024,
1278+
},
1279+
},
12271280
// Llama-2
12281281
{
12291282
model: "https://huggingface.co/mlc-ai/Llama-2-7b-chat-hf-q4f32_1-MLC",

0 commit comments

Comments
 (0)