-
Notifications
You must be signed in to change notification settings - Fork 2.4k
feat: add GLM-4.6 model support for z.ai provider #8408
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -32,6 +32,18 @@ export const internationalZAiModels = { | |
| description: | ||
| "GLM-4.5-Air is the lightweight version of GLM-4.5. It balances performance and cost-effectiveness, and can flexibly switch to hybrid thinking models.", | ||
| }, | ||
| "glm-4.6": { | ||
| maxTokens: 98_304, | ||
| contextWindow: 204_800, | ||
| supportsImages: false, | ||
| supportsPromptCache: true, | ||
| inputPrice: 0.6, | ||
| outputPrice: 2.2, | ||
| cacheWritesPrice: 0, | ||
| cacheReadsPrice: 0.11, | ||
| description: | ||
| "GLM-4.6 is Zhipu's newest model with an extended context window of up to 200k tokens, providing enhanced capabilities for processing longer documents and conversations.", | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. [P3] Optional: add a link to the GLM-4.6 docs/announcement in this description for maintainers to verify context window and pricing quickly. |
||
| }, | ||
| } as const satisfies Record<string, ModelInfo> | ||
|
|
||
| export type MainlandZAiModelId = keyof typeof mainlandZAiModels | ||
|
|
@@ -101,6 +113,44 @@ export const mainlandZAiModels = { | |
| }, | ||
| ], | ||
| }, | ||
| "glm-4.6": { | ||
| maxTokens: 98_304, | ||
| contextWindow: 204_800, | ||
| supportsImages: false, | ||
| supportsPromptCache: true, | ||
| inputPrice: 0.29, | ||
| outputPrice: 1.14, | ||
| cacheWritesPrice: 0, | ||
| cacheReadsPrice: 0.057, | ||
| description: | ||
| "GLM-4.6 is Zhipu's newest model with an extended context window of up to 200k tokens, providing enhanced capabilities for processing longer documents and conversations.", | ||
| tiers: [ | ||
| { | ||
| contextWindow: 32_000, | ||
| inputPrice: 0.21, | ||
| outputPrice: 1.0, | ||
| cacheReadsPrice: 0.043, | ||
| }, | ||
| { | ||
| contextWindow: 128_000, | ||
| inputPrice: 0.29, | ||
| outputPrice: 1.14, | ||
| cacheReadsPrice: 0.057, | ||
| }, | ||
| { | ||
| contextWindow: 200_000, | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. [P2] The 200_000 tier duplicates the Infinity tier pricing. Unless the provider bills differently at exactly 200k, this tier appears redundant and can be removed to reduce noise. |
||
| inputPrice: 0.29, | ||
| outputPrice: 1.14, | ||
| cacheReadsPrice: 0.057, | ||
| }, | ||
| { | ||
| contextWindow: Infinity, | ||
| inputPrice: 0.29, | ||
| outputPrice: 1.14, | ||
| cacheReadsPrice: 0.057, | ||
| }, | ||
| ], | ||
| }, | ||
| } as const satisfies Record<string, ModelInfo> | ||
|
|
||
| export const ZAI_DEFAULT_TEMPERATURE = 0 | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -71,6 +71,19 @@ describe("ZAiHandler", () => { | |
| expect(model.id).toBe(testModelId) | ||
| expect(model.info).toEqual(internationalZAiModels[testModelId]) | ||
| }) | ||
|
|
||
| it("should return GLM-4.6 international model with correct configuration", () => { | ||
| const testModelId: InternationalZAiModelId = "glm-4.6" | ||
| const handlerWithModel = new ZAiHandler({ | ||
| apiModelId: testModelId, | ||
| zaiApiKey: "test-zai-api-key", | ||
| zaiApiLine: "international", | ||
| }) | ||
| const model = handlerWithModel.getModel() | ||
| expect(model.id).toBe(testModelId) | ||
| expect(model.info).toEqual(internationalZAiModels[testModelId]) | ||
| expect(model.info.contextWindow).toBe(204_800) | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. [P3] Consider asserting pricing and supportsPromptCache for GLM-4.6 (both intl and mainland) to catch future config regressions. |
||
| }) | ||
| }) | ||
|
|
||
| describe("China Z AI", () => { | ||
|
|
@@ -108,6 +121,19 @@ describe("ZAiHandler", () => { | |
| expect(model.id).toBe(testModelId) | ||
| expect(model.info).toEqual(mainlandZAiModels[testModelId]) | ||
| }) | ||
|
|
||
| it("should return GLM-4.6 China model with correct configuration", () => { | ||
| const testModelId: MainlandZAiModelId = "glm-4.6" | ||
| const handlerWithModel = new ZAiHandler({ | ||
| apiModelId: testModelId, | ||
| zaiApiKey: "test-zai-api-key", | ||
| zaiApiLine: "china", | ||
| }) | ||
| const model = handlerWithModel.getModel() | ||
| expect(model.id).toBe(testModelId) | ||
| expect(model.info).toEqual(mainlandZAiModels[testModelId]) | ||
| expect(model.info.contextWindow).toBe(204_800) | ||
| }) | ||
| }) | ||
|
|
||
| describe("Default behavior", () => { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[P3] Please confirm maxTokens for GLM-4.6. With a 204,800 context window, 98,304 mirrors the 4.5 configuration, but it may differ for 4.6 per provider docs.