diff --git a/apps/web-roo-code/src/app/privacy/page.tsx b/apps/web-roo-code/src/app/privacy/page.tsx index bb132651f76..b0efdbc4440 100644 --- a/apps/web-roo-code/src/app/privacy/page.tsx +++ b/apps/web-roo-code/src/app/privacy/page.tsx @@ -86,8 +86,8 @@ export default function Privacy() { Your source code does not transit Roo Code servers unless you explicitly choose Roo Code as a model provider (proxy mode). {" "} - When Roo Code Cloud is your model provider, your code briefly transits Roo Code servers only to - forward it to the upstream model, is not stored, and is deleted immediately after + When Roo Code Cloud is your model provider, your code briefly transits Roo Code servers only + to forward it to the upstream model, is not stored, and is deleted immediately after forwarding. Otherwise, your code is sent directly—via client‑to‑provider TLS—to the model you select. Roo Code never stores, inspects, or trains on your code. diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index f9ccd8512ad..7779708af00 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -720,8 +720,8 @@ importers: specifier: ^0.5.17 version: 0.5.17 openai: - specifier: ^5.12.2 - version: 5.12.2(ws@8.18.3)(zod@3.25.61) + specifier: ^5.15.0 + version: 5.15.0(ws@8.18.3)(zod@3.25.61) os-name: specifier: ^6.0.0 version: 6.1.0 @@ -7976,8 +7976,8 @@ packages: resolution: {integrity: sha512-cxN6aIDPz6rm8hbebcP7vrQNhvRcveZoJU72Y7vskh4oIm+BZwBECnx5nTmrlres1Qapvx27Qo1Auukpf8PKXw==} engines: {node: '>=18'} - openai@5.12.2: - resolution: {integrity: sha512-xqzHHQch5Tws5PcKR2xsZGX9xtch+JQFz5zb14dGqlshmmDAFBFEWmeIpf7wVqWV+w7Emj7jRgkNJakyKE0tYQ==} + openai@5.15.0: + resolution: {integrity: sha512-kcUdws8K/A8m02I+IqFBwO51gS+87GP89yWEufGbzEi8anBz4FB/bti2QxaJdGwwY4mwJGzx85XO7TuL/Tpu1w==} hasBin: true peerDependencies: ws: ^8.18.0 @@ -18213,7 +18213,7 @@ snapshots: is-inside-container: 1.0.0 is-wsl: 3.1.0 - openai@5.12.2(ws@8.18.3)(zod@3.25.61): + openai@5.15.0(ws@8.18.3)(zod@3.25.61): optionalDependencies: ws: 8.18.3 zod: 3.25.61 diff --git a/src/api/providers/__tests__/openai.spec.ts b/src/api/providers/__tests__/openai.spec.ts index 14ed35430a5..0affafdabff 100644 --- a/src/api/providers/__tests__/openai.spec.ts +++ b/src/api/providers/__tests__/openai.spec.ts @@ -4,27 +4,65 @@ import { OpenAiHandler, getOpenAiModels } from "../openai" import { ApiHandlerOptions } from "../../../shared/api" import { Anthropic } from "@anthropic-ai/sdk" import OpenAI from "openai" -import { openAiModelInfoSaneDefaults } from "@roo-code/types" import { Package } from "../../../shared/package" import axios from "axios" +type ErrorWithStatus = Error & { status?: number } + +function getMockCallsOf(fn: unknown): any[] { + const isObj = (v: unknown): v is Record => typeof v === "object" && v !== null + if (isObj(fn) || typeof fn === "function") { + const rec = fn as Record + const mock = rec["mock"] + if (isObj(mock)) { + const calls = mock["calls"] + if (Array.isArray(calls)) return calls + } + } + return [] +} const mockCreate = vitest.fn() +const mockResponsesCreate = vitest.fn() vitest.mock("openai", () => { const mockConstructor = vitest.fn() - return { - __esModule: true, - default: mockConstructor.mockImplementation(() => ({ - chat: { - completions: { - create: mockCreate.mockImplementation(async (options) => { - if (!options.stream) { - return { - id: "test-completion", + const makeClient = () => ({ + chat: { + completions: { + create: mockCreate.mockImplementation(async (options) => { + if (!options.stream) { + return { + id: "test-completion", + 
choices: [ + { + message: { role: "assistant", content: "Test response", refusal: null }, + finish_reason: "stop", + index: 0, + }, + ], + usage: { + prompt_tokens: 10, + completion_tokens: 5, + total_tokens: 15, + }, + } + } + + return { + [Symbol.asyncIterator]: async function* () { + yield { choices: [ { - message: { role: "assistant", content: "Test response", refusal: null }, - finish_reason: "stop", + delta: { content: "Test response" }, + index: 0, + }, + ], + usage: null, + } + yield { + choices: [ + { + delta: {}, index: 0, }, ], @@ -34,38 +72,30 @@ vitest.mock("openai", () => { total_tokens: 15, }, } - } - - return { - [Symbol.asyncIterator]: async function* () { - yield { - choices: [ - { - delta: { content: "Test response" }, - index: 0, - }, - ], - usage: null, - } - yield { - choices: [ - { - delta: {}, - index: 0, - }, - ], - usage: { - prompt_tokens: 10, - completion_tokens: 5, - total_tokens: 15, - }, - } - }, - } - }), - }, + }, + } + }), }, - })), + }, + responses: { + create: mockResponsesCreate.mockImplementation(async (options) => { + // Default happy-path mock for non-streaming Responses API + return { + id: "test-response", + output_text: "Test response", + usage: { + input_tokens: 10, + output_tokens: 5, + total_tokens: 15, + }, + } + }), + }, + }) + return { + __esModule: true, + default: mockConstructor.mockImplementation((args: any) => makeClient()), + AzureOpenAI: mockConstructor.mockImplementation((args: any) => makeClient()), } }) @@ -408,9 +438,9 @@ describe("OpenAiHandler", () => { }) it("should handle rate limiting", async () => { - const rateLimitError = new Error("Rate limit exceeded") + const rateLimitError: ErrorWithStatus = new Error("Rate limit exceeded") rateLimitError.name = "Error" - ;(rateLimitError as any).status = 429 + rateLimitError.status = 429 mockCreate.mockRejectedValueOnce(rateLimitError) const stream = handler.createMessage("system prompt", testMessages) @@ -977,6 +1007,56 @@ describe("getOpenAiModels", () => { expect(result).toEqual([]) }) + describe("Azure portal Responses URL normalization", () => { + beforeEach(() => { + mockCreate.mockClear() + mockResponsesCreate.mockClear() + }) + + it("Responses URL from Azure portal is converted to use Responses API", async () => { + const handler = new OpenAiHandler({ + openAiApiKey: "test-azure", + openAiModelId: "my-deployment", + openAiBaseUrl: "https://sample-name.openai.azure.com/openai/responses?api-version=2025-04-01-preview", + openAiUseAzure: true, + openAiStreamingEnabled: false, + includeMaxTokens: true, + openAiCustomModelInfo: { + contextWindow: 128_000, + maxTokens: 64, + supportsPromptCache: false, + }, + }) + + const messages: Anthropic.Messages.MessageParam[] = [ + { role: "user", content: [{ type: "text", text: "Hello!" 
}] }, + ] + + const stream = handler.createMessage("You are Roo Code.", messages) + const chunks: any[] = [] + for await (const ch of stream) { + chunks.push(ch) + } + + // Should have used Responses API, not Chat Completions + expect(mockResponsesCreate).toHaveBeenCalled() + expect(mockCreate).not.toHaveBeenCalled() + + // Payload shape sanity + const args = mockResponsesCreate.mock.calls[0][0] + expect(args).toHaveProperty("model", "my-deployment") + expect(args).toHaveProperty("input") + expect(typeof args.input).toBe("string") + expect(args.input).toContain("Developer: You are Roo Code.") + expect(args.input).toContain("User: Hello!") + expect(args).toHaveProperty("max_output_tokens", 64) + + // Ensure returned text chunk surfaced + const textChunk = chunks.find((c) => c.type === "text") + expect(textChunk?.text).toBe("Test response") + }) + }) + it("should deduplicate model IDs", async () => { const mockResponse = { data: { @@ -990,3 +1070,1000 @@ describe("getOpenAiModels", () => { expect(result).toEqual(["gpt-4", "gpt-3.5-turbo"]) }) }) + +// -- Added Responses API tests (TDD) -- + +describe("OpenAI Compatible - Responses API", () => { + let handler: OpenAiHandler + const baseMessages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: [ + { + type: "text" as const, + text: "Hello!", + }, + ], + }, + ] + + beforeEach(() => { + mockCreate.mockClear() + mockResponsesCreate.mockClear() + }) + + it("Azure Responses happy path uses string input (no messages) and max_output_tokens", async () => { + const opts: ApiHandlerOptions = { + openAiApiKey: "test-azure", + openAiModelId: "my-deployment", + openAiBaseUrl: "https://myres.openai.azure.com/openai/v1/responses?api-version=preview", + openAiStreamingEnabled: false, + includeMaxTokens: true, + openAiCustomModelInfo: { + contextWindow: 128_000, + maxTokens: 256, + supportsPromptCache: false, + }, + enableReasoningEffort: false, + } + handler = new OpenAiHandler(opts) + + const stream = handler.createMessage("You are Roo Code.", baseMessages) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + // Should have produced a text chunk + const textChunk = chunks.find((c) => c.type === "text") + expect(textChunk?.text).toBe("Test response") + + // Ensure Responses API was used + expect(mockResponsesCreate).toHaveBeenCalled() + expect(mockCreate).not.toHaveBeenCalled() + + const callArgs = mockResponsesCreate.mock.calls[0][0] + expect(callArgs).not.toHaveProperty("messages") + expect(callArgs).toHaveProperty("input") + expect(typeof callArgs.input).toBe("string") + expect(callArgs.input).toContain("Developer: You are Roo Code.") + expect(callArgs.input).toContain("User: Hello!") + expect(callArgs).toHaveProperty("model", "my-deployment") + // Azure Responses naming + expect(callArgs).toHaveProperty("max_output_tokens", 256) + }) + + it("Auto-detect: '/v1/responses' => Responses payload; '/chat/completions' => Chat Completions payload", async () => { + // Responses URL + const respHandler = new OpenAiHandler({ + openAiApiKey: "test", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + }) + for await (const _ of respHandler.createMessage("sys", baseMessages)) { + } + expect(mockResponsesCreate).toHaveBeenCalled() + const respArgs = mockResponsesCreate.mock.calls.pop()?.[0] + expect(respArgs).not.toHaveProperty("messages") + expect(respArgs).toHaveProperty("input") + + // Chat Completions URL + mockResponsesCreate.mockClear() + 
mockCreate.mockClear() + const chatHandler = new OpenAiHandler({ + openAiApiKey: "test", + openAiModelId: "gpt-4o", + openAiBaseUrl: "https://api.openai.com/v1/chat/completions", + openAiStreamingEnabled: false, + }) + for await (const _ of chatHandler.createMessage("sys", baseMessages)) { + } + expect(mockCreate).toHaveBeenCalled() + const chatArgs = mockCreate.mock.calls.pop()?.[0] + expect(chatArgs).toHaveProperty("messages") + expect(chatArgs).not.toHaveProperty("input") + }) + + it("Reasoning effort mapping: Responses uses reasoning: { effort }, Chat uses reasoning_effort", async () => { + // Responses path + const responsesHandler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + enableReasoningEffort: true, + reasoningEffort: "high", + openAiCustomModelInfo: { + contextWindow: 128_000, + supportsPromptCache: false, + supportsReasoningEffort: true, + }, + }) + for await (const _ of responsesHandler.createMessage("sys", baseMessages)) { + } + expect(mockResponsesCreate).toHaveBeenCalled() + const rArgs = mockResponsesCreate.mock.calls.pop()?.[0] + expect(rArgs).toHaveProperty("reasoning") + expect(rArgs.reasoning).toMatchObject({ effort: "high" }) + + // Chat path + mockResponsesCreate.mockClear() + mockCreate.mockClear() + const chatHandler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-4o", + openAiBaseUrl: "https://api.openai.com/v1/chat/completions", + openAiStreamingEnabled: false, + enableReasoningEffort: true, + reasoningEffort: "high", + openAiCustomModelInfo: { + contextWindow: 128_000, + supportsPromptCache: false, + supportsReasoningEffort: true, + }, + }) + for await (const _ of chatHandler.createMessage("sys", baseMessages)) { + } + expect(mockCreate).toHaveBeenCalled() + const cArgs = mockCreate.mock.calls.pop()?.[0] + expect(cArgs).toHaveProperty("reasoning_effort", "high") + }) + + it("Verbosity (Responses): include when set; if server rejects, retry without it (warn once)", async () => { + // First call throws 400 for 'verbosity', second succeeds + mockResponsesCreate.mockImplementationOnce((_opts: unknown) => { + const err: ErrorWithStatus = new Error("Unsupported parameter: 'verbosity'") + err.status = 400 + throw err + }) + + const h = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + verbosity: "high", + }) + + const stream = h.createMessage("sys", baseMessages) + const chunks: any[] = [] + for await (const ch of stream) { + chunks.push(ch) + } + + expect(mockResponsesCreate).toHaveBeenCalledTimes(2) + const first = mockResponsesCreate.mock.calls[0][0] + const second = mockResponsesCreate.mock.calls[1][0] + expect(first).toHaveProperty("text") + expect(first.text).toEqual({ verbosity: "high" }) + expect(second).not.toHaveProperty("text") + + // Should still yield text + const textChunk = chunks.find((c) => c.type === "text") + expect(textChunk?.text).toBe("Test response") + }) + + it("Azure naming: use max_output_tokens for Responses; keep max_completion_tokens for Chat Completions", async () => { + // Responses + includeMaxTokens + const r = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + includeMaxTokens: true, + modelMaxTokens: 128, + openAiCustomModelInfo: { + contextWindow: 128_000, + maxTokens: 4096, + 
supportsPromptCache: false, + }, + }) + for await (const _ of r.createMessage("sys", baseMessages)) { + } + const rArgs = mockResponsesCreate.mock.calls.pop()?.[0] + expect(rArgs).toHaveProperty("max_output_tokens", 128) + expect(rArgs).not.toHaveProperty("max_completion_tokens") + + // Chat + includeMaxTokens + mockResponsesCreate.mockClear() + mockCreate.mockClear() + const c = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-4o", + openAiBaseUrl: "https://api.openai.com/v1/chat/completions", + openAiStreamingEnabled: false, + includeMaxTokens: true, + modelMaxTokens: 128, + openAiCustomModelInfo: { + contextWindow: 128_000, + maxTokens: 4096, + supportsPromptCache: false, + }, + }) + for await (const _ of c.createMessage("sys", baseMessages)) { + } + const cArgs = mockCreate.mock.calls.pop()?.[0] + expect(cArgs).toHaveProperty("max_completion_tokens", 128) + expect(cArgs).not.toHaveProperty("max_output_tokens") + }) + + it("Normalizes Azure portal responses URL to /openai/v1 with apiVersion=preview", async () => { + mockResponsesCreate.mockClear() + mockCreate.mockClear() + + const portalUrl = "https://sample-name.openai.azure.com/openai/responses?api-version=2025-04-01-preview" + + const handler = new OpenAiHandler({ + openAiApiKey: "test-azure", + openAiModelId: "my-deployment", + openAiBaseUrl: portalUrl, + openAiStreamingEnabled: false, + }) + + for await (const _ of handler.createMessage("sys", baseMessages)) { + } + + // Ensures Responses API path was used + expect(mockResponsesCreate).toHaveBeenCalled() + + // Ensure SDK constructor was called with normalized baseURL and 'preview' apiVersion (per requirement) + // Note: AzureOpenAI and OpenAI share same mock constructor; inspect last call + const ctorCalls = getMockCallsOf(OpenAI) + const lastCall = ctorCalls[ctorCalls.length - 1] + const lastArg0 = Array.isArray(lastCall) ? lastCall[0] : undefined + const lastCtorArgs = + typeof lastArg0 === "object" && lastArg0 !== null ? 
(lastArg0 as Record) : {} + expect(lastCtorArgs["baseURL"]).toBe("https://sample-name.openai.azure.com/openai/v1") + expect(lastCtorArgs["apiVersion"]).toBe("preview") + }) + + it("streams Responses API when provider returns AsyncIterable", async () => { + // Arrange: make responses.create return an AsyncIterable stream for this test + mockResponsesCreate.mockImplementationOnce(async (_opts: any) => { + return { + [Symbol.asyncIterator]: async function* () { + yield { type: "response.text.delta", delta: "Hello " } + yield { type: "response.text.delta", delta: "world" } + yield { + type: "response.completed", + response: { usage: { input_tokens: 7, output_tokens: 2 } }, + } + }, + } + }) + + const handler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5-mini", + openAiBaseUrl: "https://api.openai.com/v1/responses", + // streaming enabled by default + }) + + const stream = handler.createMessage("You are Roo.", [ + { role: "user", content: [{ type: "text" as const, text: "Say hi" }] }, + ]) + + const chunks: any[] = [] + for await (const ch of stream) { + chunks.push(ch) + } + + // Text should be streamed and concatenated in order + const text = chunks + .filter((c) => c.type === "text") + .map((c) => c.text) + .join("") + expect(text).toBe("Hello world") + + // Usage chunk emitted at completion + const usage = chunks.find((c) => c.type === "usage") + expect(usage).toBeDefined() + expect(usage.inputTokens).toBe(7) + expect(usage.outputTokens).toBe(2) + + // Ensure stream: true was sent + const args = mockResponsesCreate.mock.calls.pop()?.[0] + expect(args).toHaveProperty("stream", true) + }) +}) + +describe("OpenAI Compatible - Responses API (extended streaming)", () => { + it("handles reasoning deltas and output_text in message content", async () => { + // Arrange: make responses.create return an AsyncIterable stream for this test + mockResponsesCreate.mockImplementationOnce(async (_opts: any) => { + return { + [Symbol.asyncIterator]: async function* () { + // Reasoning delta first + yield { type: "response.reasoning.delta", delta: "Thinking. " } + // Then a message item with output_text inside content array + yield { + type: "response.output_item.added", + item: { + type: "message", + content: [{ type: "output_text", text: "Answer." }], + }, + } + // Completion with usage + yield { + type: "response.completed", + response: { usage: { input_tokens: 3, output_tokens: 2 } }, + } + }, + } + }) + + const handler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5-mini", + openAiBaseUrl: "https://api.openai.com/v1/responses", + }) + + const chunks: any[] = [] + for await (const ch of handler.createMessage("sys", [ + { role: "user", content: [{ type: "text" as const, text: "Hi" }] }, + ])) { + chunks.push(ch) + } + + const reasoning = chunks.find((c) => c.type === "reasoning") + expect(reasoning?.text).toBe("Thinking. 
") + + const text = chunks.find((c) => c.type === "text") + expect(text?.text).toBe("Answer.") + + const usage = chunks.find((c) => c.type === "usage") + expect(usage).toBeDefined() + expect(usage.inputTokens).toBe(3) + expect(usage.outputTokens).toBe(2) + + // Ensure stream: true was sent + const args = mockResponsesCreate.mock.calls.pop()?.[0] + expect(args).toHaveProperty("stream", true) + }) + + it("maps refusal deltas to text with prefix", async () => { + mockResponsesCreate.mockImplementationOnce(async (_opts: any) => { + return { + [Symbol.asyncIterator]: async function* () { + yield { type: "response.refusal.delta", delta: "Cannot comply" } + // Usage may be attached directly on the event for some implementations + yield { type: "response.done", usage: { prompt_tokens: 1, completion_tokens: 1 } } + }, + } + }) + + const handler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5-mini", + openAiBaseUrl: "https://api.openai.com/v1/responses", + }) + + const result: any[] = [] + for await (const ch of handler.createMessage("sys", [ + { role: "user", content: [{ type: "text" as const, text: "Hi" }] }, + ])) { + result.push(ch) + } + + const textChunks = result.filter((c) => c.type === "text").map((c) => c.text) + expect(textChunks).toContain("[Refusal] Cannot comply") + + const usage = result.find((c) => c.type === "usage") + expect(usage).toBeDefined() + expect(usage.inputTokens).toBe(1) + expect(usage.outputTokens).toBe(1) + }) +}) + +describe("OpenAI Compatible - Responses API (multimodal)", () => { + it("builds structured array input with images (non-streaming)", async () => { + // Reset mocks for clarity + mockResponsesCreate.mockClear() + mockCreate.mockClear() + + const handler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5-mini", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + includeMaxTokens: false, + }) + + const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: [ + { type: "text" as const, text: "Here is an image" }, + { + type: "image" as const, + // Minimal Anthropic-style inline image (base64) block + source: { type: "base64" as const, media_type: "image/png", data: "BASE64DATA" }, + }, + ], + }, + ] + + const chunks: any[] = [] + for await (const ch of handler.createMessage("You are Roo Code.", messages)) { + chunks.push(ch) + } + + // Should have used Responses API + expect(mockResponsesCreate).toHaveBeenCalled() + const args = mockResponsesCreate.mock.calls[0][0] + + // Input should be an array (structured input mode) + expect(Array.isArray(args.input)).toBe(true) + const arr = Array.isArray(args.input) ? 
args.input : [] + + // First element should be Developer preface as input_text + expect(arr[0]?.role).toBe("user") + expect(arr[0]?.content?.[0]?.type).toBe("input_text") + expect(arr[0]?.content?.[0]?.text).toContain("Developer: You are Roo Code.") + + // There should be at least one input_image with a data URL for the provided image + const hasInputImage = arr.some((item: any) => { + const c = item?.content + return ( + Array.isArray(c) && + c.some( + (part: any) => + part?.type === "input_image" && + typeof part?.image_url === "string" && + part.image_url.startsWith("data:image/png;base64,BASE64DATA"), + ) + ) + }) + expect(hasInputImage).toBe(true) + + // Should still yield a text chunk and usage (from default mock) + const textChunk = chunks.find((c: any) => c.type === "text") + const usageChunk = chunks.find((c: any) => c.type === "usage") + expect(textChunk?.text).toBe("Test response") + expect(usageChunk?.inputTokens).toBe(10) + expect(usageChunk?.outputTokens).toBe(5) + }) + + it("streams with multimodal input using array 'input'", async () => { + // Make responses.create return an AsyncIterable stream for this test + mockResponsesCreate.mockClear() + mockResponsesCreate.mockImplementationOnce(async (_opts: any) => { + return { + [Symbol.asyncIterator]: async function* () { + yield { type: "response.text.delta", delta: "A" } + yield { type: "response.text.delta", delta: "B" } + yield { + type: "response.completed", + response: { usage: { input_tokens: 2, output_tokens: 2 } }, + } + }, + } + }) + + const handler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5-mini", + openAiBaseUrl: "https://api.openai.com/v1/responses", + }) + + const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: [ + { type: "text" as const, text: "Look at this" }, + { + type: "image" as const, + source: { type: "base64" as const, media_type: "image/jpeg", data: "IMGDATA" }, + }, + ], + }, + ] + + const out: any[] = [] + for await (const ch of handler.createMessage("System text", messages)) { + out.push(ch) + } + + // Ensure stream: true was sent and input is array + expect(mockResponsesCreate).toHaveBeenCalled() + const args = mockResponsesCreate.mock.calls[0][0] + expect(args).toHaveProperty("stream", true) + expect(Array.isArray(args.input)).toBe(true) + + // Verify streamed text concatenation and usage + const combined = out + .filter((c) => c.type === "text") + .map((c) => c.text) + .join("") + expect(combined).toBe("AB") + + const usage = out.find((c) => c.type === "usage") + expect(usage?.inputTokens).toBe(2) + expect(usage?.outputTokens).toBe(2) + }) +}) + +// --- New tests: Responses API conversation continuity (previous_response_id) --- +describe("OpenAI Compatible - Responses API conversation continuity", () => { + beforeEach(() => { + mockCreate.mockClear() + mockResponsesCreate.mockClear() + }) + + it("propagates previous_response_id from first streaming response into the next request", async () => { + // First call will stream and include a response.id + mockResponsesCreate.mockImplementationOnce(async (_opts: any) => { + return { + [Symbol.asyncIterator]: async function* () { + yield { type: "response.text.delta", delta: "Desc " } + yield { + type: "response.completed", + response: { id: "resp-1", usage: { input_tokens: 5, output_tokens: 2 } }, + } + }, + } + }) + + const handler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5-mini", + openAiBaseUrl: "https://api.openai.com/v1/responses", + }) + + // 1) First call (establish 
response id) + const firstChunks: any[] = [] + for await (const ch of handler.createMessage("You are Roo.", [ + { role: "user", content: [{ type: "text" as const, text: "Describe the image" }] }, + ])) { + firstChunks.push(ch) + } + + // Ensure first call was made + expect(mockResponsesCreate).toHaveBeenCalledTimes(1) + // 2) Second call - should include previous_response_id from first call + const secondChunks: any[] = [] + for await (const ch of handler.createMessage("You are Roo.", [ + { role: "user", content: [{ type: "text" as const, text: "Continue." }] }, + ])) { + secondChunks.push(ch) + } + + // Validate that a second Responses.create call was made + expect(mockResponsesCreate).toHaveBeenCalledTimes(2) + const secondArgs = mockResponsesCreate.mock.calls[1][0] + expect(secondArgs).toHaveProperty("previous_response_id", "resp-1") + }) + + it("omits previous_response_id when metadata.suppressPreviousResponseId is true", async () => { + // First call streams and returns an id + mockResponsesCreate.mockImplementationOnce(async (_opts: any) => { + return { + [Symbol.asyncIterator]: async function* () { + yield { type: "response.text.delta", delta: "First" } + yield { + type: "response.completed", + response: { id: "rid-xyz", usage: { input_tokens: 1, output_tokens: 1 } }, + } + }, + } + }) + + const handler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5-mini", + openAiBaseUrl: "https://api.openai.com/v1/responses", + }) + + // First call to capture lastResponseId + for await (const _ of handler.createMessage("sys", [ + { role: "user", content: [{ type: "text" as const, text: "Turn 1" }] }, + ])) { + } + + // Second call with suppressPreviousResponseId => should NOT include previous_response_id + for await (const _ of handler.createMessage( + "sys", + [{ role: "user", content: [{ type: "text" as const, text: "Turn 2" }] }], + { taskId: "test", suppressPreviousResponseId: true }, + )) { + } + + expect(mockResponsesCreate).toHaveBeenCalledTimes(2) + const args = mockResponsesCreate.mock.calls[1][0] + expect(args).not.toHaveProperty("previous_response_id") + }) + it("does not include previous_response_id when prior stream fails before id; defaults to store:true", async () => { + // First call: stream throws before emitting any response.id + mockResponsesCreate + .mockImplementationOnce(async (_opts: any) => { + return { + [Symbol.asyncIterator]: async function* () { + yield { type: "response.text.delta", delta: "Partial " } + throw new Error("stream interrupted") + }, + } + }) + // Second call: normal stream + .mockImplementationOnce(async (_opts: any) => { + return { + [Symbol.asyncIterator]: async function* () { + yield { type: "response.text.delta", delta: "OK" } + yield { + type: "response.completed", + response: { usage: { input_tokens: 1, output_tokens: 1 } }, + } + }, + } + }) + + const handler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5-mini", + openAiBaseUrl: "https://api.openai.com/v1/responses", + }) + + // First call fails mid-stream, so no response.id is captured + const first = handler.createMessage("You are Roo.", [ + { role: "user", content: [{ type: "text" as const, text: "Hi" }] }, + ]) + + await expect(async () => { + for await (const _ of first) { + // drain until error + } + }).rejects.toThrow("stream interrupted") + + // Second call should not include previous_response_id and should default to store:true + const chunks: any[] = [] + for await (const ch of handler.createMessage("You are Roo.", [ + { role: "user", content: [{ 
type: "text" as const, text: "Hi" }] }, + ])) { + chunks.push(ch) + } + + expect(mockResponsesCreate).toHaveBeenCalledTimes(2) + const secondArgs = mockResponsesCreate.mock.calls[1][0] + expect(secondArgs).not.toHaveProperty("previous_response_id") + expect(secondArgs).toHaveProperty("store", true) + expect(typeof secondArgs.input).toBe("string") + expect(secondArgs.input).toContain("Developer: You are Roo.") + expect(secondArgs.input).toContain("User: Hi") + }) +}) + +// --- New: Responses API parity improvements tests --- +describe("OpenAI Compatible - Responses API parity improvements", () => { + beforeEach(() => { + mockCreate.mockClear() + mockResponsesCreate.mockClear() + }) + + it("retries without previous_response_id when server returns 400 'Previous response ... not found' (non-streaming)", async () => { + // First call throws 400 for previous_response_id, second succeeds + mockResponsesCreate + .mockImplementationOnce((_opts: unknown) => { + const err: ErrorWithStatus = new Error("Previous response rid-bad not found") + err.status = 400 + throw err + }) + .mockImplementationOnce(async (_opts: any) => { + return { id: "rid-good", output_text: "OK", usage: { input_tokens: 1, output_tokens: 1 } } + }) + + const h = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + }) + + const chunks: any[] = [] + for await (const ch of h.createMessage( + "sys", + [{ role: "user", content: [{ type: "text" as const, text: "Turn" }] }], + { taskId: "test", previousResponseId: "rid-bad" }, + )) { + chunks.push(ch) + } + + // Two calls made: first fails with 400, second retries without previous_response_id + expect(mockResponsesCreate).toHaveBeenCalledTimes(2) + const firstArgs = mockResponsesCreate.mock.calls[0][0] + expect(firstArgs).toHaveProperty("previous_response_id", "rid-bad") + + const secondArgs = mockResponsesCreate.mock.calls[1][0] + expect(secondArgs).not.toHaveProperty("previous_response_id") + + // Should still surface text + const textChunk = chunks.find((c: any) => c.type === "text") + expect(textChunk?.text).toBe("OK") + }) + + it("retries without previous_response_id when server returns 400 (streaming)", async () => { + // First call throws, second returns a stream + mockResponsesCreate + .mockImplementationOnce((_opts: unknown) => { + const err: ErrorWithStatus = new Error("Previous response not found") + err.status = 400 + throw err + }) + .mockImplementationOnce(async (_opts: any) => { + return { + [Symbol.asyncIterator]: async function* () { + yield { type: "response.text.delta", delta: "Hello" } + yield { type: "response.completed", response: { usage: { input_tokens: 1, output_tokens: 1 } } } + }, + } + }) + + const h = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + // streaming enabled by default + }) + + const out: any[] = [] + for await (const ch of h.createMessage( + "sys", + [{ role: "user", content: [{ type: "text" as const, text: "Hi" }] }], + { taskId: "test", previousResponseId: "bad-id" }, + )) { + out.push(ch) + } + + expect(mockResponsesCreate).toHaveBeenCalledTimes(2) + const first = mockResponsesCreate.mock.calls[0][0] + expect(first).toHaveProperty("previous_response_id", "bad-id") + const second = mockResponsesCreate.mock.calls[1][0] + expect(second).not.toHaveProperty("previous_response_id") + + const combined = out + .filter((c) => c.type === "text") + .map((c) => c.text) + 
.join("") + expect(combined).toBe("Hello") + }) + + it("handles response.content_part.added by emitting text", async () => { + mockResponsesCreate.mockImplementationOnce(async (_opts: any) => { + return { + [Symbol.asyncIterator]: async function* () { + yield { type: "response.content_part.added", part: { type: "text", text: "Part" } } + yield { type: "response.completed", response: { usage: { input_tokens: 0, output_tokens: 0 } } } + }, + } + }) + + const h = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + }) + + const out: any[] = [] + for await (const ch of h.createMessage("sys", [ + { role: "user", content: [{ type: "text" as const, text: "Hi" }] }, + ])) { + out.push(ch) + } + + const texts = out.filter((c) => c.type === "text").map((c) => c.text) + expect(texts).toContain("Part") + }) + + it("maps response.audio_transcript.delta to text", async () => { + mockResponsesCreate.mockImplementationOnce(async (_opts: any) => { + return { + [Symbol.asyncIterator]: async function* () { + yield { type: "response.audio_transcript.delta", delta: "Transcript" } + yield { type: "response.completed", response: { usage: { input_tokens: 0, output_tokens: 0 } } } + }, + } + }) + + const h = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + }) + + const out: any[] = [] + for await (const ch of h.createMessage("sys", [ + { role: "user", content: [{ type: "text" as const, text: "Hi" }] }, + ])) { + out.push(ch) + } + + const texts = out.filter((c) => c.type === "text").map((c) => c.text) + expect(texts).toContain("Transcript") + }) + + it("includes reasoning: { effort: 'minimal', summary: 'auto' } when enabled (non-streaming)", async () => { + mockResponsesCreate.mockImplementationOnce(async (opts: any) => { + return { id: "rid-1", output_text: "ok", usage: { input_tokens: 1, output_tokens: 1 } } + }) + + const h = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + enableReasoningEffort: true, + reasoningEffort: "minimal", + }) + + for await (const _ of h.createMessage("sys", [ + { role: "user", content: [{ type: "text" as const, text: "Hi" }] }, + ])) { + // consume + } + + expect(mockResponsesCreate).toHaveBeenCalledTimes(1) + const args = mockResponsesCreate.mock.calls[0][0] + expect(args).toHaveProperty("reasoning") + expect(args.reasoning).toMatchObject({ effort: "minimal", summary: "auto" }) + }) + + it("omits reasoning.summary when enableGpt5ReasoningSummary is false", async () => { + mockResponsesCreate.mockImplementationOnce(async (opts: any) => { + return { id: "rid-2", output_text: "ok", usage: { input_tokens: 1, output_tokens: 1 } } + }) + + const h = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + enableReasoningEffort: true, + reasoningEffort: "low", + enableGpt5ReasoningSummary: false, + }) + + for await (const _ of h.createMessage("sys", [ + { role: "user", content: [{ type: "text" as const, text: "Hi" }] }, + ])) { + // consume + } + + expect(mockResponsesCreate).toHaveBeenCalledTimes(1) + const args = mockResponsesCreate.mock.calls[0][0] + expect(args).toHaveProperty("reasoning") + expect(args.reasoning.effort).toBe("low") + expect(args.reasoning.summary).toBeUndefined() + }) +}) + +describe("OpenAI Compatible - Responses 
API minimal input parity (new tests)", () => { + beforeEach(() => { + // @ts-ignore - reuse mocks from this spec module + mockCreate.mockClear() + // @ts-ignore - reuse mocks from this spec module + mockResponsesCreate.mockClear() + }) + + it("sends only latest user message when previous_response_id is provided (string input, no Developer preface)", async () => { + const handler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + }) + + const msgs: Anthropic.Messages.MessageParam[] = [ + { role: "user", content: [{ type: "text" as const, text: "First" }] }, + { role: "assistant", content: [{ type: "text" as const, text: "Reply" }] }, + { role: "user", content: [{ type: "text" as const, text: "Latest" }] }, + ] + + const chunks: any[] = [] + for await (const ch of handler.createMessage("System Inst", msgs, { + taskId: "test", + previousResponseId: "prev-1", + })) { + chunks.push(ch) + } + + // Ensure Responses API was used with minimal input + // @ts-ignore + expect(mockResponsesCreate).toHaveBeenCalled() + // @ts-ignore + const args = mockResponsesCreate.mock.calls[0][0] + + expect(typeof args.input).toBe("string") + expect(args.input).toBe("User: Latest") + expect(String(args.input)).not.toContain("Developer: System Inst") + }) + + it("uses array input with only latest user content when previous_response_id and last user has images (no Developer preface)", async () => { + const handler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + }) + + const msgs: Anthropic.Messages.MessageParam[] = [ + { role: "user", content: [{ type: "text" as const, text: "Prev" }] }, + { role: "assistant", content: [{ type: "text" as const, text: "Ok" }] }, + { + role: "user", + content: [ + { type: "text" as const, text: "See" }, + { + type: "image" as const, + source: { type: "base64" as const, media_type: "image/png", data: "IMGDATA" }, + }, + ], + }, + ] + + const iter = handler.createMessage("Sys", msgs, { taskId: "test", previousResponseId: "prev-2" }) + for await (const _ of iter) { + // consume + } + + // @ts-ignore + const args = mockResponsesCreate.mock.calls.pop()?.[0] + expect(Array.isArray(args.input)).toBe(true) + + const arr = Array.isArray(args.input) ? 
args.input : []
+		expect(arr.length).toBe(1)
+		expect(arr[0]?.role).toBe("user")
+
+		const contents = arr[0]?.content || []
+		const hasImg = contents.some((p: any) => p?.type === "input_image")
+		expect(hasImg).toBe(true)
+
+		// No Developer preface should be injected in minimal mode
+		const hasDev = contents.some(
+			(p: any) => p?.type === "input_text" && typeof p.text === "string" && p.text.includes("Developer:"),
+		)
+		expect(hasDev).toBe(false)
+	})
+
+	it("always includes max_output_tokens for Responses API", async () => {
+		const handler = new OpenAiHandler({
+			openAiApiKey: "k",
+			openAiModelId: "gpt-5",
+			openAiBaseUrl: "https://api.openai.com/v1/responses",
+			openAiStreamingEnabled: false,
+			includeMaxTokens: false, // should still include based on model info
+			openAiCustomModelInfo: {
+				contextWindow: 128_000,
+				maxTokens: 123, // fallback used when modelMaxTokens not set
+				supportsPromptCache: false,
+			},
+		})
+
+		for await (const _ of handler.createMessage("sys", [
+			{ role: "user", content: [{ type: "text" as const, text: "Hi" }] },
+		])) {
+			// consume
+		}
+
+		// @ts-ignore
+		const args = mockResponsesCreate.mock.calls.pop()?.[0]
+		expect(args).toHaveProperty("max_output_tokens", 123)
+	})
+
+	it("does not include text.verbosity when not provided", async () => {
+		const handler = new OpenAiHandler({
+			openAiApiKey: "k",
+			openAiModelId: "gpt-5",
+			openAiBaseUrl: "https://api.openai.com/v1/responses",
+			openAiStreamingEnabled: false,
+		})
+
+		for await (const _ of handler.createMessage("sys", [
+			{ role: "user", content: [{ type: "text" as const, text: "Hi" }] },
+		])) {
+			// consume
+		}
+
+		// @ts-ignore
+		const args = mockResponsesCreate.mock.calls.pop()?.[0]
+		expect(args).not.toHaveProperty("text")
+	})
+})
diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts
index 2a57f251318..b9b5b7da586 100644
--- a/src/api/providers/openai.ts
+++ b/src/api/providers/openai.ts
@@ -19,6 +19,7 @@ import { convertToR1Format } from "../transform/r1-format"
 import { convertToSimpleMessages } from "../transform/simple-format"
 import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
 import { getModelParams } from "../transform/model-params"
+import { handleResponsesStream } from "../transform/responses-stream"
 
 import { DEFAULT_HEADERS } from "./constants"
 import { BaseProvider } from "./base-provider"
@@ -29,19 +30,71 @@ import { handleOpenAIError } from "./utils/openai-error-handler"
 // TODO: Rename this to OpenAICompatibleHandler. Also, I think the
 // `OpenAINativeHandler` can subclass from this, since it's obviously
 // compatible with the OpenAI API. We can also rename it to `OpenAIHandler`.
+/**
+ * URL auto-detection overview
+ *
+ * Decision tree (host and path based):
+ * 1) Azure AI Inference Service:
+ *    - Detected when host ends with ".services.ai.azure.com"
+ *    - Uses OpenAI Chat Completions API shape with a path override
+ *      (see OPENAI_AZURE_AI_INFERENCE_PATH) when making requests.
+ *
+ * 2) Azure OpenAI:
+ *    - Detected when host is "openai.azure.com" or ends with ".openai.azure.com"
+ *      or when options.openAiUseAzure is explicitly true.
+ *    - Within Azure OpenAI, the API "flavor" is chosen by URL path:
+ *      - Responses API:
+ *        * Path contains "/v1/responses" or ends with "/responses"
+ *        * Portal-style URLs (e.g. "/openai/responses?api-version=2025-04-01-preview"), which are not
+ *          valid request bases themselves, are also auto-detected and normalized to "/openai/v1"
+ *          with apiVersion "preview".
+ * - Chat Completions API: + * * Path contains "/chat/completions" + * - Default: + * * Falls back to Chat Completions if none of the above match. + * + * 3) Generic OpenAI-compatible endpoints: + * - Anything else (OpenAI, OpenRouter, LM Studio, vLLM, etc.) + * - Flavor is again selected by URL path as above: + * - "/v1/responses" or ending with "/responses" => Responses API + * - "/chat/completions" => Chat Completions + * - otherwise defaults to Chat Completions for backward compatibility. + * + * Examples: + * - https://api.openai.com/v1 -> Chat Completions (default) + * - https://api.openai.com/v1/responses -> Responses API + * - https://api.openai.com/v1/chat/completions -> Chat Completions + * - https://myres.openai.azure.com/openai/v1/responses?api-version=preview + * -> Azure OpenAI + Responses API + * - https://myres.openai.azure.com/openai/responses?api-version=2025-04-01-preview + * -> normalized to base /openai/v1 + apiVersion "preview" (Responses) + * - https://test.services.ai.azure.com -> Azure AI Inference Service (Chat Completions with path override) + */ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandler { protected options: ApiHandlerOptions private client: OpenAI + private lastResponseId: string | undefined private readonly providerName = "OpenAI" constructor(options: ApiHandlerOptions) { super() this.options = options + // Default to including reasoning.summary: "auto" for Responses API (parity with native provider) + if (this.options.enableGpt5ReasoningSummary === undefined) { + this.options.enableGpt5ReasoningSummary = true + } - const baseURL = this.options.openAiBaseUrl ?? "https://api.openai.com/v1" + // Normalize Azure Responses "web" URL shape if provided by users. + // Example input (Azure portal sometimes shows): + // https://{resource}.openai.azure.com/openai/responses?api-version=2025-04-01-preview + // We normalize to Azure SDK-friendly base and version: + // baseURL: https://{resource}.openai.azure.com/openai/v1 + // apiVersion: preview + const rawBaseURL = this.options.openAiBaseUrl ?? "https://api.openai.com/v1" + const azureNormalization = this._normalizeAzureResponsesBaseUrlAndVersion(rawBaseURL) + const baseURL = azureNormalization.baseURL const apiKey = this.options.openAiApiKey ?? "not-provided" - const isAzureAiInference = this._isAzureAiInference(this.options.openAiBaseUrl) - const urlHost = this._getUrlHost(this.options.openAiBaseUrl) + const isAzureAiInference = this._isAzureAiInference(baseURL) + const urlHost = this._getUrlHost(baseURL) const isAzureOpenAi = urlHost === "azure.com" || urlHost.endsWith(".azure.com") || options.openAiUseAzure const headers = { @@ -63,10 +116,23 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl } else if (isAzureOpenAi) { // Azure API shape slightly differs from the core API shape: // https://github.com/openai/openai-node?tab=readme-ov-file#microsoft-azure-openai + + // Determine if we're using the Responses API flavor for Azure (auto-detect by URL only) + const flavor = this._resolveApiFlavor(this.options.openAiBaseUrl ?? "") + const isResponsesFlavor = + flavor === "responses" || + this._isAzureOpenAiResponses(this.options.openAiBaseUrl) || + this._isAzureOpenAiResponses(baseURL) + + // Always use 'preview' for Azure Responses API calls (per user requirement) + const azureVersion = isResponsesFlavor + ? 
"preview" + : this.options.azureApiVersion || azureOpenAiDefaultApiVersion + this.client = new AzureOpenAI({ baseURL, apiKey, - apiVersion: this.options.azureApiVersion || azureOpenAiDefaultApiVersion, + apiVersion: azureVersion, defaultHeaders: headers, timeout, }) @@ -85,7 +151,16 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, ): ApiStream { - const { info: modelInfo, reasoning } = this.getModel() + // Gather model params (centralized: temperature, max tokens, reasoning, verbosity) + const { info: modelInfo } = this.getModel() + const openAiParams = getModelParams({ + format: "openai", + modelId: this.options.openAiModelId ?? "", + model: modelInfo, + settings: this.options, + }) + const { reasoning, reasoningEffort, verbosity } = openAiParams + const modelUrl = this.options.openAiBaseUrl ?? "" const modelId = this.options.openAiModelId ?? "" const enabledR1Format = this.options.openAiR1FormatEnabled ?? false @@ -94,6 +169,15 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl const deepseekReasoner = modelId.includes("deepseek-reasoner") || enabledR1Format const ark = modelUrl.includes(".volces.com") + // Decide API flavor (auto-detect by URL) + const flavor = this._resolveApiFlavor(modelUrl) + + // If Responses API is selected, use the Responses payload and endpoint + if (flavor === "responses") { + yield* this._handleResponsesFlavor(systemPrompt, messages, metadata, modelInfo, openAiParams) + return + } + if (modelId.includes("o1") || modelId.includes("o3") || modelId.includes("o4")) { yield* this.handleO3FamilyMessage(modelId, systemPrompt, messages) return @@ -239,6 +323,10 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl ? [systemMessage, ...convertToSimpleMessages(messages)] : [systemMessage, ...convertToOpenAiMessages(messages)], } + // Include reasoning_effort for Chat Completions when available + if (reasoning) { + Object.assign(requestOptions, reasoning) + } // Add max_tokens if needed this.addMaxTokensIfNeeded(requestOptions, modelInfo) @@ -282,9 +370,77 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl async completePrompt(prompt: string): Promise { try { const isAzureAiInference = this._isAzureAiInference(this.options.openAiBaseUrl) + const flavor = this._resolveApiFlavor(this.options.openAiBaseUrl ?? "") const model = this.getModel() const modelInfo = model.info + // Use Responses API when selected (non-streaming convenience method) + if (flavor === "responses") { + // Build structured single-turn input + const payload: Record = { + model: model.id, + input: [ + { + role: "user", + content: [{ type: "input_text", text: prompt }], + }, + ], + stream: false, + store: false, + } + + // Reasoning effort (support "minimal"; include summary: "auto" unless disabled) + const effort = (this.options.reasoningEffort || model.reasoningEffort) as + | "minimal" + | "low" + | "medium" + | "high" + | undefined + if (this.options.enableReasoningEffort && effort) { + ;( + payload as { reasoning?: { effort: "minimal" | "low" | "medium" | "high"; summary?: "auto" } } + ).reasoning = { + effort, + ...(this.options.enableGpt5ReasoningSummary !== false ? 
{ summary: "auto" as const } : {}), + } + } + + // Temperature if supported and set + if (modelInfo.supportsTemperature !== false && this.options.modelTemperature !== undefined) { + ;(payload as Record).temperature = this.options.modelTemperature + } + + // Verbosity via text.verbosity - include only when supported + if (this.options.verbosity && modelInfo.supportsVerbosity) { + ;(payload as { text?: { verbosity: "low" | "medium" | "high" } }).text = { + verbosity: this.options.verbosity as "low" | "medium" | "high", + } + } + + // max_output_tokens + if (this.options.includeMaxTokens === true) { + ;(payload as Record).max_output_tokens = + this.options.modelMaxTokens || modelInfo.maxTokens + } + + const response = await this._responsesCreateWithRetries(payload, { + usedArrayInput: true, + lastUserMessage: undefined, + previousId: undefined, + systemPrompt: "", + messages: [], + }) + try { + const respId = (response as { id?: unknown } | undefined)?.id + if (typeof respId === "string" && respId.length > 0) { + this.lastResponseId = respId + } + } catch { + // ignore + } + return this._extractResponsesText(response) ?? "" + } + const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = { model: model.id, messages: [{ role: "user", content: prompt }], @@ -420,16 +576,143 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl } } + /** + * Detects Grok xAI endpoints. + * - Returns true when the host contains "x.ai" (e.g., "api.x.ai"). + * - Used to omit stream_options for streaming requests because Grok may not support them. + * + * Examples: + * - https://api.x.ai/v1 -> true + * - https://api.openai.com/v1 -> false + */ private _isGrokXAI(baseUrl?: string): boolean { const urlHost = this._getUrlHost(baseUrl) return urlHost.includes("x.ai") } + /** + * Detects Azure AI Inference Service endpoints (distinct from Azure OpenAI). + * - Returns true when host ends with ".services.ai.azure.com". + * - These endpoints require a special path override when calling the Chat Completions API. + * + * Examples: + * - https://myenv.services.ai.azure.com -> true + * - https://myres.openai.azure.com -> false (this is Azure OpenAI, not AI Inference) + */ private _isAzureAiInference(baseUrl?: string): boolean { const urlHost = this._getUrlHost(baseUrl) return urlHost.endsWith(".services.ai.azure.com") } + /** + * Detects Azure OpenAI "Responses API" URLs by host and path. + * - Host must be "openai.azure.com" or end with ".openai.azure.com" + * - Path may be one of: + * • "/openai/v1/responses" (preferred v1 path) + * • "/openai/responses" (portal/legacy style) + * • any path ending with "/responses" + * - Trailing slashes are trimmed before matching. + * + * This is used to favor the Responses API flavor on Azure OpenAI when the base URL already + * points to a Responses path. 
+ * + * Examples (true): + * - https://myres.openai.azure.com/openai/v1/responses?api-version=preview + * - https://myres.openai.azure.com/openai/responses?api-version=2025-04-01-preview + * - https://openai.azure.com/openai/v1/responses + * + * Examples (false): + * - https://myres.openai.azure.com/openai/v1/chat/completions + * - https://api.openai.com/v1/responses (not an Azure host) + */ + private _isAzureOpenAiResponses(baseUrl?: string): boolean { + try { + if (!baseUrl) return false + const u = new URL(baseUrl) + const host = u.host + const path = u.pathname.replace(/\/+$/, "") + if (!(host.endsWith(".openai.azure.com") || host === "openai.azure.com")) return false + return ( + path.endsWith("/openai/v1/responses") || + path.endsWith("/openai/responses") || + path.endsWith("/responses") + ) + } catch { + return false + } + } + + /** + * Normalizes Azure OpenAI "Responses" portal URLs to an SDK-friendly base and version. + * + * Why: + * - The Azure portal often presents a non-v1 Responses endpoint such as: + * https://{res}.openai.azure.com/openai/responses?api-version=2025-04-01-preview + * which is not the ideal base for SDK clients. We convert it to: + * baseURL = https://{res}.openai.azure.com/openai/v1 + * apiVersionOverride = "preview" + * + * What it does: + * - If the input is an Azure OpenAI host and its path is exactly "/openai/responses" + * with api-version=2025-04-01-preview, we: + * • return { baseURL: "https://{host}/openai/v1", apiVersionOverride: "preview" } + * - If the input is already "/openai/v1/responses", we similarly normalize the base to "/openai/v1" + * and set apiVersionOverride to "preview". + * - Otherwise, returns the original URL unchanged. + * + * Scope: + * - Only applies to Azure OpenAI hosts ("openai.azure.com" or "*.openai.azure.com"). + * - Non-Azure URLs or already SDK-friendly bases are returned as-is. + * + * Examples: + * - In: https://sample.openai.azure.com/openai/responses?api-version=2025-04-01-preview + * Out: baseURL=https://sample.openai.azure.com/openai/v1, apiVersionOverride="preview" + * + * - In: https://sample.openai.azure.com/openai/v1/responses?api-version=preview + * Out: baseURL=https://sample.openai.azure.com/openai/v1, apiVersionOverride="preview" + * + * - In: https://api.openai.com/v1/responses + * Out: baseURL unchanged (non-Azure) + */ + private _normalizeAzureResponsesBaseUrlAndVersion(inputBaseUrl: string): { + baseURL: string + apiVersionOverride?: string + } { + try { + const url = new URL(inputBaseUrl) + const isAzureHost = url.hostname.endsWith(".openai.azure.com") || url.hostname === "openai.azure.com" + const pathname = (url.pathname || "").replace(/\/+$/, "") + + // 1) Azure portal "non-v1" shape: + // https://{res}.openai.azure.com/openai/responses?api-version=2025-04-01-preview + const isPortalNonV1 = + isAzureHost && + pathname === "/openai/responses" && + url.searchParams.get("api-version") === "2025-04-01-preview" + + if (isPortalNonV1) { + const normalized = `${url.protocol}//${url.host}/openai/v1` + const ver = "preview" + return { baseURL: normalized, apiVersionOverride: ver } + } + + // 2) v1 responses path passed as base URL: + // https://{res}.openai.azure.com/openai/v1/responses?api-version=preview + // Normalize base to '/openai/v1' and force apiVersion 'preview' for Azure Responses v1 preview. 
+ const isV1ResponsesPath = isAzureHost && pathname === "/openai/v1/responses" + if (isV1ResponsesPath) { + const normalized = `${url.protocol}//${url.host}/openai/v1` + const ver = "preview" + return { baseURL: normalized, apiVersionOverride: ver } + } + + // If it's already '/openai/v1' or any other valid path, keep as-is + return { baseURL: inputBaseUrl } + } catch { + return { baseURL: inputBaseUrl } + } + } + /** * Adds max_completion_tokens to the request body if needed based on provider configuration * Note: max_tokens is deprecated in favor of max_completion_tokens as per OpenAI documentation @@ -448,6 +731,458 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl requestOptions.max_completion_tokens = this.options.modelMaxTokens || modelInfo.maxTokens } } + + // --- Responses helpers --- + + private async *_handleResponsesFlavor( + systemPrompt: string, + messages: Anthropic.Messages.MessageParam[], + metadata: ApiHandlerCreateMessageMetadata | undefined, + modelInfo: ModelInfo, + openAiParams: any, + ): ApiStream { + const modelId = this.options.openAiModelId ?? "" + const nonStreaming = !(this.options.openAiStreamingEnabled ?? true) + + // Determine conversation continuity id (skip when explicitly suppressed) + const previousId = metadata?.suppressPreviousResponseId + ? undefined + : (metadata?.previousResponseId ?? this.lastResponseId) + + // Prepare Responses API input per test expectations: + // - Non-minimal text-only => single string with Developer/User lines + // - Minimal (previous_response_id) => single string "User: ..." when last user has no images + // - Image cases => structured array; inject Developer preface as first item (non-minimal only) + const lastUserMessage = [...messages].reverse().find((m) => m.role === "user") + const lastUserHasImages = + !!lastUserMessage && + Array.isArray(lastUserMessage.content) && + lastUserMessage.content.some((b: any) => (b as any)?.type === "image") + const minimalInputMode = Boolean(previousId) + + let inputPayload: unknown + if (minimalInputMode && lastUserMessage) { + // Minimal mode: only latest user turn + if (lastUserHasImages) { + inputPayload = this._toResponsesInput([lastUserMessage]) + } else { + inputPayload = this._formatResponsesSingleMessage(lastUserMessage, true) + } + } else if (lastUserHasImages && lastUserMessage) { + // Initial turn with images: include Developer preface and minimal context + const lastAssistantMessage = [...messages].reverse().find((m) => m.role === "assistant") + const messagesForArray = messages.filter((m) => { + if (m.role === "assistant") { + return lastAssistantMessage ? m === lastAssistantMessage : false + } + if (m.role === "user") { + const hasImage = + Array.isArray(m.content) && m.content.some((b: any) => (b as any)?.type === "image") + return hasImage || m === lastUserMessage + } + return false + }) + + const arrayInput = this._toResponsesInput(messagesForArray) + const developerPreface = { + role: "user" as const, + content: [{ type: "input_text" as const, text: `Developer: ${systemPrompt}` }], + } + inputPayload = [developerPreface, ...arrayInput] + } else { + // Pure text history: compact transcript string + inputPayload = this._formatResponsesInput(systemPrompt, messages) + } + + // Build base payload: use top-level instructions; default to storing unless explicitly disabled + const basePayload: Record = { + model: modelId, + input: inputPayload, + ...(previousId ? 
{ previous_response_id: previousId } : {}),
+			instructions: systemPrompt,
+			store: metadata?.store !== false,
+		}
+
+		// Reasoning effort (support "minimal"; include summary: "auto" unless disabled)
+		if (this.options.enableReasoningEffort && (this.options.reasoningEffort || openAiParams?.reasoningEffort)) {
+			const effort = (this.options.reasoningEffort || openAiParams?.reasoningEffort) as
+				| "minimal"
+				| "low"
+				| "medium"
+				| "high"
+				| undefined
+			if (effort) {
+				;(
+					basePayload as { reasoning?: { effort: "minimal" | "low" | "medium" | "high"; summary?: "auto" } }
+				).reasoning = {
+					effort,
+					...(this.options.enableGpt5ReasoningSummary !== false ? { summary: "auto" as const } : {}),
+				}
+			}
+		}
+
+		// Temperature: include only if model supports it
+		const deepseekReasoner = modelId.includes("deepseek-reasoner") || (this.options.openAiR1FormatEnabled ?? false)
+		if (modelInfo.supportsTemperature !== false) {
+			if (this.options.modelTemperature !== undefined) {
+				;(basePayload as Record<string, unknown>).temperature = this.options.modelTemperature
+			} else if (deepseekReasoner) {
+				;(basePayload as Record<string, unknown>).temperature = DEEP_SEEK_DEFAULT_TEMPERATURE
+			}
+		}
+
+		// Verbosity: include when provided; retry logic removes it on 400
+		if (this.options.verbosity) {
+			;(basePayload as { text?: { verbosity: "low" | "medium" | "high" } }).text = {
+				verbosity: this.options.verbosity as "low" | "medium" | "high",
+			}
+		}
+
+		// Always include max_output_tokens for Responses API to cap output length
+		const reservedMax = openAiParams?.maxTokens
+		;(basePayload as Record<string, unknown>).max_output_tokens =
+			this.options.modelMaxTokens || reservedMax || modelInfo.maxTokens
+
+		// Non-streaming path
+		if (nonStreaming) {
+			const response = await this._responsesCreateWithRetries(basePayload, {
+				usedArrayInput: Array.isArray(inputPayload),
+				lastUserMessage,
+				previousId,
+				systemPrompt,
+				messages,
+			})
+			yield* this._yieldResponsesResult(response, modelInfo)
+			return
+		}
+
+		// Streaming path (auto-fallback to non-streaming result if provider ignores stream flag)
+		const streamingPayload: Record<string, unknown> = { ...basePayload, stream: true }
+		const maybeStream = await this._responsesCreateWithRetries(streamingPayload, {
+			usedArrayInput: Array.isArray(inputPayload),
+			lastUserMessage,
+			previousId,
+			systemPrompt,
+			messages,
+		})
+
+		const isAsyncIterable = (obj: unknown): obj is AsyncIterable<unknown> =>
+			typeof (obj as AsyncIterable<unknown>)[Symbol.asyncIterator] === "function"
+
+		if (isAsyncIterable(maybeStream)) {
+			for await (const chunk of handleResponsesStream(maybeStream, {
+				onResponseId: (id) => {
+					this.lastResponseId = id
+				},
+			})) {
+				yield chunk
+			}
+		} else {
+			// Some providers may ignore the stream flag and return a complete response
+			yield* this._yieldResponsesResult(maybeStream, modelInfo)
+		}
+	}
+
+	/**
+	 * Determines which OpenAI-compatible API flavor to use based on the URL path.
+	 * - This is purely path-based and provider-agnostic (works for OpenAI, Azure OpenAI after normalization, etc.).
+	 *
+	 * Rules:
+	 * - If path contains "/v1/responses" OR ends with "/responses" => "responses"
+	 * - Else if path contains "/chat/completions" => "chat"
+	 * - Else default to "chat" for backward compatibility
+	 *
+	 * Notes:
+	 * - Trailing slashes are not required to match; we rely on substring checks.
+	 * - Azure "portal" style URLs are normalized beforehand where applicable.
+ * + * Examples: + * - https://api.openai.com/v1/responses -> "responses" + * - https://api.openai.com/v1/chat/completions -> "chat" + * - https://myres.openai.azure.com/openai/v1 -> "chat" (default) + * - https://myres.openai.azure.com/openai/v1/responses -> "responses" + */ + private _resolveApiFlavor(baseUrl: string): "responses" | "chat" { + // Auto-detect by URL path + const url = this._safeParseUrl(baseUrl) + const path = url?.pathname || "" + if (path.includes("/v1/responses") || path.endsWith("/responses")) { + return "responses" + } + if (path.includes("/chat/completions")) { + return "chat" + } + // Default to Chat Completions for backward compatibility + return "chat" + } + + private _safeParseUrl(input?: string): URL | undefined { + try { + if (!input) return undefined + return new URL(input) + } catch { + return undefined + } + } + + private _toResponsesInput(anthropicMessages: Anthropic.Messages.MessageParam[]): Array<{ + role: "user" | "assistant" + content: Array< + | { type: "input_text"; text: string } + | { type: "input_image"; image_url: string } + | { type: "output_text"; text: string } + > + }> { + const input: Array<{ + role: "user" | "assistant" + content: Array< + | { type: "input_text"; text: string } + | { type: "input_image"; image_url: string } + | { type: "output_text"; text: string } + > + }> = [] + + for (const msg of anthropicMessages) { + const role = msg.role === "assistant" ? "assistant" : "user" + const parts: Array< + | { type: "input_text"; text: string } + | { type: "input_image"; image_url: string } + | { type: "output_text"; text: string } + > = [] + + if (typeof msg.content === "string") { + if (msg.content.length > 0) { + if (role === "assistant") { + parts.push({ type: "output_text", text: msg.content }) + } else { + parts.push({ type: "input_text", text: msg.content }) + } + } + } else if (Array.isArray(msg.content)) { + for (const block of msg.content) { + if (block.type === "text") { + if (role === "assistant") { + parts.push({ type: "output_text", text: block.text }) + } else { + parts.push({ type: "input_text", text: block.text }) + } + } else if (block.type === "image") { + // Images are treated as user input; ignore images on assistant turns + if (role === "user") { + parts.push({ + type: "input_image", + image_url: `data:${block.source.media_type};base64,${block.source.data}`, + }) + } + } + // tool_use/tool_result are omitted in this minimal mapping (can be added as needed) + } + } + + if (parts.length > 0) { + input.push({ role, content: parts }) + } + } + return input + } + + private _extractResponsesText(response: any): string | undefined { + // Prefer the simple output_text if present, otherwise attempt to parse output array + if (response?.output_text) return response.output_text + if (Array.isArray(response?.output)) { + // Find assistant message with output_text + for (const item of response.output) { + if (item?.type === "message" && Array.isArray(item.content)) { + const textPart = item.content.find( + (c: any) => c.type === "output_text" && typeof c.text === "string", + ) + if (textPart?.text) return textPart.text + } + } + } + return undefined + } + + private _isInputTextInvalidError(err: unknown): boolean { + if (err == null || typeof err !== "object") return false + const anyErr = err as { + status?: unknown + response?: { status?: unknown } + message?: unknown + error?: { message?: unknown } + } + const statusRaw = anyErr.status ?? anyErr.response?.status + const status = typeof statusRaw === "number" ? 
statusRaw : Number(statusRaw)
+		const msgRaw = (anyErr.message ?? anyErr.error?.message ?? "").toString().toLowerCase()
+		return status === 400 && msgRaw.includes("invalid value") && msgRaw.includes("input_text")
+	}
+
+	/**
+	 * Centralized Responses.create with one-shot retries for common provider errors:
+	 * - 400 "Previous response ... not found" -> drop previous_response_id and retry
+	 * - 400 unknown/unsupported "text.verbosity" -> remove text and retry
+	 * - 400 invalid value for input_text (Azure) -> rebuild single-message string input and retry
+	 * Returns either an AsyncIterable (streaming) or a full response object (non-streaming).
+	 */
+	private async _responsesCreateWithRetries(
+		payload: Record<string, unknown>,
+		opts: {
+			usedArrayInput: boolean
+			lastUserMessage?: Anthropic.Messages.MessageParam
+			previousId?: string
+			systemPrompt: string
+			messages: Anthropic.Messages.MessageParam[]
+		},
+	): Promise<unknown> {
+		const create = (body: Record<string, unknown>) => {
+			const hasResponsesCreate = (
+				obj: unknown,
+			): obj is { responses: { create: (b: Record<string, unknown>) => Promise<unknown> } } => {
+				if (obj == null || typeof obj !== "object") return false
+				const responses = (obj as Record<string, unknown>).responses
+				if (responses == null || typeof responses !== "object") return false
+				return typeof (responses as Record<string, unknown>).create === "function"
+			}
+			if (!hasResponsesCreate(this.client)) {
+				throw new Error("Responses API not available on client")
+			}
+			return this.client.responses.create(body)
+		}
+
+		try {
+			return await create(payload)
+		} catch (err: unknown) {
+			// Retry without previous_response_id if server rejects it
+			if (opts.previousId && this._isPreviousResponseNotFoundError(err)) {
+				const { previous_response_id: _omitPrev, ...withoutPrev } = payload as {
+					previous_response_id?: unknown
+					[key: string]: unknown
+				}
+				this.lastResponseId = undefined
+				return await create(withoutPrev)
+			}
+
+			// Graceful downgrade if verbosity is rejected by server
+			if ("text" in payload && this._isVerbosityUnsupportedError(err)) {
+				const { text: _omit, ...withoutVerbosity } = payload as { text?: unknown } & Record<string, unknown>
+				return await create(withoutVerbosity)
+			}
+
+			// Azure-specific fallback when array input is rejected
+			if (opts.usedArrayInput && this._isInputTextInvalidError(err)) {
+				const fallbackInput =
+					opts.previousId && opts.lastUserMessage
+						? this._formatResponsesSingleMessage(opts.lastUserMessage, true)
+						: this._formatResponsesInput(opts.systemPrompt, opts.messages)
+
+				const retryPayload: Record<string, unknown> = {
+					...payload,
+					input: fallbackInput,
+				}
+				return await create(retryPayload)
+			}
+
+			throw err
+		}
+	}
+	private async *_yieldResponsesResult(response: any, modelInfo: ModelInfo): ApiStream {
+		// Capture response id for continuity when present
+		try {
+			const respId = (response as { id?: unknown } | undefined)?.id
+			if (typeof respId === "string" && respId.length > 0) {
+				this.lastResponseId = respId
+			}
+		} catch {
+			// ignore
+		}
+
+		const text = this._extractResponsesText(response) ??
"" + if (text) { + yield { type: "text", text } + } + // Translate usage fields if present + const usage = response?.usage + if (usage) { + yield { + type: "usage", + inputTokens: usage.input_tokens || usage.prompt_tokens || 0, + outputTokens: usage.output_tokens || usage.completion_tokens || 0, + cacheWriteTokens: usage.cache_creation_input_tokens || undefined, + cacheReadTokens: usage.cache_read_input_tokens || undefined, + } + } + } + + private _isVerbosityUnsupportedError(err: unknown): boolean { + if (err == null || typeof err !== "object") return false + + // you had hasOwnProperty("message") twice — likely a typo + if (!("message" in err)) return false + + const msg = String((err as { message?: unknown }).message ?? "").toLowerCase() + + const rawStatus = "status" in err ? (err as { status?: unknown }).status : undefined + const status = typeof rawStatus === "number" ? rawStatus : Number(rawStatus) + + return ( + status === 400 && + (msg.includes("verbosity") || msg.includes("unknown parameter") || msg.includes("unsupported")) + ) + } + + private _isPreviousResponseNotFoundError(err: unknown): boolean { + if (err == null || typeof err !== "object") return false + const anyErr = err as { + status?: unknown + response?: { status?: unknown } + message?: unknown + error?: { message?: unknown } + } + const statusRaw = anyErr.status ?? anyErr.response?.status + const status = typeof statusRaw === "number" ? statusRaw : Number(statusRaw) + const msg = (anyErr.message ?? anyErr.error?.message ?? "").toString().toLowerCase() + return status === 400 && (msg.includes("previous response") || msg.includes("not found")) + } + + // ---- Responses input formatting (align with openai-native.ts) ---- + + private _formatResponsesInput(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): string { + // Developer role for system prompt + let formattedInput = `Developer: ${systemPrompt}\n\n` + for (const message of messages) { + const role = message.role === "user" ? "User" : "Assistant" + if (typeof message.content === "string") { + formattedInput += `${role}: ${message.content}\n\n` + } else if (Array.isArray(message.content)) { + const textContent = message.content + .filter((block) => block.type === "text") + .map((block) => block.text) + .join("\n") + if (textContent) { + formattedInput += `${role}: ${textContent}\n\n` + } + } + } + return formattedInput.trim() + } + + private _formatResponsesSingleMessage( + message: Anthropic.Messages.MessageParam, + includeRole: boolean = true, + ): string { + const role = includeRole ? (message.role === "user" ? "User" : "Assistant") + ": " : "" + if (typeof message.content === "string") { + return `${role}${message.content}` + } + if (Array.isArray(message.content)) { + const textContent = message.content + .filter((block) => block.type === "text") + .map((block) => block.text) + .join("\n") + return `${role}${textContent}` + } + return role + } } export async function getOpenAiModels(baseUrl?: string, apiKey?: string, openAiHeaders?: Record) { diff --git a/src/api/transform/responses-stream.ts b/src/api/transform/responses-stream.ts new file mode 100644 index 00000000000..f0152c9ec36 --- /dev/null +++ b/src/api/transform/responses-stream.ts @@ -0,0 +1,263 @@ +import type { ApiStreamChunk } from "./stream" + +/** + * Minimal, typed streaming handler for OpenAI/Azure Responses API streams. + * Consumes an AsyncIterable of events and yields ApiStreamChunk items. 
+ *
+ * Notes:
+ * - We intentionally handle only the core, stable event shapes that we already
+ *   use in openai-native, to keep the surface area small and predictable.
+ * - If the event format changes, extend the type guards below conservatively.
+ */
+export async function* handleResponsesStream(
+	stream: AsyncIterable<unknown>,
+	options?: { onResponseId?: (id: string) => void },
+): AsyncGenerator<ApiStreamChunk> {
+	let lastUsage: ResponseUsage | undefined
+
+	for await (const event of stream) {
+		// Surface response.id to callers when available (for conversation continuity)
+		if (isObject(event)) {
+			const resp = (event as Record<string, unknown>).response
+			if (isObject(resp)) {
+				const rid = (resp as Record<string, unknown>).id
+				if (typeof rid === "string") {
+					options?.onResponseId?.(rid)
+				}
+			}
+		}
+		// 1) Streaming text deltas
+		if (isTextDelta(event)) {
+			const e = event as TextDeltaEvent
+			if (e.delta != null) {
+				yield { type: "text", text: String(e.delta) }
+			}
+			continue
+		}
+
+		// 2) Streaming reasoning deltas
+		if (isReasoningDelta(event)) {
+			const e = event as ReasoningDeltaEvent
+			if (e.delta != null) {
+				yield { type: "reasoning", text: String(e.delta) }
+			}
+			continue
+		}
+
+		// 2.1) Audio transcript deltas (map to text)
+		if (isAudioTranscriptDelta(event)) {
+			const e = event as AudioTranscriptDeltaEvent
+			if (e.delta != null) {
+				yield { type: "text", text: String(e.delta) }
+			}
+			continue
+		}
+
+		// 3) Refusal deltas (map to text with prefix, matching native handler behavior)
+		if (isRefusalDelta(event)) {
+			const e = event as RefusalDeltaEvent
+			if (e.delta != null) {
+				yield { type: "text", text: `[Refusal] ${String(e.delta)}` }
+			}
+			continue
+		}
+
+		// 4) Output-item added (alternative carrier for text/reasoning)
+		if (isOutputItemAdded(event)) {
+			const item = (event as OutputItemAddedEvent).item
+			if (item) {
+				if (item.type === "text" && typeof item.text === "string") {
+					yield { type: "text", text: item.text }
+				} else if (item.type === "reasoning" && typeof item.text === "string") {
+					yield { type: "reasoning", text: item.text }
+				} else if (item.type === "message" && Array.isArray(item.content)) {
+					for (const content of item.content) {
+						// Some servers use "text"; others use "output_text"
+						if (
+							(content?.type === "text" || content?.type === "output_text") &&
+							typeof content?.text === "string"
+						) {
+							yield { type: "text", text: content.text }
+						}
+					}
+				} else if (typeof item.text === "string") {
+					// Fallback: emit item.text even if item.type is unknown (matches native handler tolerance)
+					yield { type: "text", text: item.text }
+				}
+			}
+			continue
+		}
+
+		// 4.1) Content part added (SDK alternative format)
+		if (isContentPartAdded(event)) {
+			const part = (event as ContentPartAddedEvent).part
+			if (part && part.type === "text" && typeof part.text === "string") {
+				yield { type: "text", text: part.text }
+			}
+			continue
+		}
+
+		// 5) Fallback: some implementations (or older shapes) supply choices[0].delta.content
+		const content = getChoiceDeltaContent(event)
+		if (content) {
+			yield { type: "text", text: content }
+		}
+
+		// 6) Track usage whenever present
+		const usage = extractUsage(event)
+		if (usage) {
+			lastUsage = usage
+		}
+
+		// 7) Completion/done events - emit usage if we have it
+		if (isDoneEvent(event)) {
+			const u = lastUsage
+			if (u && hasAnyUsage(u)) {
+				yield makeUsageChunk(u)
+			}
+		}
+	}
+}
+
+/** Types, guards, and helpers */
+
+type ResponseUsage = {
+	input_tokens?: number
+	output_tokens?: number
+	prompt_tokens?: number
+	completion_tokens?: number
+	cache_creation_input_tokens?:
number
+	cache_read_input_tokens?: number
+	prompt_tokens_details?: { cached_tokens?: number }
+}
+
+type TextDeltaEvent = {
+	type: "response.text.delta" | "response.output_text.delta"
+	delta?: unknown
+}
+
+type ReasoningDeltaEvent = {
+	type:
+		| "response.reasoning.delta"
+		| "response.reasoning_text.delta"
+		| "response.reasoning_summary.delta"
+		| "response.reasoning_summary_text.delta"
+	delta?: unknown
+}
+
+type RefusalDeltaEvent = {
+	type: "response.refusal.delta"
+	delta?: unknown
+}
+
+type OutputItemAddedEvent = {
+	type: "response.output_item.added"
+	item?: {
+		type?: string
+		text?: unknown
+		content?: Array<{ type?: string; text?: unknown }>
+	}
+}
+
+type DoneEvent = {
+	type: "response.done" | "response.completed"
+}
+
+type AudioTranscriptDeltaEvent = {
+	type: "response.audio_transcript.delta"
+	delta?: unknown
+}
+
+type ContentPartAddedEvent = {
+	type: "response.content_part.added"
+	part?: {
+		type?: string
+		text?: unknown
+	}
+}
+
+function isObject(value: unknown): value is Record<string, unknown> {
+	return typeof value === "object" && value !== null
+}
+
+function isTextDelta(event: unknown): event is TextDeltaEvent {
+	return (
+		isObject(event) &&
+		typeof (event as Record<string, unknown>).type === "string" &&
+		(((event as Record<string, unknown>).type as string) === "response.text.delta" ||
+			((event as Record<string, unknown>).type as string) === "response.output_text.delta")
+	)
+}
+
+function isReasoningDelta(event: unknown): event is ReasoningDeltaEvent {
+	if (!isObject(event)) return false
+	const t = (event as Record<string, unknown>).type
+	return (
+		t === "response.reasoning.delta" ||
+		t === "response.reasoning_text.delta" ||
+		t === "response.reasoning_summary.delta" ||
+		t === "response.reasoning_summary_text.delta"
+	)
+}
+
+function isRefusalDelta(event: unknown): event is RefusalDeltaEvent {
+	return isObject(event) && (event as Record<string, unknown>).type === "response.refusal.delta"
+}
+
+function isOutputItemAdded(event: unknown): event is OutputItemAddedEvent {
+	return isObject(event) && (event as Record<string, unknown>).type === "response.output_item.added"
+}
+
+function isAudioTranscriptDelta(event: unknown): event is AudioTranscriptDeltaEvent {
+	return isObject(event) && (event as Record<string, unknown>).type === "response.audio_transcript.delta"
+}
+
+function isContentPartAdded(event: unknown): event is ContentPartAddedEvent {
+	return isObject(event) && (event as Record<string, unknown>).type === "response.content_part.added"
+}
+
+function isDoneEvent(event: unknown): event is DoneEvent {
+	if (!isObject(event)) return false
+	const t = (event as Record<string, unknown>).type
+	return t === "response.done" || t === "response.completed"
+}
+
+function getChoiceDeltaContent(event: unknown): string | undefined {
+	if (!isObject(event)) return undefined
+	const choices = (event as Record<string, unknown>).choices
+	if (!Array.isArray(choices) || choices.length === 0) return undefined
+	const first = choices[0]
+	if (!isObject(first)) return undefined
+	const delta = (first as Record<string, unknown>).delta
+	if (!isObject(delta)) return undefined
+	const content = (delta as Record<string, unknown>).content
+	if (content == null) return undefined
+	return String(content)
+}
+
+function extractUsage(event: unknown): ResponseUsage | undefined {
+	if (!isObject(event)) return undefined
+	const resp = (event as Record<string, unknown>).response
+	if (isObject(resp) && isObject((resp as Record<string, unknown>).usage)) {
+		return (resp as Record<string, unknown>).usage as ResponseUsage
+	}
+	const usage = (event as Record<string, unknown>).usage
+	if (isObject(usage)) {
+		return usage as ResponseUsage
+	}
+	return undefined
+}
+
+function hasAnyUsage(usage: ResponseUsage): boolean {
+	return
Boolean(usage.input_tokens || usage.output_tokens || usage.prompt_tokens || usage.completion_tokens)
+}
+
+function makeUsageChunk(usage: ResponseUsage): ApiStreamChunk {
+	return {
+		type: "usage",
+		inputTokens: usage.input_tokens ?? usage.prompt_tokens ?? 0,
+		outputTokens: usage.output_tokens ?? usage.completion_tokens ?? 0,
+		cacheWriteTokens: usage.cache_creation_input_tokens ?? undefined,
+		cacheReadTokens: usage.cache_read_input_tokens ?? usage.prompt_tokens_details?.cached_tokens ?? undefined,
+	}
+}
diff --git a/src/package.json b/src/package.json
index d9e986feac9..ccd16a9f869 100644
--- a/src/package.json
+++ b/src/package.json
@@ -468,7 +468,7 @@
 		"node-cache": "^5.1.2",
 		"node-ipc": "^12.0.0",
 		"ollama": "^0.5.17",
-		"openai": "^5.12.2",
+		"openai": "^5.15.0",
 		"os-name": "^6.0.0",
 		"p-limit": "^6.2.0",
 		"p-wait-for": "^5.0.2",
diff --git a/webview-ui/src/components/settings/providers/OpenAICompatible.tsx b/webview-ui/src/components/settings/providers/OpenAICompatible.tsx
index 736b0253c43..f67e23789cf 100644
--- a/webview-ui/src/components/settings/providers/OpenAICompatible.tsx
+++ b/webview-ui/src/components/settings/providers/OpenAICompatible.tsx
@@ -22,6 +22,7 @@ import { inputEventTransform, noTransform } from "../transforms"
 import { ModelPicker } from "../ModelPicker"
 import { R1FormatSetting } from "../R1FormatSetting"
 import { ThinkingBudget } from "../ThinkingBudget"
+import { Verbosity } from "../Verbosity"
 
 type OpenAICompatibleProps = {
 	apiConfiguration: ProviderSettings
@@ -40,6 +41,7 @@ export const OpenAICompatible = ({
 	const [azureApiVersionSelected, setAzureApiVersionSelected] = useState(!!apiConfiguration?.azureApiVersion)
 	const [openAiLegacyFormatSelected, setOpenAiLegacyFormatSelected] = useState(!!apiConfiguration?.openAiLegacyFormat)
+	const [verbositySelected, setVerbositySelected] = useState(!!apiConfiguration?.verbosity)
 
 	const [openAiModels, setOpenAiModels] = useState<Record<string, ModelInfo> | null>(null)
 
@@ -282,6 +284,27 @@ export const OpenAICompatible = ({
 						/>
 					)}
+
+					<div>
+						<Checkbox
+							checked={verbositySelected}
+							onChange={(checked: boolean) => {
+								setVerbositySelected(checked)
+								if (!checked) {
+									setApiConfigurationField("verbosity", undefined)
+								} else if (!apiConfiguration.verbosity) {
+									setApiConfigurationField("verbosity", "medium")
+								}
+							}}>
+							{t("settings:providers.verbosity.label")}
+						</Checkbox>
+						{verbositySelected && (
+							<Verbosity
+								apiConfiguration={apiConfiguration}
+								setApiConfigurationField={setApiConfigurationField}
+							/>
+						)}
+					</div>
{t("settings:providers.customModel.capabilities")}