diff --git a/apps/web-roo-code/src/app/privacy/page.tsx b/apps/web-roo-code/src/app/privacy/page.tsx index bb132651f76..b0efdbc4440 100644 --- a/apps/web-roo-code/src/app/privacy/page.tsx +++ b/apps/web-roo-code/src/app/privacy/page.tsx @@ -86,8 +86,8 @@ export default function Privacy() { Your source code does not transit Roo Code servers unless you explicitly choose Roo Code as a model provider (proxy mode). {" "} - When Roo Code Cloud is your model provider, your code briefly transits Roo Code servers only to - forward it to the upstream model, is not stored, and is deleted immediately after + When Roo Code Cloud is your model provider, your code briefly transits Roo Code servers only + to forward it to the upstream model, is not stored, and is deleted immediately after forwarding. Otherwise, your code is sent directly—via client‑to‑provider TLS—to the model you select. Roo Code never stores, inspects, or trains on your code. diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index f9ccd8512ad..7779708af00 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -720,8 +720,8 @@ importers: specifier: ^0.5.17 version: 0.5.17 openai: - specifier: ^5.12.2 - version: 5.12.2(ws@8.18.3)(zod@3.25.61) + specifier: ^5.15.0 + version: 5.15.0(ws@8.18.3)(zod@3.25.61) os-name: specifier: ^6.0.0 version: 6.1.0 @@ -7976,8 +7976,8 @@ packages: resolution: {integrity: sha512-cxN6aIDPz6rm8hbebcP7vrQNhvRcveZoJU72Y7vskh4oIm+BZwBECnx5nTmrlres1Qapvx27Qo1Auukpf8PKXw==} engines: {node: '>=18'} - openai@5.12.2: - resolution: {integrity: sha512-xqzHHQch5Tws5PcKR2xsZGX9xtch+JQFz5zb14dGqlshmmDAFBFEWmeIpf7wVqWV+w7Emj7jRgkNJakyKE0tYQ==} + openai@5.15.0: + resolution: {integrity: sha512-kcUdws8K/A8m02I+IqFBwO51gS+87GP89yWEufGbzEi8anBz4FB/bti2QxaJdGwwY4mwJGzx85XO7TuL/Tpu1w==} hasBin: true peerDependencies: ws: ^8.18.0 @@ -18213,7 +18213,7 @@ snapshots: is-inside-container: 1.0.0 is-wsl: 3.1.0 - openai@5.12.2(ws@8.18.3)(zod@3.25.61): + openai@5.15.0(ws@8.18.3)(zod@3.25.61): optionalDependencies: ws: 8.18.3 zod: 3.25.61 diff --git a/src/api/providers/__tests__/openai.spec.ts b/src/api/providers/__tests__/openai.spec.ts index 14ed35430a5..0affafdabff 100644 --- a/src/api/providers/__tests__/openai.spec.ts +++ b/src/api/providers/__tests__/openai.spec.ts @@ -4,27 +4,65 @@ import { OpenAiHandler, getOpenAiModels } from "../openai" import { ApiHandlerOptions } from "../../../shared/api" import { Anthropic } from "@anthropic-ai/sdk" import OpenAI from "openai" -import { openAiModelInfoSaneDefaults } from "@roo-code/types" import { Package } from "../../../shared/package" import axios from "axios" +type ErrorWithStatus = Error & { status?: number } + +function getMockCallsOf(fn: unknown): any[] { + const isObj = (v: unknown): v is Record => typeof v === "object" && v !== null + if (isObj(fn) || typeof fn === "function") { + const rec = fn as Record + const mock = rec["mock"] + if (isObj(mock)) { + const calls = mock["calls"] + if (Array.isArray(calls)) return calls + } + } + return [] +} const mockCreate = vitest.fn() +const mockResponsesCreate = vitest.fn() vitest.mock("openai", () => { const mockConstructor = vitest.fn() - return { - __esModule: true, - default: mockConstructor.mockImplementation(() => ({ - chat: { - completions: { - create: mockCreate.mockImplementation(async (options) => { - if (!options.stream) { - return { - id: "test-completion", + const makeClient = () => ({ + chat: { + completions: { + create: mockCreate.mockImplementation(async (options) => { + if (!options.stream) { + return { + id: "test-completion", + 
choices: [ + { + message: { role: "assistant", content: "Test response", refusal: null }, + finish_reason: "stop", + index: 0, + }, + ], + usage: { + prompt_tokens: 10, + completion_tokens: 5, + total_tokens: 15, + }, + } + } + + return { + [Symbol.asyncIterator]: async function* () { + yield { choices: [ { - message: { role: "assistant", content: "Test response", refusal: null }, - finish_reason: "stop", + delta: { content: "Test response" }, + index: 0, + }, + ], + usage: null, + } + yield { + choices: [ + { + delta: {}, index: 0, }, ], @@ -34,38 +72,30 @@ vitest.mock("openai", () => { total_tokens: 15, }, } - } - - return { - [Symbol.asyncIterator]: async function* () { - yield { - choices: [ - { - delta: { content: "Test response" }, - index: 0, - }, - ], - usage: null, - } - yield { - choices: [ - { - delta: {}, - index: 0, - }, - ], - usage: { - prompt_tokens: 10, - completion_tokens: 5, - total_tokens: 15, - }, - } - }, - } - }), - }, + }, + } + }), }, - })), + }, + responses: { + create: mockResponsesCreate.mockImplementation(async (options) => { + // Default happy-path mock for non-streaming Responses API + return { + id: "test-response", + output_text: "Test response", + usage: { + input_tokens: 10, + output_tokens: 5, + total_tokens: 15, + }, + } + }), + }, + }) + return { + __esModule: true, + default: mockConstructor.mockImplementation((args: any) => makeClient()), + AzureOpenAI: mockConstructor.mockImplementation((args: any) => makeClient()), } }) @@ -408,9 +438,9 @@ describe("OpenAiHandler", () => { }) it("should handle rate limiting", async () => { - const rateLimitError = new Error("Rate limit exceeded") + const rateLimitError: ErrorWithStatus = new Error("Rate limit exceeded") rateLimitError.name = "Error" - ;(rateLimitError as any).status = 429 + rateLimitError.status = 429 mockCreate.mockRejectedValueOnce(rateLimitError) const stream = handler.createMessage("system prompt", testMessages) @@ -977,6 +1007,56 @@ describe("getOpenAiModels", () => { expect(result).toEqual([]) }) + describe("Azure portal Responses URL normalization", () => { + beforeEach(() => { + mockCreate.mockClear() + mockResponsesCreate.mockClear() + }) + + it("Responses URL from Azure portal is converted to use Responses API", async () => { + const handler = new OpenAiHandler({ + openAiApiKey: "test-azure", + openAiModelId: "my-deployment", + openAiBaseUrl: "https://sample-name.openai.azure.com/openai/responses?api-version=2025-04-01-preview", + openAiUseAzure: true, + openAiStreamingEnabled: false, + includeMaxTokens: true, + openAiCustomModelInfo: { + contextWindow: 128_000, + maxTokens: 64, + supportsPromptCache: false, + }, + }) + + const messages: Anthropic.Messages.MessageParam[] = [ + { role: "user", content: [{ type: "text", text: "Hello!" 
}] }, + ] + + const stream = handler.createMessage("You are Roo Code.", messages) + const chunks: any[] = [] + for await (const ch of stream) { + chunks.push(ch) + } + + // Should have used Responses API, not Chat Completions + expect(mockResponsesCreate).toHaveBeenCalled() + expect(mockCreate).not.toHaveBeenCalled() + + // Payload shape sanity + const args = mockResponsesCreate.mock.calls[0][0] + expect(args).toHaveProperty("model", "my-deployment") + expect(args).toHaveProperty("input") + expect(typeof args.input).toBe("string") + expect(args.input).toContain("Developer: You are Roo Code.") + expect(args.input).toContain("User: Hello!") + expect(args).toHaveProperty("max_output_tokens", 64) + + // Ensure returned text chunk surfaced + const textChunk = chunks.find((c) => c.type === "text") + expect(textChunk?.text).toBe("Test response") + }) + }) + it("should deduplicate model IDs", async () => { const mockResponse = { data: { @@ -990,3 +1070,1000 @@ describe("getOpenAiModels", () => { expect(result).toEqual(["gpt-4", "gpt-3.5-turbo"]) }) }) + +// -- Added Responses API tests (TDD) -- + +describe("OpenAI Compatible - Responses API", () => { + let handler: OpenAiHandler + const baseMessages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: [ + { + type: "text" as const, + text: "Hello!", + }, + ], + }, + ] + + beforeEach(() => { + mockCreate.mockClear() + mockResponsesCreate.mockClear() + }) + + it("Azure Responses happy path uses string input (no messages) and max_output_tokens", async () => { + const opts: ApiHandlerOptions = { + openAiApiKey: "test-azure", + openAiModelId: "my-deployment", + openAiBaseUrl: "https://myres.openai.azure.com/openai/v1/responses?api-version=preview", + openAiStreamingEnabled: false, + includeMaxTokens: true, + openAiCustomModelInfo: { + contextWindow: 128_000, + maxTokens: 256, + supportsPromptCache: false, + }, + enableReasoningEffort: false, + } + handler = new OpenAiHandler(opts) + + const stream = handler.createMessage("You are Roo Code.", baseMessages) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + // Should have produced a text chunk + const textChunk = chunks.find((c) => c.type === "text") + expect(textChunk?.text).toBe("Test response") + + // Ensure Responses API was used + expect(mockResponsesCreate).toHaveBeenCalled() + expect(mockCreate).not.toHaveBeenCalled() + + const callArgs = mockResponsesCreate.mock.calls[0][0] + expect(callArgs).not.toHaveProperty("messages") + expect(callArgs).toHaveProperty("input") + expect(typeof callArgs.input).toBe("string") + expect(callArgs.input).toContain("Developer: You are Roo Code.") + expect(callArgs.input).toContain("User: Hello!") + expect(callArgs).toHaveProperty("model", "my-deployment") + // Azure Responses naming + expect(callArgs).toHaveProperty("max_output_tokens", 256) + }) + + it("Auto-detect: '/v1/responses' => Responses payload; '/chat/completions' => Chat Completions payload", async () => { + // Responses URL + const respHandler = new OpenAiHandler({ + openAiApiKey: "test", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + }) + for await (const _ of respHandler.createMessage("sys", baseMessages)) { + } + expect(mockResponsesCreate).toHaveBeenCalled() + const respArgs = mockResponsesCreate.mock.calls.pop()?.[0] + expect(respArgs).not.toHaveProperty("messages") + expect(respArgs).toHaveProperty("input") + + // Chat Completions URL + mockResponsesCreate.mockClear() + 
mockCreate.mockClear() + const chatHandler = new OpenAiHandler({ + openAiApiKey: "test", + openAiModelId: "gpt-4o", + openAiBaseUrl: "https://api.openai.com/v1/chat/completions", + openAiStreamingEnabled: false, + }) + for await (const _ of chatHandler.createMessage("sys", baseMessages)) { + } + expect(mockCreate).toHaveBeenCalled() + const chatArgs = mockCreate.mock.calls.pop()?.[0] + expect(chatArgs).toHaveProperty("messages") + expect(chatArgs).not.toHaveProperty("input") + }) + + it("Reasoning effort mapping: Responses uses reasoning: { effort }, Chat uses reasoning_effort", async () => { + // Responses path + const responsesHandler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + enableReasoningEffort: true, + reasoningEffort: "high", + openAiCustomModelInfo: { + contextWindow: 128_000, + supportsPromptCache: false, + supportsReasoningEffort: true, + }, + }) + for await (const _ of responsesHandler.createMessage("sys", baseMessages)) { + } + expect(mockResponsesCreate).toHaveBeenCalled() + const rArgs = mockResponsesCreate.mock.calls.pop()?.[0] + expect(rArgs).toHaveProperty("reasoning") + expect(rArgs.reasoning).toMatchObject({ effort: "high" }) + + // Chat path + mockResponsesCreate.mockClear() + mockCreate.mockClear() + const chatHandler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-4o", + openAiBaseUrl: "https://api.openai.com/v1/chat/completions", + openAiStreamingEnabled: false, + enableReasoningEffort: true, + reasoningEffort: "high", + openAiCustomModelInfo: { + contextWindow: 128_000, + supportsPromptCache: false, + supportsReasoningEffort: true, + }, + }) + for await (const _ of chatHandler.createMessage("sys", baseMessages)) { + } + expect(mockCreate).toHaveBeenCalled() + const cArgs = mockCreate.mock.calls.pop()?.[0] + expect(cArgs).toHaveProperty("reasoning_effort", "high") + }) + + it("Verbosity (Responses): include when set; if server rejects, retry without it (warn once)", async () => { + // First call throws 400 for 'verbosity', second succeeds + mockResponsesCreate.mockImplementationOnce((_opts: unknown) => { + const err: ErrorWithStatus = new Error("Unsupported parameter: 'verbosity'") + err.status = 400 + throw err + }) + + const h = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + verbosity: "high", + }) + + const stream = h.createMessage("sys", baseMessages) + const chunks: any[] = [] + for await (const ch of stream) { + chunks.push(ch) + } + + expect(mockResponsesCreate).toHaveBeenCalledTimes(2) + const first = mockResponsesCreate.mock.calls[0][0] + const second = mockResponsesCreate.mock.calls[1][0] + expect(first).toHaveProperty("text") + expect(first.text).toEqual({ verbosity: "high" }) + expect(second).not.toHaveProperty("text") + + // Should still yield text + const textChunk = chunks.find((c) => c.type === "text") + expect(textChunk?.text).toBe("Test response") + }) + + it("Azure naming: use max_output_tokens for Responses; keep max_completion_tokens for Chat Completions", async () => { + // Responses + includeMaxTokens + const r = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + includeMaxTokens: true, + modelMaxTokens: 128, + openAiCustomModelInfo: { + contextWindow: 128_000, + maxTokens: 4096, + 
supportsPromptCache: false, + }, + }) + for await (const _ of r.createMessage("sys", baseMessages)) { + } + const rArgs = mockResponsesCreate.mock.calls.pop()?.[0] + expect(rArgs).toHaveProperty("max_output_tokens", 128) + expect(rArgs).not.toHaveProperty("max_completion_tokens") + + // Chat + includeMaxTokens + mockResponsesCreate.mockClear() + mockCreate.mockClear() + const c = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-4o", + openAiBaseUrl: "https://api.openai.com/v1/chat/completions", + openAiStreamingEnabled: false, + includeMaxTokens: true, + modelMaxTokens: 128, + openAiCustomModelInfo: { + contextWindow: 128_000, + maxTokens: 4096, + supportsPromptCache: false, + }, + }) + for await (const _ of c.createMessage("sys", baseMessages)) { + } + const cArgs = mockCreate.mock.calls.pop()?.[0] + expect(cArgs).toHaveProperty("max_completion_tokens", 128) + expect(cArgs).not.toHaveProperty("max_output_tokens") + }) + + it("Normalizes Azure portal responses URL to /openai/v1 with apiVersion=preview", async () => { + mockResponsesCreate.mockClear() + mockCreate.mockClear() + + const portalUrl = "https://sample-name.openai.azure.com/openai/responses?api-version=2025-04-01-preview" + + const handler = new OpenAiHandler({ + openAiApiKey: "test-azure", + openAiModelId: "my-deployment", + openAiBaseUrl: portalUrl, + openAiStreamingEnabled: false, + }) + + for await (const _ of handler.createMessage("sys", baseMessages)) { + } + + // Ensures Responses API path was used + expect(mockResponsesCreate).toHaveBeenCalled() + + // Ensure SDK constructor was called with normalized baseURL and 'preview' apiVersion (per requirement) + // Note: AzureOpenAI and OpenAI share same mock constructor; inspect last call + const ctorCalls = getMockCallsOf(OpenAI) + const lastCall = ctorCalls[ctorCalls.length - 1] + const lastArg0 = Array.isArray(lastCall) ? lastCall[0] : undefined + const lastCtorArgs = + typeof lastArg0 === "object" && lastArg0 !== null ? 
(lastArg0 as Record) : {} + expect(lastCtorArgs["baseURL"]).toBe("https://sample-name.openai.azure.com/openai/v1") + expect(lastCtorArgs["apiVersion"]).toBe("preview") + }) + + it("streams Responses API when provider returns AsyncIterable", async () => { + // Arrange: make responses.create return an AsyncIterable stream for this test + mockResponsesCreate.mockImplementationOnce(async (_opts: any) => { + return { + [Symbol.asyncIterator]: async function* () { + yield { type: "response.text.delta", delta: "Hello " } + yield { type: "response.text.delta", delta: "world" } + yield { + type: "response.completed", + response: { usage: { input_tokens: 7, output_tokens: 2 } }, + } + }, + } + }) + + const handler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5-mini", + openAiBaseUrl: "https://api.openai.com/v1/responses", + // streaming enabled by default + }) + + const stream = handler.createMessage("You are Roo.", [ + { role: "user", content: [{ type: "text" as const, text: "Say hi" }] }, + ]) + + const chunks: any[] = [] + for await (const ch of stream) { + chunks.push(ch) + } + + // Text should be streamed and concatenated in order + const text = chunks + .filter((c) => c.type === "text") + .map((c) => c.text) + .join("") + expect(text).toBe("Hello world") + + // Usage chunk emitted at completion + const usage = chunks.find((c) => c.type === "usage") + expect(usage).toBeDefined() + expect(usage.inputTokens).toBe(7) + expect(usage.outputTokens).toBe(2) + + // Ensure stream: true was sent + const args = mockResponsesCreate.mock.calls.pop()?.[0] + expect(args).toHaveProperty("stream", true) + }) +}) + +describe("OpenAI Compatible - Responses API (extended streaming)", () => { + it("handles reasoning deltas and output_text in message content", async () => { + // Arrange: make responses.create return an AsyncIterable stream for this test + mockResponsesCreate.mockImplementationOnce(async (_opts: any) => { + return { + [Symbol.asyncIterator]: async function* () { + // Reasoning delta first + yield { type: "response.reasoning.delta", delta: "Thinking. " } + // Then a message item with output_text inside content array + yield { + type: "response.output_item.added", + item: { + type: "message", + content: [{ type: "output_text", text: "Answer." }], + }, + } + // Completion with usage + yield { + type: "response.completed", + response: { usage: { input_tokens: 3, output_tokens: 2 } }, + } + }, + } + }) + + const handler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5-mini", + openAiBaseUrl: "https://api.openai.com/v1/responses", + }) + + const chunks: any[] = [] + for await (const ch of handler.createMessage("sys", [ + { role: "user", content: [{ type: "text" as const, text: "Hi" }] }, + ])) { + chunks.push(ch) + } + + const reasoning = chunks.find((c) => c.type === "reasoning") + expect(reasoning?.text).toBe("Thinking. 
") + + const text = chunks.find((c) => c.type === "text") + expect(text?.text).toBe("Answer.") + + const usage = chunks.find((c) => c.type === "usage") + expect(usage).toBeDefined() + expect(usage.inputTokens).toBe(3) + expect(usage.outputTokens).toBe(2) + + // Ensure stream: true was sent + const args = mockResponsesCreate.mock.calls.pop()?.[0] + expect(args).toHaveProperty("stream", true) + }) + + it("maps refusal deltas to text with prefix", async () => { + mockResponsesCreate.mockImplementationOnce(async (_opts: any) => { + return { + [Symbol.asyncIterator]: async function* () { + yield { type: "response.refusal.delta", delta: "Cannot comply" } + // Usage may be attached directly on the event for some implementations + yield { type: "response.done", usage: { prompt_tokens: 1, completion_tokens: 1 } } + }, + } + }) + + const handler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5-mini", + openAiBaseUrl: "https://api.openai.com/v1/responses", + }) + + const result: any[] = [] + for await (const ch of handler.createMessage("sys", [ + { role: "user", content: [{ type: "text" as const, text: "Hi" }] }, + ])) { + result.push(ch) + } + + const textChunks = result.filter((c) => c.type === "text").map((c) => c.text) + expect(textChunks).toContain("[Refusal] Cannot comply") + + const usage = result.find((c) => c.type === "usage") + expect(usage).toBeDefined() + expect(usage.inputTokens).toBe(1) + expect(usage.outputTokens).toBe(1) + }) +}) + +describe("OpenAI Compatible - Responses API (multimodal)", () => { + it("builds structured array input with images (non-streaming)", async () => { + // Reset mocks for clarity + mockResponsesCreate.mockClear() + mockCreate.mockClear() + + const handler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5-mini", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + includeMaxTokens: false, + }) + + const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: [ + { type: "text" as const, text: "Here is an image" }, + { + type: "image" as const, + // Minimal Anthropic-style inline image (base64) block + source: { type: "base64" as const, media_type: "image/png", data: "BASE64DATA" }, + }, + ], + }, + ] + + const chunks: any[] = [] + for await (const ch of handler.createMessage("You are Roo Code.", messages)) { + chunks.push(ch) + } + + // Should have used Responses API + expect(mockResponsesCreate).toHaveBeenCalled() + const args = mockResponsesCreate.mock.calls[0][0] + + // Input should be an array (structured input mode) + expect(Array.isArray(args.input)).toBe(true) + const arr = Array.isArray(args.input) ? 
args.input : [] + + // First element should be Developer preface as input_text + expect(arr[0]?.role).toBe("user") + expect(arr[0]?.content?.[0]?.type).toBe("input_text") + expect(arr[0]?.content?.[0]?.text).toContain("Developer: You are Roo Code.") + + // There should be at least one input_image with a data URL for the provided image + const hasInputImage = arr.some((item: any) => { + const c = item?.content + return ( + Array.isArray(c) && + c.some( + (part: any) => + part?.type === "input_image" && + typeof part?.image_url === "string" && + part.image_url.startsWith("data:image/png;base64,BASE64DATA"), + ) + ) + }) + expect(hasInputImage).toBe(true) + + // Should still yield a text chunk and usage (from default mock) + const textChunk = chunks.find((c: any) => c.type === "text") + const usageChunk = chunks.find((c: any) => c.type === "usage") + expect(textChunk?.text).toBe("Test response") + expect(usageChunk?.inputTokens).toBe(10) + expect(usageChunk?.outputTokens).toBe(5) + }) + + it("streams with multimodal input using array 'input'", async () => { + // Make responses.create return an AsyncIterable stream for this test + mockResponsesCreate.mockClear() + mockResponsesCreate.mockImplementationOnce(async (_opts: any) => { + return { + [Symbol.asyncIterator]: async function* () { + yield { type: "response.text.delta", delta: "A" } + yield { type: "response.text.delta", delta: "B" } + yield { + type: "response.completed", + response: { usage: { input_tokens: 2, output_tokens: 2 } }, + } + }, + } + }) + + const handler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5-mini", + openAiBaseUrl: "https://api.openai.com/v1/responses", + }) + + const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: [ + { type: "text" as const, text: "Look at this" }, + { + type: "image" as const, + source: { type: "base64" as const, media_type: "image/jpeg", data: "IMGDATA" }, + }, + ], + }, + ] + + const out: any[] = [] + for await (const ch of handler.createMessage("System text", messages)) { + out.push(ch) + } + + // Ensure stream: true was sent and input is array + expect(mockResponsesCreate).toHaveBeenCalled() + const args = mockResponsesCreate.mock.calls[0][0] + expect(args).toHaveProperty("stream", true) + expect(Array.isArray(args.input)).toBe(true) + + // Verify streamed text concatenation and usage + const combined = out + .filter((c) => c.type === "text") + .map((c) => c.text) + .join("") + expect(combined).toBe("AB") + + const usage = out.find((c) => c.type === "usage") + expect(usage?.inputTokens).toBe(2) + expect(usage?.outputTokens).toBe(2) + }) +}) + +// --- New tests: Responses API conversation continuity (previous_response_id) --- +describe("OpenAI Compatible - Responses API conversation continuity", () => { + beforeEach(() => { + mockCreate.mockClear() + mockResponsesCreate.mockClear() + }) + + it("propagates previous_response_id from first streaming response into the next request", async () => { + // First call will stream and include a response.id + mockResponsesCreate.mockImplementationOnce(async (_opts: any) => { + return { + [Symbol.asyncIterator]: async function* () { + yield { type: "response.text.delta", delta: "Desc " } + yield { + type: "response.completed", + response: { id: "resp-1", usage: { input_tokens: 5, output_tokens: 2 } }, + } + }, + } + }) + + const handler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5-mini", + openAiBaseUrl: "https://api.openai.com/v1/responses", + }) + + // 1) First call (establish 
response id) + const firstChunks: any[] = [] + for await (const ch of handler.createMessage("You are Roo.", [ + { role: "user", content: [{ type: "text" as const, text: "Describe the image" }] }, + ])) { + firstChunks.push(ch) + } + + // Ensure first call was made + expect(mockResponsesCreate).toHaveBeenCalledTimes(1) + // 2) Second call - should include previous_response_id from first call + const secondChunks: any[] = [] + for await (const ch of handler.createMessage("You are Roo.", [ + { role: "user", content: [{ type: "text" as const, text: "Continue." }] }, + ])) { + secondChunks.push(ch) + } + + // Validate that a second Responses.create call was made + expect(mockResponsesCreate).toHaveBeenCalledTimes(2) + const secondArgs = mockResponsesCreate.mock.calls[1][0] + expect(secondArgs).toHaveProperty("previous_response_id", "resp-1") + }) + + it("omits previous_response_id when metadata.suppressPreviousResponseId is true", async () => { + // First call streams and returns an id + mockResponsesCreate.mockImplementationOnce(async (_opts: any) => { + return { + [Symbol.asyncIterator]: async function* () { + yield { type: "response.text.delta", delta: "First" } + yield { + type: "response.completed", + response: { id: "rid-xyz", usage: { input_tokens: 1, output_tokens: 1 } }, + } + }, + } + }) + + const handler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5-mini", + openAiBaseUrl: "https://api.openai.com/v1/responses", + }) + + // First call to capture lastResponseId + for await (const _ of handler.createMessage("sys", [ + { role: "user", content: [{ type: "text" as const, text: "Turn 1" }] }, + ])) { + } + + // Second call with suppressPreviousResponseId => should NOT include previous_response_id + for await (const _ of handler.createMessage( + "sys", + [{ role: "user", content: [{ type: "text" as const, text: "Turn 2" }] }], + { taskId: "test", suppressPreviousResponseId: true }, + )) { + } + + expect(mockResponsesCreate).toHaveBeenCalledTimes(2) + const args = mockResponsesCreate.mock.calls[1][0] + expect(args).not.toHaveProperty("previous_response_id") + }) + it("does not include previous_response_id when prior stream fails before id; defaults to store:true", async () => { + // First call: stream throws before emitting any response.id + mockResponsesCreate + .mockImplementationOnce(async (_opts: any) => { + return { + [Symbol.asyncIterator]: async function* () { + yield { type: "response.text.delta", delta: "Partial " } + throw new Error("stream interrupted") + }, + } + }) + // Second call: normal stream + .mockImplementationOnce(async (_opts: any) => { + return { + [Symbol.asyncIterator]: async function* () { + yield { type: "response.text.delta", delta: "OK" } + yield { + type: "response.completed", + response: { usage: { input_tokens: 1, output_tokens: 1 } }, + } + }, + } + }) + + const handler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5-mini", + openAiBaseUrl: "https://api.openai.com/v1/responses", + }) + + // First call fails mid-stream, so no response.id is captured + const first = handler.createMessage("You are Roo.", [ + { role: "user", content: [{ type: "text" as const, text: "Hi" }] }, + ]) + + await expect(async () => { + for await (const _ of first) { + // drain until error + } + }).rejects.toThrow("stream interrupted") + + // Second call should not include previous_response_id and should default to store:true + const chunks: any[] = [] + for await (const ch of handler.createMessage("You are Roo.", [ + { role: "user", content: [{ 
type: "text" as const, text: "Hi" }] }, + ])) { + chunks.push(ch) + } + + expect(mockResponsesCreate).toHaveBeenCalledTimes(2) + const secondArgs = mockResponsesCreate.mock.calls[1][0] + expect(secondArgs).not.toHaveProperty("previous_response_id") + expect(secondArgs).toHaveProperty("store", true) + expect(typeof secondArgs.input).toBe("string") + expect(secondArgs.input).toContain("Developer: You are Roo.") + expect(secondArgs.input).toContain("User: Hi") + }) +}) + +// --- New: Responses API parity improvements tests --- +describe("OpenAI Compatible - Responses API parity improvements", () => { + beforeEach(() => { + mockCreate.mockClear() + mockResponsesCreate.mockClear() + }) + + it("retries without previous_response_id when server returns 400 'Previous response ... not found' (non-streaming)", async () => { + // First call throws 400 for previous_response_id, second succeeds + mockResponsesCreate + .mockImplementationOnce((_opts: unknown) => { + const err: ErrorWithStatus = new Error("Previous response rid-bad not found") + err.status = 400 + throw err + }) + .mockImplementationOnce(async (_opts: any) => { + return { id: "rid-good", output_text: "OK", usage: { input_tokens: 1, output_tokens: 1 } } + }) + + const h = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + }) + + const chunks: any[] = [] + for await (const ch of h.createMessage( + "sys", + [{ role: "user", content: [{ type: "text" as const, text: "Turn" }] }], + { taskId: "test", previousResponseId: "rid-bad" }, + )) { + chunks.push(ch) + } + + // Two calls made: first fails with 400, second retries without previous_response_id + expect(mockResponsesCreate).toHaveBeenCalledTimes(2) + const firstArgs = mockResponsesCreate.mock.calls[0][0] + expect(firstArgs).toHaveProperty("previous_response_id", "rid-bad") + + const secondArgs = mockResponsesCreate.mock.calls[1][0] + expect(secondArgs).not.toHaveProperty("previous_response_id") + + // Should still surface text + const textChunk = chunks.find((c: any) => c.type === "text") + expect(textChunk?.text).toBe("OK") + }) + + it("retries without previous_response_id when server returns 400 (streaming)", async () => { + // First call throws, second returns a stream + mockResponsesCreate + .mockImplementationOnce((_opts: unknown) => { + const err: ErrorWithStatus = new Error("Previous response not found") + err.status = 400 + throw err + }) + .mockImplementationOnce(async (_opts: any) => { + return { + [Symbol.asyncIterator]: async function* () { + yield { type: "response.text.delta", delta: "Hello" } + yield { type: "response.completed", response: { usage: { input_tokens: 1, output_tokens: 1 } } } + }, + } + }) + + const h = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + // streaming enabled by default + }) + + const out: any[] = [] + for await (const ch of h.createMessage( + "sys", + [{ role: "user", content: [{ type: "text" as const, text: "Hi" }] }], + { taskId: "test", previousResponseId: "bad-id" }, + )) { + out.push(ch) + } + + expect(mockResponsesCreate).toHaveBeenCalledTimes(2) + const first = mockResponsesCreate.mock.calls[0][0] + expect(first).toHaveProperty("previous_response_id", "bad-id") + const second = mockResponsesCreate.mock.calls[1][0] + expect(second).not.toHaveProperty("previous_response_id") + + const combined = out + .filter((c) => c.type === "text") + .map((c) => c.text) + 
.join("") + expect(combined).toBe("Hello") + }) + + it("handles response.content_part.added by emitting text", async () => { + mockResponsesCreate.mockImplementationOnce(async (_opts: any) => { + return { + [Symbol.asyncIterator]: async function* () { + yield { type: "response.content_part.added", part: { type: "text", text: "Part" } } + yield { type: "response.completed", response: { usage: { input_tokens: 0, output_tokens: 0 } } } + }, + } + }) + + const h = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + }) + + const out: any[] = [] + for await (const ch of h.createMessage("sys", [ + { role: "user", content: [{ type: "text" as const, text: "Hi" }] }, + ])) { + out.push(ch) + } + + const texts = out.filter((c) => c.type === "text").map((c) => c.text) + expect(texts).toContain("Part") + }) + + it("maps response.audio_transcript.delta to text", async () => { + mockResponsesCreate.mockImplementationOnce(async (_opts: any) => { + return { + [Symbol.asyncIterator]: async function* () { + yield { type: "response.audio_transcript.delta", delta: "Transcript" } + yield { type: "response.completed", response: { usage: { input_tokens: 0, output_tokens: 0 } } } + }, + } + }) + + const h = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + }) + + const out: any[] = [] + for await (const ch of h.createMessage("sys", [ + { role: "user", content: [{ type: "text" as const, text: "Hi" }] }, + ])) { + out.push(ch) + } + + const texts = out.filter((c) => c.type === "text").map((c) => c.text) + expect(texts).toContain("Transcript") + }) + + it("includes reasoning: { effort: 'minimal', summary: 'auto' } when enabled (non-streaming)", async () => { + mockResponsesCreate.mockImplementationOnce(async (opts: any) => { + return { id: "rid-1", output_text: "ok", usage: { input_tokens: 1, output_tokens: 1 } } + }) + + const h = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + enableReasoningEffort: true, + reasoningEffort: "minimal", + }) + + for await (const _ of h.createMessage("sys", [ + { role: "user", content: [{ type: "text" as const, text: "Hi" }] }, + ])) { + // consume + } + + expect(mockResponsesCreate).toHaveBeenCalledTimes(1) + const args = mockResponsesCreate.mock.calls[0][0] + expect(args).toHaveProperty("reasoning") + expect(args.reasoning).toMatchObject({ effort: "minimal", summary: "auto" }) + }) + + it("omits reasoning.summary when enableGpt5ReasoningSummary is false", async () => { + mockResponsesCreate.mockImplementationOnce(async (opts: any) => { + return { id: "rid-2", output_text: "ok", usage: { input_tokens: 1, output_tokens: 1 } } + }) + + const h = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + enableReasoningEffort: true, + reasoningEffort: "low", + enableGpt5ReasoningSummary: false, + }) + + for await (const _ of h.createMessage("sys", [ + { role: "user", content: [{ type: "text" as const, text: "Hi" }] }, + ])) { + // consume + } + + expect(mockResponsesCreate).toHaveBeenCalledTimes(1) + const args = mockResponsesCreate.mock.calls[0][0] + expect(args).toHaveProperty("reasoning") + expect(args.reasoning.effort).toBe("low") + expect(args.reasoning.summary).toBeUndefined() + }) +}) + +describe("OpenAI Compatible - Responses 
API minimal input parity (new tests)", () => { + beforeEach(() => { + // @ts-ignore - reuse mocks from this spec module + mockCreate.mockClear() + // @ts-ignore - reuse mocks from this spec module + mockResponsesCreate.mockClear() + }) + + it("sends only latest user message when previous_response_id is provided (string input, no Developer preface)", async () => { + const handler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + }) + + const msgs: Anthropic.Messages.MessageParam[] = [ + { role: "user", content: [{ type: "text" as const, text: "First" }] }, + { role: "assistant", content: [{ type: "text" as const, text: "Reply" }] }, + { role: "user", content: [{ type: "text" as const, text: "Latest" }] }, + ] + + const chunks: any[] = [] + for await (const ch of handler.createMessage("System Inst", msgs, { + taskId: "test", + previousResponseId: "prev-1", + })) { + chunks.push(ch) + } + + // Ensure Responses API was used with minimal input + // @ts-ignore + expect(mockResponsesCreate).toHaveBeenCalled() + // @ts-ignore + const args = mockResponsesCreate.mock.calls[0][0] + + expect(typeof args.input).toBe("string") + expect(args.input).toBe("User: Latest") + expect(String(args.input)).not.toContain("Developer: System Inst") + }) + + it("uses array input with only latest user content when previous_response_id and last user has images (no Developer preface)", async () => { + const handler = new OpenAiHandler({ + openAiApiKey: "k", + openAiModelId: "gpt-5", + openAiBaseUrl: "https://api.openai.com/v1/responses", + openAiStreamingEnabled: false, + }) + + const msgs: Anthropic.Messages.MessageParam[] = [ + { role: "user", content: [{ type: "text" as const, text: "Prev" }] }, + { role: "assistant", content: [{ type: "text" as const, text: "Ok" }] }, + { + role: "user", + content: [ + { type: "text" as const, text: "See" }, + { + type: "image" as const, + source: { type: "base64" as const, media_type: "image/png", data: "IMGDATA" }, + }, + ], + }, + ] + + const iter = handler.createMessage("Sys", msgs, { taskId: "test", previousResponseId: "prev-2" }) + for await (const _ of iter) { + // consume + } + + // @ts-ignore + const args = mockResponsesCreate.mock.calls.pop()?.[0] + expect(Array.isArray(args.input)).toBe(true) + + const arr = Array.isArray(args.input) ? 
args.input : []
+		expect(arr.length).toBe(1)
+		expect(arr[0]?.role).toBe("user")
+
+		const contents = arr[0]?.content || []
+		const hasImg = contents.some((p: any) => p?.type === "input_image")
+		expect(hasImg).toBe(true)
+
+		// No Developer preface should be injected in minimal mode
+		const hasDev = contents.some(
+			(p: any) => p?.type === "input_text" && typeof p.text === "string" && p.text.includes("Developer:"),
+		)
+		expect(hasDev).toBe(false)
+	})
+
+	it("always includes max_output_tokens for Responses API", async () => {
+		const handler = new OpenAiHandler({
+			openAiApiKey: "k",
+			openAiModelId: "gpt-5",
+			openAiBaseUrl: "https://api.openai.com/v1/responses",
+			openAiStreamingEnabled: false,
+			includeMaxTokens: false, // should still include based on model info
+			openAiCustomModelInfo: {
+				contextWindow: 128_000,
+				maxTokens: 123, // fallback used when modelMaxTokens not set
+				supportsPromptCache: false,
+			},
+		})
+
+		for await (const _ of handler.createMessage("sys", [
+			{ role: "user", content: [{ type: "text" as const, text: "Hi" }] },
+		])) {
+			// consume
+		}
+
+		// @ts-ignore
+		const args = mockResponsesCreate.mock.calls.pop()?.[0]
+		expect(args).toHaveProperty("max_output_tokens", 123)
+	})
+
+	it("does not include text.verbosity when not provided", async () => {
+		const handler = new OpenAiHandler({
+			openAiApiKey: "k",
+			openAiModelId: "gpt-5",
+			openAiBaseUrl: "https://api.openai.com/v1/responses",
+			openAiStreamingEnabled: false,
+		})
+
+		for await (const _ of handler.createMessage("sys", [
+			{ role: "user", content: [{ type: "text" as const, text: "Hi" }] },
+		])) {
+			// consume
+		}
+
+		// @ts-ignore
+		const args = mockResponsesCreate.mock.calls.pop()?.[0]
+		expect(args).not.toHaveProperty("text")
+	})
+})
diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts
index 2a57f251318..b9b5b7da586 100644
--- a/src/api/providers/openai.ts
+++ b/src/api/providers/openai.ts
@@ -19,6 +19,7 @@ import { convertToR1Format } from "../transform/r1-format"
 import { convertToSimpleMessages } from "../transform/simple-format"
 import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
 import { getModelParams } from "../transform/model-params"
+import { handleResponsesStream } from "../transform/responses-stream"
 
 import { DEFAULT_HEADERS } from "./constants"
 import { BaseProvider } from "./base-provider"
@@ -29,19 +30,71 @@ import { handleOpenAIError } from "./utils/openai-error-handler"
 // TODO: Rename this to OpenAICompatibleHandler. Also, I think the
 // `OpenAINativeHandler` can subclass from this, since it's obviously
 // compatible with the OpenAI API. We can also rename it to `OpenAIHandler`.
+/**
+ * URL auto-detection overview
+ *
+ * Decision tree (host and path based):
+ * 1) Azure AI Inference Service:
+ *    - Detected when host ends with ".services.ai.azure.com"
+ *    - Uses OpenAI Chat Completions API shape with a path override
+ *      (see OPENAI_AZURE_AI_INFERENCE_PATH) when making requests.
+ *
+ * 2) Azure OpenAI:
+ *    - Detected when host is "openai.azure.com" or ends with ".openai.azure.com"
+ *      or when options.openAiUseAzure is explicitly true.
+ *    - Within Azure OpenAI, the API "flavor" is chosen by URL path:
+ *      - Responses API:
+ *        * Path contains "/v1/responses" or ends with "/responses"
+ *        * Portal-style URLs (e.g. "/openai/responses?api-version=2025-04-01-preview"), which are not
+ *          valid request bases themselves, are also auto-detected and normalized to "/openai/v1"
+ *          with apiVersion "preview".
+ * - Chat Completions API: + * * Path contains "/chat/completions" + * - Default: + * * Falls back to Chat Completions if none of the above match. + * + * 3) Generic OpenAI-compatible endpoints: + * - Anything else (OpenAI, OpenRouter, LM Studio, vLLM, etc.) + * - Flavor is again selected by URL path as above: + * - "/v1/responses" or ending with "/responses" => Responses API + * - "/chat/completions" => Chat Completions + * - otherwise defaults to Chat Completions for backward compatibility. + * + * Examples: + * - https://api.openai.com/v1 -> Chat Completions (default) + * - https://api.openai.com/v1/responses -> Responses API + * - https://api.openai.com/v1/chat/completions -> Chat Completions + * - https://myres.openai.azure.com/openai/v1/responses?api-version=preview + * -> Azure OpenAI + Responses API + * - https://myres.openai.azure.com/openai/responses?api-version=2025-04-01-preview + * -> normalized to base /openai/v1 + apiVersion "preview" (Responses) + * - https://test.services.ai.azure.com -> Azure AI Inference Service (Chat Completions with path override) + */ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandler { protected options: ApiHandlerOptions private client: OpenAI + private lastResponseId: string | undefined private readonly providerName = "OpenAI" constructor(options: ApiHandlerOptions) { super() this.options = options + // Default to including reasoning.summary: "auto" for Responses API (parity with native provider) + if (this.options.enableGpt5ReasoningSummary === undefined) { + this.options.enableGpt5ReasoningSummary = true + } - const baseURL = this.options.openAiBaseUrl ?? "https://api.openai.com/v1" + // Normalize Azure Responses "web" URL shape if provided by users. + // Example input (Azure portal sometimes shows): + // https://{resource}.openai.azure.com/openai/responses?api-version=2025-04-01-preview + // We normalize to Azure SDK-friendly base and version: + // baseURL: https://{resource}.openai.azure.com/openai/v1 + // apiVersion: preview + const rawBaseURL = this.options.openAiBaseUrl ?? "https://api.openai.com/v1" + const azureNormalization = this._normalizeAzureResponsesBaseUrlAndVersion(rawBaseURL) + const baseURL = azureNormalization.baseURL const apiKey = this.options.openAiApiKey ?? "not-provided" - const isAzureAiInference = this._isAzureAiInference(this.options.openAiBaseUrl) - const urlHost = this._getUrlHost(this.options.openAiBaseUrl) + const isAzureAiInference = this._isAzureAiInference(baseURL) + const urlHost = this._getUrlHost(baseURL) const isAzureOpenAi = urlHost === "azure.com" || urlHost.endsWith(".azure.com") || options.openAiUseAzure const headers = { @@ -63,10 +116,23 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl } else if (isAzureOpenAi) { // Azure API shape slightly differs from the core API shape: // https://github.com/openai/openai-node?tab=readme-ov-file#microsoft-azure-openai + + // Determine if we're using the Responses API flavor for Azure (auto-detect by URL only) + const flavor = this._resolveApiFlavor(this.options.openAiBaseUrl ?? "") + const isResponsesFlavor = + flavor === "responses" || + this._isAzureOpenAiResponses(this.options.openAiBaseUrl) || + this._isAzureOpenAiResponses(baseURL) + + // Always use 'preview' for Azure Responses API calls (per user requirement) + const azureVersion = isResponsesFlavor + ? 
"preview" + : this.options.azureApiVersion || azureOpenAiDefaultApiVersion + this.client = new AzureOpenAI({ baseURL, apiKey, - apiVersion: this.options.azureApiVersion || azureOpenAiDefaultApiVersion, + apiVersion: azureVersion, defaultHeaders: headers, timeout, }) @@ -85,7 +151,16 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, ): ApiStream { - const { info: modelInfo, reasoning } = this.getModel() + // Gather model params (centralized: temperature, max tokens, reasoning, verbosity) + const { info: modelInfo } = this.getModel() + const openAiParams = getModelParams({ + format: "openai", + modelId: this.options.openAiModelId ?? "", + model: modelInfo, + settings: this.options, + }) + const { reasoning, reasoningEffort, verbosity } = openAiParams + const modelUrl = this.options.openAiBaseUrl ?? "" const modelId = this.options.openAiModelId ?? "" const enabledR1Format = this.options.openAiR1FormatEnabled ?? false @@ -94,6 +169,15 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl const deepseekReasoner = modelId.includes("deepseek-reasoner") || enabledR1Format const ark = modelUrl.includes(".volces.com") + // Decide API flavor (auto-detect by URL) + const flavor = this._resolveApiFlavor(modelUrl) + + // If Responses API is selected, use the Responses payload and endpoint + if (flavor === "responses") { + yield* this._handleResponsesFlavor(systemPrompt, messages, metadata, modelInfo, openAiParams) + return + } + if (modelId.includes("o1") || modelId.includes("o3") || modelId.includes("o4")) { yield* this.handleO3FamilyMessage(modelId, systemPrompt, messages) return @@ -239,6 +323,10 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl ? [systemMessage, ...convertToSimpleMessages(messages)] : [systemMessage, ...convertToOpenAiMessages(messages)], } + // Include reasoning_effort for Chat Completions when available + if (reasoning) { + Object.assign(requestOptions, reasoning) + } // Add max_tokens if needed this.addMaxTokensIfNeeded(requestOptions, modelInfo) @@ -282,9 +370,77 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl async completePrompt(prompt: string): Promise { try { const isAzureAiInference = this._isAzureAiInference(this.options.openAiBaseUrl) + const flavor = this._resolveApiFlavor(this.options.openAiBaseUrl ?? "") const model = this.getModel() const modelInfo = model.info + // Use Responses API when selected (non-streaming convenience method) + if (flavor === "responses") { + // Build structured single-turn input + const payload: Record = { + model: model.id, + input: [ + { + role: "user", + content: [{ type: "input_text", text: prompt }], + }, + ], + stream: false, + store: false, + } + + // Reasoning effort (support "minimal"; include summary: "auto" unless disabled) + const effort = (this.options.reasoningEffort || model.reasoningEffort) as + | "minimal" + | "low" + | "medium" + | "high" + | undefined + if (this.options.enableReasoningEffort && effort) { + ;( + payload as { reasoning?: { effort: "minimal" | "low" | "medium" | "high"; summary?: "auto" } } + ).reasoning = { + effort, + ...(this.options.enableGpt5ReasoningSummary !== false ? 
{ summary: "auto" as const } : {}), + } + } + + // Temperature if supported and set + if (modelInfo.supportsTemperature !== false && this.options.modelTemperature !== undefined) { + ;(payload as Record).temperature = this.options.modelTemperature + } + + // Verbosity via text.verbosity - include only when supported + if (this.options.verbosity && modelInfo.supportsVerbosity) { + ;(payload as { text?: { verbosity: "low" | "medium" | "high" } }).text = { + verbosity: this.options.verbosity as "low" | "medium" | "high", + } + } + + // max_output_tokens + if (this.options.includeMaxTokens === true) { + ;(payload as Record).max_output_tokens = + this.options.modelMaxTokens || modelInfo.maxTokens + } + + const response = await this._responsesCreateWithRetries(payload, { + usedArrayInput: true, + lastUserMessage: undefined, + previousId: undefined, + systemPrompt: "", + messages: [], + }) + try { + const respId = (response as { id?: unknown } | undefined)?.id + if (typeof respId === "string" && respId.length > 0) { + this.lastResponseId = respId + } + } catch { + // ignore + } + return this._extractResponsesText(response) ?? "" + } + const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = { model: model.id, messages: [{ role: "user", content: prompt }], @@ -420,16 +576,143 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl } } + /** + * Detects Grok xAI endpoints. + * - Returns true when the host contains "x.ai" (e.g., "api.x.ai"). + * - Used to omit stream_options for streaming requests because Grok may not support them. + * + * Examples: + * - https://api.x.ai/v1 -> true + * - https://api.openai.com/v1 -> false + */ private _isGrokXAI(baseUrl?: string): boolean { const urlHost = this._getUrlHost(baseUrl) return urlHost.includes("x.ai") } + /** + * Detects Azure AI Inference Service endpoints (distinct from Azure OpenAI). + * - Returns true when host ends with ".services.ai.azure.com". + * - These endpoints require a special path override when calling the Chat Completions API. + * + * Examples: + * - https://myenv.services.ai.azure.com -> true + * - https://myres.openai.azure.com -> false (this is Azure OpenAI, not AI Inference) + */ private _isAzureAiInference(baseUrl?: string): boolean { const urlHost = this._getUrlHost(baseUrl) return urlHost.endsWith(".services.ai.azure.com") } + /** + * Detects Azure OpenAI "Responses API" URLs by host and path. + * - Host must be "openai.azure.com" or end with ".openai.azure.com" + * - Path may be one of: + * • "/openai/v1/responses" (preferred v1 path) + * • "/openai/responses" (portal/legacy style) + * • any path ending with "/responses" + * - Trailing slashes are trimmed before matching. + * + * This is used to favor the Responses API flavor on Azure OpenAI when the base URL already + * points to a Responses path. 
+ * + * Examples (true): + * - https://myres.openai.azure.com/openai/v1/responses?api-version=preview + * - https://myres.openai.azure.com/openai/responses?api-version=2025-04-01-preview + * - https://openai.azure.com/openai/v1/responses + * + * Examples (false): + * - https://myres.openai.azure.com/openai/v1/chat/completions + * - https://api.openai.com/v1/responses (not an Azure host) + */ + private _isAzureOpenAiResponses(baseUrl?: string): boolean { + try { + if (!baseUrl) return false + const u = new URL(baseUrl) + const host = u.host + const path = u.pathname.replace(/\/+$/, "") + if (!(host.endsWith(".openai.azure.com") || host === "openai.azure.com")) return false + return ( + path.endsWith("/openai/v1/responses") || + path.endsWith("/openai/responses") || + path.endsWith("/responses") + ) + } catch { + return false + } + } + + /** + * Normalizes Azure OpenAI "Responses" portal URLs to an SDK-friendly base and version. + * + * Why: + * - The Azure portal often presents a non-v1 Responses endpoint such as: + * https://{res}.openai.azure.com/openai/responses?api-version=2025-04-01-preview + * which is not the ideal base for SDK clients. We convert it to: + * baseURL = https://{res}.openai.azure.com/openai/v1 + * apiVersionOverride = "preview" + * + * What it does: + * - If the input is an Azure OpenAI host and its path is exactly "/openai/responses" + * with api-version=2025-04-01-preview, we: + * • return { baseURL: "https://{host}/openai/v1", apiVersionOverride: "preview" } + * - If the input is already "/openai/v1/responses", we similarly normalize the base to "/openai/v1" + * and set apiVersionOverride to "preview". + * - Otherwise, returns the original URL unchanged. + * + * Scope: + * - Only applies to Azure OpenAI hosts ("openai.azure.com" or "*.openai.azure.com"). + * - Non-Azure URLs or already SDK-friendly bases are returned as-is. + * + * Examples: + * - In: https://sample.openai.azure.com/openai/responses?api-version=2025-04-01-preview + * Out: baseURL=https://sample.openai.azure.com/openai/v1, apiVersionOverride="preview" + * + * - In: https://sample.openai.azure.com/openai/v1/responses?api-version=preview + * Out: baseURL=https://sample.openai.azure.com/openai/v1, apiVersionOverride="preview" + * + * - In: https://api.openai.com/v1/responses + * Out: baseURL unchanged (non-Azure) + */ + private _normalizeAzureResponsesBaseUrlAndVersion(inputBaseUrl: string): { + baseURL: string + apiVersionOverride?: string + } { + try { + const url = new URL(inputBaseUrl) + const isAzureHost = url.hostname.endsWith(".openai.azure.com") || url.hostname === "openai.azure.com" + const pathname = (url.pathname || "").replace(/\/+$/, "") + + // 1) Azure portal "non-v1" shape: + // https://{res}.openai.azure.com/openai/responses?api-version=2025-04-01-preview + const isPortalNonV1 = + isAzureHost && + pathname === "/openai/responses" && + url.searchParams.get("api-version") === "2025-04-01-preview" + + if (isPortalNonV1) { + const normalized = `${url.protocol}//${url.host}/openai/v1` + const ver = "preview" + return { baseURL: normalized, apiVersionOverride: ver } + } + + // 2) v1 responses path passed as base URL: + // https://{res}.openai.azure.com/openai/v1/responses?api-version=preview + // Normalize base to '/openai/v1' and force apiVersion 'preview' for Azure Responses v1 preview. 
+ const isV1ResponsesPath = isAzureHost && pathname === "/openai/v1/responses" + if (isV1ResponsesPath) { + const normalized = `${url.protocol}//${url.host}/openai/v1` + const ver = "preview" + return { baseURL: normalized, apiVersionOverride: ver } + } + + // If it's already '/openai/v1' or any other valid path, keep as-is + return { baseURL: inputBaseUrl } + } catch { + return { baseURL: inputBaseUrl } + } + } + /** * Adds max_completion_tokens to the request body if needed based on provider configuration * Note: max_tokens is deprecated in favor of max_completion_tokens as per OpenAI documentation @@ -448,6 +731,458 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl requestOptions.max_completion_tokens = this.options.modelMaxTokens || modelInfo.maxTokens } } + + // --- Responses helpers --- + + private async *_handleResponsesFlavor( + systemPrompt: string, + messages: Anthropic.Messages.MessageParam[], + metadata: ApiHandlerCreateMessageMetadata | undefined, + modelInfo: ModelInfo, + openAiParams: any, + ): ApiStream { + const modelId = this.options.openAiModelId ?? "" + const nonStreaming = !(this.options.openAiStreamingEnabled ?? true) + + // Determine conversation continuity id (skip when explicitly suppressed) + const previousId = metadata?.suppressPreviousResponseId + ? undefined + : (metadata?.previousResponseId ?? this.lastResponseId) + + // Prepare Responses API input per test expectations: + // - Non-minimal text-only => single string with Developer/User lines + // - Minimal (previous_response_id) => single string "User: ..." when last user has no images + // - Image cases => structured array; inject Developer preface as first item (non-minimal only) + const lastUserMessage = [...messages].reverse().find((m) => m.role === "user") + const lastUserHasImages = + !!lastUserMessage && + Array.isArray(lastUserMessage.content) && + lastUserMessage.content.some((b: any) => (b as any)?.type === "image") + const minimalInputMode = Boolean(previousId) + + let inputPayload: unknown + if (minimalInputMode && lastUserMessage) { + // Minimal mode: only latest user turn + if (lastUserHasImages) { + inputPayload = this._toResponsesInput([lastUserMessage]) + } else { + inputPayload = this._formatResponsesSingleMessage(lastUserMessage, true) + } + } else if (lastUserHasImages && lastUserMessage) { + // Initial turn with images: include Developer preface and minimal context + const lastAssistantMessage = [...messages].reverse().find((m) => m.role === "assistant") + const messagesForArray = messages.filter((m) => { + if (m.role === "assistant") { + return lastAssistantMessage ? m === lastAssistantMessage : false + } + if (m.role === "user") { + const hasImage = + Array.isArray(m.content) && m.content.some((b: any) => (b as any)?.type === "image") + return hasImage || m === lastUserMessage + } + return false + }) + + const arrayInput = this._toResponsesInput(messagesForArray) + const developerPreface = { + role: "user" as const, + content: [{ type: "input_text" as const, text: `Developer: ${systemPrompt}` }], + } + inputPayload = [developerPreface, ...arrayInput] + } else { + // Pure text history: compact transcript string + inputPayload = this._formatResponsesInput(systemPrompt, messages) + } + + // Build base payload: use top-level instructions; default to storing unless explicitly disabled + const basePayload: Record = { + model: modelId, + input: inputPayload, + ...(previousId ? 
{ previous_response_id: previousId } : {}),
+			instructions: systemPrompt,
+			store: metadata?.store !== false,
+		}
+
+		// Reasoning effort (support "minimal"; include summary: "auto" unless disabled)
+		if (this.options.enableReasoningEffort && (this.options.reasoningEffort || openAiParams?.reasoningEffort)) {
+			const effort = (this.options.reasoningEffort || openAiParams?.reasoningEffort) as
+				| "minimal"
+				| "low"
+				| "medium"
+				| "high"
+				| undefined
+			if (effort) {
+				;(
+					basePayload as { reasoning?: { effort: "minimal" | "low" | "medium" | "high"; summary?: "auto" } }
+				).reasoning = {
+					effort,
+					...(this.options.enableGpt5ReasoningSummary !== false ? { summary: "auto" as const } : {}),
+				}
+			}
+		}
+
+		// Temperature: include only if model supports it
+		const deepseekReasoner = modelId.includes("deepseek-reasoner") || (this.options.openAiR1FormatEnabled ?? false)
+		if (modelInfo.supportsTemperature !== false) {
+			if (this.options.modelTemperature !== undefined) {
+				;(basePayload as Record<string, unknown>).temperature = this.options.modelTemperature
+			} else if (deepseekReasoner) {
+				;(basePayload as Record<string, unknown>).temperature = DEEP_SEEK_DEFAULT_TEMPERATURE
+			}
+		}
+
+		// Verbosity: include when provided; retry logic removes it on 400
+		if (this.options.verbosity) {
+			;(basePayload as { text?: { verbosity: "low" | "medium" | "high" } }).text = {
+				verbosity: this.options.verbosity as "low" | "medium" | "high",
+			}
+		}
+
+		// Always include max_output_tokens for Responses API to cap output length
+		const reservedMax = openAiParams?.maxTokens
+		;(basePayload as Record<string, unknown>).max_output_tokens =
+			this.options.modelMaxTokens || reservedMax || modelInfo.maxTokens
+
+		// Non-streaming path
+		if (nonStreaming) {
+			const response = await this._responsesCreateWithRetries(basePayload, {
+				usedArrayInput: Array.isArray(inputPayload),
+				lastUserMessage,
+				previousId,
+				systemPrompt,
+				messages,
+			})
+			yield* this._yieldResponsesResult(response, modelInfo)
+			return
+		}
+
+		// Streaming path (auto-fallback to non-streaming result if provider ignores stream flag)
+		const streamingPayload: Record<string, unknown> = { ...basePayload, stream: true }
+		const maybeStream = await this._responsesCreateWithRetries(streamingPayload, {
+			usedArrayInput: Array.isArray(inputPayload),
+			lastUserMessage,
+			previousId,
+			systemPrompt,
+			messages,
+		})
+
+		const isAsyncIterable = (obj: unknown): obj is AsyncIterable<unknown> =>
+			typeof (obj as AsyncIterable<unknown>)[Symbol.asyncIterator] === "function"
+
+		if (isAsyncIterable(maybeStream)) {
+			for await (const chunk of handleResponsesStream(maybeStream, {
+				onResponseId: (id) => {
+					this.lastResponseId = id
+				},
+			})) {
+				yield chunk
+			}
+		} else {
+			// Some providers may ignore the stream flag and return a complete response
+			yield* this._yieldResponsesResult(maybeStream, modelInfo)
+		}
+	}
+
+	/**
+	 * Determines which OpenAI-compatible API flavor to use based on the URL path.
+	 * - This is purely path-based and provider-agnostic (works for OpenAI, Azure OpenAI after normalization, etc.).
+	 *
+	 * Rules:
+	 * - If path contains "/v1/responses" OR ends with "/responses" => "responses"
+	 * - Else if path contains "/chat/completions" => "chat"
+	 * - Else default to "chat" for backward compatibility
+	 *
+	 * Notes:
+	 * - Trailing slashes are not required to match; we rely on substring checks.
+	 * - Azure "portal" style URLs are normalized beforehand where applicable.
+ * + * Examples: + * - https://api.openai.com/v1/responses -> "responses" + * - https://api.openai.com/v1/chat/completions -> "chat" + * - https://myres.openai.azure.com/openai/v1 -> "chat" (default) + * - https://myres.openai.azure.com/openai/v1/responses -> "responses" + */ + private _resolveApiFlavor(baseUrl: string): "responses" | "chat" { + // Auto-detect by URL path + const url = this._safeParseUrl(baseUrl) + const path = url?.pathname || "" + if (path.includes("/v1/responses") || path.endsWith("/responses")) { + return "responses" + } + if (path.includes("/chat/completions")) { + return "chat" + } + // Default to Chat Completions for backward compatibility + return "chat" + } + + private _safeParseUrl(input?: string): URL | undefined { + try { + if (!input) return undefined + return new URL(input) + } catch { + return undefined + } + } + + private _toResponsesInput(anthropicMessages: Anthropic.Messages.MessageParam[]): Array<{ + role: "user" | "assistant" + content: Array< + | { type: "input_text"; text: string } + | { type: "input_image"; image_url: string } + | { type: "output_text"; text: string } + > + }> { + const input: Array<{ + role: "user" | "assistant" + content: Array< + | { type: "input_text"; text: string } + | { type: "input_image"; image_url: string } + | { type: "output_text"; text: string } + > + }> = [] + + for (const msg of anthropicMessages) { + const role = msg.role === "assistant" ? "assistant" : "user" + const parts: Array< + | { type: "input_text"; text: string } + | { type: "input_image"; image_url: string } + | { type: "output_text"; text: string } + > = [] + + if (typeof msg.content === "string") { + if (msg.content.length > 0) { + if (role === "assistant") { + parts.push({ type: "output_text", text: msg.content }) + } else { + parts.push({ type: "input_text", text: msg.content }) + } + } + } else if (Array.isArray(msg.content)) { + for (const block of msg.content) { + if (block.type === "text") { + if (role === "assistant") { + parts.push({ type: "output_text", text: block.text }) + } else { + parts.push({ type: "input_text", text: block.text }) + } + } else if (block.type === "image") { + // Images are treated as user input; ignore images on assistant turns + if (role === "user") { + parts.push({ + type: "input_image", + image_url: `data:${block.source.media_type};base64,${block.source.data}`, + }) + } + } + // tool_use/tool_result are omitted in this minimal mapping (can be added as needed) + } + } + + if (parts.length > 0) { + input.push({ role, content: parts }) + } + } + return input + } + + private _extractResponsesText(response: any): string | undefined { + // Prefer the simple output_text if present, otherwise attempt to parse output array + if (response?.output_text) return response.output_text + if (Array.isArray(response?.output)) { + // Find assistant message with output_text + for (const item of response.output) { + if (item?.type === "message" && Array.isArray(item.content)) { + const textPart = item.content.find( + (c: any) => c.type === "output_text" && typeof c.text === "string", + ) + if (textPart?.text) return textPart.text + } + } + } + return undefined + } + + private _isInputTextInvalidError(err: unknown): boolean { + if (err == null || typeof err !== "object") return false + const anyErr = err as { + status?: unknown + response?: { status?: unknown } + message?: unknown + error?: { message?: unknown } + } + const statusRaw = anyErr.status ?? anyErr.response?.status + const status = typeof statusRaw === "number" ? 
statusRaw : Number(statusRaw)
+		const msgRaw = (anyErr.message ?? anyErr.error?.message ?? "").toString().toLowerCase()
+		return status === 400 && msgRaw.includes("invalid value") && msgRaw.includes("input_text")
+	}
+
+	/**
+	 * Centralized Responses.create with one-shot retries for common provider errors:
+	 * - 400 "Previous response ... not found" -> drop previous_response_id and retry
+	 * - 400 unknown/unsupported "text.verbosity" -> remove text and retry
+	 * - 400 invalid value for input_text (Azure) -> rebuild single-message string input and retry
+	 * Returns either an AsyncIterable (streaming) or a full response object (non-streaming).
+	 */
+	private async _responsesCreateWithRetries(
+		payload: Record<string, unknown>,
+		opts: {
+			usedArrayInput: boolean
+			lastUserMessage?: Anthropic.Messages.MessageParam
+			previousId?: string
+			systemPrompt: string
+			messages: Anthropic.Messages.MessageParam[]
+		},
+	): Promise<unknown> {
+		const create = (body: Record<string, unknown>) => {
+			const hasResponsesCreate = (
+				obj: unknown,
+			): obj is { responses: { create: (b: Record<string, unknown>) => Promise<unknown> } } => {
+				if (obj == null || typeof obj !== "object") return false
+				const responses = (obj as Record<string, unknown>).responses
+				if (responses == null || typeof responses !== "object") return false
+				return typeof (responses as Record<string, unknown>).create === "function"
+			}
+			if (!hasResponsesCreate(this.client)) {
+				throw new Error("Responses API not available on client")
+			}
+			return this.client.responses.create(body)
+		}
+
+		try {
+			return await create(payload)
+		} catch (err: unknown) {
+			// Retry without previous_response_id if server rejects it
+			if (opts.previousId && this._isPreviousResponseNotFoundError(err)) {
+				const { previous_response_id: _omitPrev, ...withoutPrev } = payload as {
+					previous_response_id?: unknown
+					[key: string]: unknown
+				}
+				this.lastResponseId = undefined
+				return await create(withoutPrev)
+			}
+
+			// Graceful downgrade if verbosity is rejected by server
+			if ("text" in payload && this._isVerbosityUnsupportedError(err)) {
+				const { text: _omit, ...withoutVerbosity } = payload as { text?: unknown } & Record<string, unknown>
+				return await create(withoutVerbosity)
+			}
+
+			// Azure-specific fallback when array input is rejected
+			if (opts.usedArrayInput && this._isInputTextInvalidError(err)) {
+				const fallbackInput =
+					opts.previousId && opts.lastUserMessage
+						? this._formatResponsesSingleMessage(opts.lastUserMessage, true)
+						: this._formatResponsesInput(opts.systemPrompt, opts.messages)
+
+				const retryPayload: Record<string, unknown> = {
+					...payload,
+					input: fallbackInput,
+				}
+				return await create(retryPayload)
+			}
+
+			throw err
+		}
+	}
+	private async *_yieldResponsesResult(response: any, modelInfo: ModelInfo): ApiStream {
+		// Capture response id for continuity when present
+		try {
+			const respId = (response as { id?: unknown } | undefined)?.id
+			if (typeof respId === "string" && respId.length > 0) {
+				this.lastResponseId = respId
+			}
+		} catch {
+			// ignore
+		}
+
+		const text = this._extractResponsesText(response) ??
"" + if (text) { + yield { type: "text", text } + } + // Translate usage fields if present + const usage = response?.usage + if (usage) { + yield { + type: "usage", + inputTokens: usage.input_tokens || usage.prompt_tokens || 0, + outputTokens: usage.output_tokens || usage.completion_tokens || 0, + cacheWriteTokens: usage.cache_creation_input_tokens || undefined, + cacheReadTokens: usage.cache_read_input_tokens || undefined, + } + } + } + + private _isVerbosityUnsupportedError(err: unknown): boolean { + if (err == null || typeof err !== "object") return false + + // you had hasOwnProperty("message") twice — likely a typo + if (!("message" in err)) return false + + const msg = String((err as { message?: unknown }).message ?? "").toLowerCase() + + const rawStatus = "status" in err ? (err as { status?: unknown }).status : undefined + const status = typeof rawStatus === "number" ? rawStatus : Number(rawStatus) + + return ( + status === 400 && + (msg.includes("verbosity") || msg.includes("unknown parameter") || msg.includes("unsupported")) + ) + } + + private _isPreviousResponseNotFoundError(err: unknown): boolean { + if (err == null || typeof err !== "object") return false + const anyErr = err as { + status?: unknown + response?: { status?: unknown } + message?: unknown + error?: { message?: unknown } + } + const statusRaw = anyErr.status ?? anyErr.response?.status + const status = typeof statusRaw === "number" ? statusRaw : Number(statusRaw) + const msg = (anyErr.message ?? anyErr.error?.message ?? "").toString().toLowerCase() + return status === 400 && (msg.includes("previous response") || msg.includes("not found")) + } + + // ---- Responses input formatting (align with openai-native.ts) ---- + + private _formatResponsesInput(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): string { + // Developer role for system prompt + let formattedInput = `Developer: ${systemPrompt}\n\n` + for (const message of messages) { + const role = message.role === "user" ? "User" : "Assistant" + if (typeof message.content === "string") { + formattedInput += `${role}: ${message.content}\n\n` + } else if (Array.isArray(message.content)) { + const textContent = message.content + .filter((block) => block.type === "text") + .map((block) => block.text) + .join("\n") + if (textContent) { + formattedInput += `${role}: ${textContent}\n\n` + } + } + } + return formattedInput.trim() + } + + private _formatResponsesSingleMessage( + message: Anthropic.Messages.MessageParam, + includeRole: boolean = true, + ): string { + const role = includeRole ? (message.role === "user" ? "User" : "Assistant") + ": " : "" + if (typeof message.content === "string") { + return `${role}${message.content}` + } + if (Array.isArray(message.content)) { + const textContent = message.content + .filter((block) => block.type === "text") + .map((block) => block.text) + .join("\n") + return `${role}${textContent}` + } + return role + } } export async function getOpenAiModels(baseUrl?: string, apiKey?: string, openAiHeaders?: Record) { diff --git a/src/api/transform/responses-stream.ts b/src/api/transform/responses-stream.ts new file mode 100644 index 00000000000..f0152c9ec36 --- /dev/null +++ b/src/api/transform/responses-stream.ts @@ -0,0 +1,263 @@ +import type { ApiStreamChunk } from "./stream" + +/** + * Minimal, typed streaming handler for OpenAI/Azure Responses API streams. + * Consumes an AsyncIterable of events and yields ApiStreamChunk items. 
+ *
+ * Notes:
+ * - We intentionally handle only the core, stable event shapes that we already
+ *   use in openai-native, to keep the surface area small and predictable.
+ * - If the event format changes, extend the type guards below conservatively.
+ */
+export async function* handleResponsesStream(
+	stream: AsyncIterable<unknown>,
+	options?: { onResponseId?: (id: string) => void },
+): AsyncGenerator<ApiStreamChunk> {
+	let lastUsage: ResponseUsage | undefined
+
+	for await (const event of stream) {
+		// Surface response.id to callers when available (for conversation continuity)
+		if (isObject(event)) {
+			const resp = (event as Record<string, unknown>).response
+			if (isObject(resp)) {
+				const rid = (resp as Record<string, unknown>).id
+				if (typeof rid === "string") {
+					options?.onResponseId?.(rid)
+				}
+			}
+		}
+		// 1) Streaming text deltas
+		if (isTextDelta(event)) {
+			const e = event as TextDeltaEvent
+			if (e.delta != null) {
+				yield { type: "text", text: String(e.delta) }
+			}
+			continue
+		}
+
+		// 2) Streaming reasoning deltas
+		if (isReasoningDelta(event)) {
+			const e = event as ReasoningDeltaEvent
+			if (e.delta != null) {
+				yield { type: "reasoning", text: String(e.delta) }
+			}
+			continue
+		}
+
+		// 2.1) Audio transcript deltas (map to text)
+		if (isAudioTranscriptDelta(event)) {
+			const e = event as AudioTranscriptDeltaEvent
+			if (e.delta != null) {
+				yield { type: "text", text: String(e.delta) }
+			}
+			continue
+		}
+
+		// 3) Refusal deltas (map to text with prefix, matching native handler behavior)
+		if (isRefusalDelta(event)) {
+			const e = event as RefusalDeltaEvent
+			if (e.delta != null) {
+				yield { type: "text", text: `[Refusal] ${String(e.delta)}` }
+			}
+			continue
+		}
+
+		// 4) Output-item added (alternative carrier for text/reasoning)
+		if (isOutputItemAdded(event)) {
+			const item = (event as OutputItemAddedEvent).item
+			if (item) {
+				if (item.type === "text" && typeof item.text === "string") {
+					yield { type: "text", text: item.text }
+				} else if (item.type === "reasoning" && typeof item.text === "string") {
+					yield { type: "reasoning", text: item.text }
+				} else if (item.type === "message" && Array.isArray(item.content)) {
+					for (const content of item.content) {
+						// Some servers use "text"; others use "output_text"
+						if (
+							(content?.type === "text" || content?.type === "output_text") &&
+							typeof content?.text === "string"
+						) {
+							yield { type: "text", text: content.text }
+						}
+					}
+				} else if (typeof item.text === "string") {
+					// Fallback: emit item.text even if item.type is unknown (matches native handler tolerance)
+					yield { type: "text", text: item.text }
+				}
+			}
+			continue
+		}
+
+		// 4.1) Content part added (SDK alternative format)
+		if (isContentPartAdded(event)) {
+			const part = (event as ContentPartAddedEvent).part
+			if (part && part.type === "text" && typeof part.text === "string") {
+				yield { type: "text", text: part.text }
+			}
+			continue
+		}
+
+		// 5) Fallback: some implementations (or older shapes) supply choices[0].delta.content
+		const content = getChoiceDeltaContent(event)
+		if (content) {
+			yield { type: "text", text: content }
+		}
+
+		// 6) Track usage whenever present
+		const usage = extractUsage(event)
+		if (usage) {
+			lastUsage = usage
+		}
+
+		// 7) Completion/done events - emit usage if we have it
+		if (isDoneEvent(event)) {
+			const u = lastUsage
+			if (u && hasAnyUsage(u)) {
+				yield makeUsageChunk(u)
+			}
+		}
+	}
+}
+
+/** Types, guards, and helpers */
+
+type ResponseUsage = {
+	input_tokens?: number
+	output_tokens?: number
+	prompt_tokens?: number
+	completion_tokens?: number
+	cache_creation_input_tokens?:
number
+	cache_read_input_tokens?: number
+	prompt_tokens_details?: { cached_tokens?: number }
+}
+
+type TextDeltaEvent = {
+	type: "response.text.delta" | "response.output_text.delta"
+	delta?: unknown
+}
+
+type ReasoningDeltaEvent = {
+	type:
+		| "response.reasoning.delta"
+		| "response.reasoning_text.delta"
+		| "response.reasoning_summary.delta"
+		| "response.reasoning_summary_text.delta"
+	delta?: unknown
+}
+
+type RefusalDeltaEvent = {
+	type: "response.refusal.delta"
+	delta?: unknown
+}
+
+type OutputItemAddedEvent = {
+	type: "response.output_item.added"
+	item?: {
+		type?: string
+		text?: unknown
+		content?: Array<{ type?: string; text?: unknown }>
+	}
+}
+
+type DoneEvent = {
+	type: "response.done" | "response.completed"
+}
+
+type AudioTranscriptDeltaEvent = {
+	type: "response.audio_transcript.delta"
+	delta?: unknown
+}
+
+type ContentPartAddedEvent = {
+	type: "response.content_part.added"
+	part?: {
+		type?: string
+		text?: unknown
+	}
+}
+
+function isObject(value: unknown): value is Record<string, unknown> {
+	return typeof value === "object" && value !== null
+}
+
+function isTextDelta(event: unknown): event is TextDeltaEvent {
+	return (
+		isObject(event) &&
+		typeof (event as Record<string, unknown>).type === "string" &&
+		(((event as Record<string, unknown>).type as string) === "response.text.delta" ||
+			((event as Record<string, unknown>).type as string) === "response.output_text.delta")
+	)
+}
+
+function isReasoningDelta(event: unknown): event is ReasoningDeltaEvent {
+	if (!isObject(event)) return false
+	const t = (event as Record<string, unknown>).type
+	return (
+		t === "response.reasoning.delta" ||
+		t === "response.reasoning_text.delta" ||
+		t === "response.reasoning_summary.delta" ||
+		t === "response.reasoning_summary_text.delta"
+	)
+}
+
+function isRefusalDelta(event: unknown): event is RefusalDeltaEvent {
+	return isObject(event) && (event as Record<string, unknown>).type === "response.refusal.delta"
+}
+
+function isOutputItemAdded(event: unknown): event is OutputItemAddedEvent {
+	return isObject(event) && (event as Record<string, unknown>).type === "response.output_item.added"
+}
+
+function isAudioTranscriptDelta(event: unknown): event is AudioTranscriptDeltaEvent {
+	return isObject(event) && (event as Record<string, unknown>).type === "response.audio_transcript.delta"
+}
+
+function isContentPartAdded(event: unknown): event is ContentPartAddedEvent {
+	return isObject(event) && (event as Record<string, unknown>).type === "response.content_part.added"
+}
+
+function isDoneEvent(event: unknown): event is DoneEvent {
+	if (!isObject(event)) return false
+	const t = (event as Record<string, unknown>).type
+	return t === "response.done" || t === "response.completed"
+}
+
+function getChoiceDeltaContent(event: unknown): string | undefined {
+	if (!isObject(event)) return undefined
+	const choices = (event as Record<string, unknown>).choices
+	if (!Array.isArray(choices) || choices.length === 0) return undefined
+	const first = choices[0]
+	if (!isObject(first)) return undefined
+	const delta = (first as Record<string, unknown>).delta
+	if (!isObject(delta)) return undefined
+	const content = (delta as Record<string, unknown>).content
+	if (content == null) return undefined
+	return String(content)
+}
+
+function extractUsage(event: unknown): ResponseUsage | undefined {
+	if (!isObject(event)) return undefined
+	const resp = (event as Record<string, unknown>).response
+	if (isObject(resp) && isObject((resp as Record<string, unknown>).usage)) {
+		return (resp as Record<string, unknown>).usage as ResponseUsage
+	}
+	const usage = (event as Record<string, unknown>).usage
+	if (isObject(usage)) {
+		return usage as ResponseUsage
+	}
+	return undefined
+}
+
+function hasAnyUsage(usage: ResponseUsage): boolean {
+	return
Boolean(usage.input_tokens || usage.output_tokens || usage.prompt_tokens || usage.completion_tokens)
+}
+
+function makeUsageChunk(usage: ResponseUsage): ApiStreamChunk {
+	return {
+		type: "usage",
+		inputTokens: usage.input_tokens ?? usage.prompt_tokens ?? 0,
+		outputTokens: usage.output_tokens ?? usage.completion_tokens ?? 0,
+		cacheWriteTokens: usage.cache_creation_input_tokens ?? undefined,
+		cacheReadTokens: usage.cache_read_input_tokens ?? usage.prompt_tokens_details?.cached_tokens ?? undefined,
+	}
+}
diff --git a/src/package.json b/src/package.json
index d9e986feac9..ccd16a9f869 100644
--- a/src/package.json
+++ b/src/package.json
@@ -468,7 +468,7 @@
 		"node-cache": "^5.1.2",
 		"node-ipc": "^12.0.0",
 		"ollama": "^0.5.17",
-		"openai": "^5.12.2",
+		"openai": "^5.15.0",
 		"os-name": "^6.0.0",
 		"p-limit": "^6.2.0",
 		"p-wait-for": "^5.0.2",
diff --git a/webview-ui/src/components/settings/providers/OpenAICompatible.tsx b/webview-ui/src/components/settings/providers/OpenAICompatible.tsx
index 736b0253c43..f67e23789cf 100644
--- a/webview-ui/src/components/settings/providers/OpenAICompatible.tsx
+++ b/webview-ui/src/components/settings/providers/OpenAICompatible.tsx
@@ -22,6 +22,7 @@ import { inputEventTransform, noTransform } from "../transforms"
 import { ModelPicker } from "../ModelPicker"
 import { R1FormatSetting } from "../R1FormatSetting"
 import { ThinkingBudget } from "../ThinkingBudget"
+import { Verbosity } from "../Verbosity"
 
 type OpenAICompatibleProps = {
 	apiConfiguration: ProviderSettings
@@ -40,6 +41,7 @@ export const OpenAICompatible = ({
 	const [azureApiVersionSelected, setAzureApiVersionSelected] = useState(!!apiConfiguration?.azureApiVersion)
 	const [openAiLegacyFormatSelected, setOpenAiLegacyFormatSelected] = useState(!!apiConfiguration?.openAiLegacyFormat)
+	const [verbositySelected, setVerbositySelected] = useState(!!apiConfiguration?.verbosity)
 
 	const [openAiModels, setOpenAiModels] = useState<Record<string, ModelInfo> | null>(null)
 
@@ -282,6 +284,27 @@ export const OpenAICompatible = ({
 						/>
 					)}
+
+					<div>
+						<Checkbox
+							checked={verbositySelected}
+							onChange={(checked: boolean) => {
+								setVerbositySelected(checked)
+								if (!checked) {
+									setApiConfigurationField("verbosity", undefined)
+								} else if (!apiConfiguration.verbosity) {
+									setApiConfigurationField("verbosity", "medium")
+								}
+							}}>
+							{t("settings:providers.verbosity.label")}
+						</Checkbox>
+						{verbositySelected && (
+							<Verbosity
+								apiConfiguration={apiConfiguration}
+								setApiConfigurationField={setApiConfigurationField}
+							/>
+						)}
+					</div>
{t("settings:providers.customModel.capabilities")}