diff --git a/src/api/providers/__tests__/bedrock-temperature-top-p.spec.ts b/src/api/providers/__tests__/bedrock-temperature-top-p.spec.ts
new file mode 100644
index 0000000000..1103e20598
--- /dev/null
+++ b/src/api/providers/__tests__/bedrock-temperature-top-p.spec.ts
@@ -0,0 +1,195 @@
+// npx vitest run src/api/providers/__tests__/bedrock-temperature-top-p.spec.ts
+
+import { vi, describe, it, expect, beforeEach } from "vitest"
+import { AwsBedrockHandler } from "../bedrock"
+import { ConverseStreamCommand, ConverseCommand } from "@aws-sdk/client-bedrock-runtime"
+import type { Anthropic } from "@anthropic-ai/sdk"
+
+// Mock AWS SDK credential providers
+vi.mock("@aws-sdk/credential-providers", () => ({
+	fromIni: vi.fn().mockReturnValue({
+		accessKeyId: "test-access-key",
+		secretAccessKey: "test-secret-key",
+	}),
+}))
+
+// Mock BedrockRuntimeClient
+vi.mock("@aws-sdk/client-bedrock-runtime", () => {
+	const mockSend = vi.fn().mockResolvedValue({
+		stream: [],
+		output: {
+			message: {
+				content: [{ text: "Test response" }],
+			},
+		},
+	})
+	const mockConverseStreamCommand = vi.fn()
+	const mockConverseCommand = vi.fn()
+
+	return {
+		BedrockRuntimeClient: vi.fn().mockImplementation(() => ({
+			send: mockSend,
+		})),
+		ConverseStreamCommand: mockConverseStreamCommand,
+		ConverseCommand: mockConverseCommand,
+	}
+})
+
+const mockConverseStreamCommand = vi.mocked(ConverseStreamCommand)
+const mockConverseCommand = vi.mocked(ConverseCommand)
+
+describe("Bedrock temperature and topP handling", () => {
+	let handler: AwsBedrockHandler
+
+	beforeEach(() => {
+		vi.clearAllMocks()
+	})
+
+	describe("Claude Sonnet 4.5 model", () => {
+		beforeEach(() => {
+			handler = new AwsBedrockHandler({
+				apiModelId: "anthropic.claude-sonnet-4-5-20250929-v1:0",
+				awsAccessKey: "test-access-key",
+				awsSecretKey: "test-secret-key",
+				awsRegion: "us-east-1",
+				modelTemperature: 0.7,
+			})
+		})
+
+		it("should only send temperature and not topP in createMessage when thinking is disabled", async () => {
+			const messages: Anthropic.Messages.MessageParam[] = [
+				{
+					role: "user",
+					content: "Test message",
+				},
+			]
+
+			const generator = handler.createMessage("", messages)
+			await generator.next()
+
+			expect(mockConverseStreamCommand).toHaveBeenCalled()
+			const commandArg = mockConverseStreamCommand.mock.calls[0][0] as any
+
+			// Should have temperature but not topP
+			expect(commandArg.inferenceConfig).toBeDefined()
+			expect(commandArg.inferenceConfig.temperature).toBe(0.7)
+			expect(commandArg.inferenceConfig.topP).toBeUndefined()
+		})
+
+		it("should only send temperature and not topP in completePrompt", async () => {
+			await handler.completePrompt("Test prompt")
+
+			expect(mockConverseCommand).toHaveBeenCalled()
+			const commandArg = mockConverseCommand.mock.calls[0][0] as any
+
+			// Should have temperature but not topP
+			expect(commandArg.inferenceConfig).toBeDefined()
+			expect(commandArg.inferenceConfig.temperature).toBe(0.7)
+			expect(commandArg.inferenceConfig.topP).toBeUndefined()
+		})
+	})
+
+	describe("Other Bedrock models", () => {
+		beforeEach(() => {
+			handler = new AwsBedrockHandler({
+				apiModelId: "anthropic.claude-3-5-sonnet-20241022-v2:0",
+				awsAccessKey: "test-access-key",
+				awsSecretKey: "test-secret-key",
+				awsRegion: "us-east-1",
+				modelTemperature: 0.7,
+			})
+		})
+
+		it("should only send temperature and not topP for Claude 3.5 Sonnet", async () => {
+			const messages: Anthropic.Messages.MessageParam[] = [
+				{
+					role: "user",
+					content: "Test message",
+				},
+			]
+
+			const generator = handler.createMessage("", messages)
+			await generator.next()
+
+			expect(mockConverseStreamCommand).toHaveBeenCalled()
+			const commandArg = mockConverseStreamCommand.mock.calls[0][0] as any
+
+			// Should have temperature but not topP
+			expect(commandArg.inferenceConfig).toBeDefined()
+			expect(commandArg.inferenceConfig.temperature).toBe(0.7)
+			expect(commandArg.inferenceConfig.topP).toBeUndefined()
+		})
+	})
+
+	describe("Models with thinking enabled", () => {
+		beforeEach(() => {
+			handler = new AwsBedrockHandler({
+				apiModelId: "anthropic.claude-3-7-sonnet-20250219-v1:0",
+				awsAccessKey: "test-access-key",
+				awsSecretKey: "test-secret-key",
+				awsRegion: "us-east-1",
+				modelTemperature: 0.7,
+				enableReasoningEffort: true,
+			})
+		})
+
+		it("should only send temperature when thinking is enabled", async () => {
+			const messages: Anthropic.Messages.MessageParam[] = [
+				{
+					role: "user",
+					content: "Test message",
+				},
+			]
+
+			const metadata = {
+				taskId: "test-task-id",
+				thinking: {
+					enabled: true,
+					maxThinkingTokens: 4096,
+				},
+			}
+
+			const generator = handler.createMessage("", messages, metadata as any)
+			await generator.next()
+
+			expect(mockConverseStreamCommand).toHaveBeenCalled()
+			const commandArg = mockConverseStreamCommand.mock.calls[0][0] as any
+
+			// Should have temperature but not topP when thinking is enabled
+			expect(commandArg.inferenceConfig).toBeDefined()
+			// Temperature is overridden to 1.0 when reasoning is enabled
+			expect(commandArg.inferenceConfig.temperature).toBe(1.0)
+			expect(commandArg.inferenceConfig.topP).toBeUndefined()
+		})
+	})
+
+	describe("Default temperature handling", () => {
+		it("should use default temperature when not specified", async () => {
+			handler = new AwsBedrockHandler({
+				apiModelId: "anthropic.claude-sonnet-4-5-20250929-v1:0",
+				awsAccessKey: "test-access-key",
+				awsSecretKey: "test-secret-key",
+				awsRegion: "us-east-1",
+				// No modelTemperature specified
+			})
+
+			const messages: Anthropic.Messages.MessageParam[] = [
+				{
+					role: "user",
+					content: "Test message",
+				},
+			]
+
+			const generator = handler.createMessage("", messages)
+			await generator.next()
+
+			expect(mockConverseStreamCommand).toHaveBeenCalled()
+			const commandArg = mockConverseStreamCommand.mock.calls[0][0] as any
+
+			// Should have default temperature (0.3) but not topP
+			expect(commandArg.inferenceConfig).toBeDefined()
+			expect(commandArg.inferenceConfig.temperature).toBe(0.3) // BEDROCK_DEFAULT_TEMPERATURE
+			expect(commandArg.inferenceConfig.topP).toBeUndefined()
+		})
+	})
+})
diff --git a/src/api/providers/bedrock.ts b/src/api/providers/bedrock.ts
index c6a0b35df4..73c3f489c1 100644
--- a/src/api/providers/bedrock.ts
+++ b/src/api/providers/bedrock.ts
@@ -374,9 +374,9 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
 			temperature: modelConfig.temperature ??
 				(this.options.modelTemperature as number),
 		}
-		if (!thinkingEnabled) {
-			inferenceConfig.topP = 0.1
-		}
+		// AWS Bedrock rejects requests that specify both temperature and topP for certain models
+		// Since temperature is always sent, skip topP entirely to avoid the validation error
+		// Note: this drops the previous topP default of 0.1 that was sent when thinking was disabled
 
 		// Check if 1M context is enabled for Claude Sonnet 4
 		// Use parseBaseModelId to handle cross-region inference prefixes
@@ -647,7 +647,8 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
 		const inferenceConfig: BedrockInferenceConfig = {
 			maxTokens: modelConfig.maxTokens || (modelConfig.info.maxTokens as number),
 			temperature: modelConfig.temperature ?? (this.options.modelTemperature as number),
-			...(thinkingEnabled ? {} : { topP: 0.1 }), // Only set topP when thinking is NOT enabled
+			// AWS Bedrock doesn't allow both temperature and topP to be specified for certain models
+			// We only send temperature to avoid the "cannot both be specified" error
 		}
 
 		// For completePrompt, use a unique conversation ID based on the prompt
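
For context, the constraint the new comments describe is enforced at the Converse API level: for the affected Anthropic models, Bedrock rejects a request whose `inferenceConfig` carries both `temperature` and `topP`. Below is a minimal standalone sketch of a request shaped the way the handler now shapes it; the model ID, region, and values are illustrative placeholders and not taken from this diff.

```ts
import { BedrockRuntimeClient, ConverseCommand } from "@aws-sdk/client-bedrock-runtime"

async function main() {
	// Region and model ID are placeholders for illustration.
	const client = new BedrockRuntimeClient({ region: "us-east-1" })

	const command = new ConverseCommand({
		modelId: "anthropic.claude-sonnet-4-5-20250929-v1:0",
		messages: [{ role: "user", content: [{ text: "Hello" }] }],
		inferenceConfig: {
			maxTokens: 1024,
			temperature: 0.7,
			// topP deliberately omitted: per the comments in this PR, sending both
			// temperature and topP is rejected for some Anthropic models on Bedrock.
		},
	})

	const response = await client.send(command)
	console.log(response.output?.message?.content?.[0]?.text)
}

main().catch(console.error)
```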