@@ -1,7 +1,7 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 import OpenAI from "openai"

-import { rooDefaultModelId } from "@roo-code/types"
+import { rooDefaultModelId, getApiProtocol } from "@roo-code/types"
 import { CloudService } from "@roo-code/cloud"

 import type { ApiHandlerOptions, ModelRecord } from "../../shared/api"
@@ -163,12 +163,25 @@ export class RooHandler extends BaseOpenAiCompatibleProvider<string> {
 		const model = this.getModel()
 		const isFreeModel = model.info.isFree ?? false

+		// Normalize input tokens based on protocol expectations:
+		// - OpenAI protocol expects TOTAL input tokens (cached + non-cached)
+		// - Anthropic protocol expects NON-CACHED input tokens (caches passed separately)
+		const modelId = model.id
+		const apiProtocol = getApiProtocol("roo", modelId)
+
+		const promptTokens = lastUsage.prompt_tokens || 0
+		const cacheWrite = lastUsage.cache_creation_input_tokens || 0
+		const cacheRead = lastUsage.prompt_tokens_details?.cached_tokens || 0
+		const nonCached = Math.max(0, promptTokens - cacheWrite - cacheRead)
+
+		const inputTokensForDownstream = apiProtocol === "anthropic" ? nonCached : promptTokens
+
 		yield {
 			type: "usage",
-			inputTokens: lastUsage.prompt_tokens || 0,
+			inputTokens: inputTokensForDownstream,
 			outputTokens: lastUsage.completion_tokens || 0,
-			cacheWriteTokens: lastUsage.cache_creation_input_tokens,
-			cacheReadTokens: lastUsage.prompt_tokens_details?.cached_tokens,
+			cacheWriteTokens: cacheWrite,
+			cacheReadTokens: cacheRead,
 			totalCost: isFreeModel ? 0 : (lastUsage.cost ?? 0),
 		}
 	}
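
To make the normalization concrete, here is a minimal standalone sketch with made-up token counts (the lastUsage literal and its numbers are hypothetical; only the arithmetic mirrors the change above). With these values an Anthropic-protocol model is reported 300 input tokens, while an OpenAI-protocol model is reported the full 1200.

// Hypothetical OpenAI-style usage payload, for illustration only.
const lastUsage = {
	prompt_tokens: 1200, // total input tokens (cached + non-cached)
	completion_tokens: 300,
	cache_creation_input_tokens: 400, // tokens written to the prompt cache
	prompt_tokens_details: { cached_tokens: 500 }, // tokens read from the cache
}

const promptTokens = lastUsage.prompt_tokens || 0
const cacheWrite = lastUsage.cache_creation_input_tokens || 0
const cacheRead = lastUsage.prompt_tokens_details?.cached_tokens || 0

// Non-cached portion: 1200 - 400 - 500 = 300
const nonCached = Math.max(0, promptTokens - cacheWrite - cacheRead)

// Anthropic protocol reports only the non-cached tokens (300), since cache
// reads and writes travel in their own fields; OpenAI protocol reports the
// total (1200).
const inputTokensAnthropic = nonCached // 300
const inputTokensOpenAI = promptTokens // 1200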