Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/fair-houses-deny.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"roo-cline": patch
---

Shows in the UI when the context is intelligently condensed
14 changes: 14 additions & 0 deletions evals/packages/types/src/roo-code.ts
Original file line number Diff line number Diff line change
Expand Up @@ -993,6 +993,7 @@ export const clineSays = [
"checkpoint_saved",
"rooignore_error",
"diff_error",
"condense_context",
] as const

export const clineSaySchema = z.enum(clineSays)
Expand All @@ -1011,6 +1012,18 @@ export const toolProgressStatusSchema = z.object({

export type ToolProgressStatus = z.infer<typeof toolProgressStatusSchema>

/**
* ContextCondense
*/

/**
 * Payload attached to a ClineMessage (see `contextCondense` on
 * `clineMessageSchema`) when the conversation context has been condensed:
 * reports the cost of the summarization operation and the context token
 * counts before and after condensing.
 */
export const contextCondenseSchema = z.object({
cost: z.number(), // cost of the summarization operation
prevContextTokens: z.number(), // context token count before condensing
newContextTokens: z.number(), // context token count after condensing
})

// Inferred TypeScript type for the schema above.
export type ContextCondense = z.infer<typeof contextCondenseSchema>

/**
* ClineMessage
*/
Expand All @@ -1027,6 +1040,7 @@ export const clineMessageSchema = z.object({
conversationHistoryIndex: z.number().optional(),
checkpoint: z.record(z.string(), z.unknown()).optional(),
progressStatus: toolProgressStatusSchema.optional(),
contextCondense: contextCondenseSchema.optional(),
})

export type ClineMessage = z.infer<typeof clineMessageSchema>
Expand Down
64 changes: 56 additions & 8 deletions src/core/condense/__tests__/index.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,11 @@ describe("summarizeConversation", () => {
// Reset mocks
jest.clearAllMocks()

// Setup mock stream
// Setup mock stream with usage information
mockStream = (async function* () {
yield { type: "text" as const, text: "This is " }
yield { type: "text" as const, text: "a summary" }
yield { type: "usage" as const, totalCost: 0.05, outputTokens: 150 }
})()

// Setup mock API handler
Expand Down Expand Up @@ -103,7 +104,10 @@ describe("summarizeConversation", () => {
]

const result = await summarizeConversation(messages, mockApiHandler)
expect(result).toEqual(messages)
expect(result.messages).toEqual(messages)
expect(result.cost).toBe(0)
expect(result.summary).toBe("")
expect(result.newContextTokens).toBeUndefined()
expect(mockApiHandler.createMessage).not.toHaveBeenCalled()
})

Expand All @@ -119,7 +123,10 @@ describe("summarizeConversation", () => {
]

const result = await summarizeConversation(messages, mockApiHandler)
expect(result).toEqual(messages)
expect(result.messages).toEqual(messages)
expect(result.cost).toBe(0)
expect(result.summary).toBe("")
expect(result.newContextTokens).toBeUndefined()
expect(mockApiHandler.createMessage).not.toHaveBeenCalled()
})

Expand All @@ -142,17 +149,22 @@ describe("summarizeConversation", () => {

// Verify the structure of the result
// The result should be: original messages (except last N) + summary + last N messages
expect(result.length).toBe(messages.length + 1) // Original + summary
expect(result.messages.length).toBe(messages.length + 1) // Original + summary

// Check that the summary message was inserted correctly
const summaryMessage = result[result.length - N_MESSAGES_TO_KEEP - 1]
const summaryMessage = result.messages[result.messages.length - N_MESSAGES_TO_KEEP - 1]
expect(summaryMessage.role).toBe("assistant")
expect(summaryMessage.content).toBe("This is a summary")
expect(summaryMessage.isSummary).toBe(true)

// Check that the last N_MESSAGES_TO_KEEP messages are preserved
const lastMessages = messages.slice(-N_MESSAGES_TO_KEEP)
expect(result.slice(-N_MESSAGES_TO_KEEP)).toEqual(lastMessages)
expect(result.messages.slice(-N_MESSAGES_TO_KEEP)).toEqual(lastMessages)

// Check the cost and token counts
expect(result.cost).toBe(0.05)
expect(result.summary).toBe("This is a summary")
expect(result.newContextTokens).toBe(250) // 150 output tokens + 100 from countTokens
})

it("should handle empty summary response", async () => {
Expand All @@ -172,9 +184,10 @@ describe("summarizeConversation", () => {
const mockWarn = jest.fn()
console.warn = mockWarn

// Setup empty summary response
// Setup empty summary response with usage information
const emptyStream = (async function* () {
yield { type: "text" as const, text: "" }
yield { type: "usage" as const, totalCost: 0.02, outputTokens: 0 }
})()

// Create a new mock for createMessage that returns empty stream
Expand All @@ -189,7 +202,9 @@ describe("summarizeConversation", () => {
const result = await summarizeConversation(messages, mockApiHandler)

// Should return original messages when summary is empty
expect(result).toEqual(messages)
expect(result.messages).toEqual(messages)
expect(result.cost).toBe(0.02)
expect(result.summary).toBe("")
expect(mockWarn).toHaveBeenCalledWith("Received empty summary from API")

// Restore console.warn
Expand Down Expand Up @@ -225,4 +240,37 @@ describe("summarizeConversation", () => {
const mockCallArgs = (maybeRemoveImageBlocks as jest.Mock).mock.calls[0][0] as any[]
expect(mockCallArgs[mockCallArgs.length - 1]).toEqual(expectedFinalMessage)
})

it("should calculate newContextTokens correctly with systemPrompt", async () => {
const messages: ApiMessage[] = [
{ role: "user", content: "Hello", ts: 1 },
{ role: "assistant", content: "Hi there", ts: 2 },
{ role: "user", content: "How are you?", ts: 3 },
{ role: "assistant", content: "I'm good", ts: 4 },
{ role: "user", content: "What's new?", ts: 5 },
{ role: "assistant", content: "Not much", ts: 6 },
{ role: "user", content: "Tell me more", ts: 7 },
]

const systemPrompt = "You are a helpful assistant."

// Create a stream with usage information
const streamWithUsage = (async function* () {
yield { type: "text" as const, text: "This is a summary with system prompt" }
yield { type: "usage" as const, totalCost: 0.06, outputTokens: 200 }
})()

// Override the mock for this test
mockApiHandler.createMessage = jest.fn().mockReturnValue(streamWithUsage) as any

const result = await summarizeConversation(messages, mockApiHandler, systemPrompt)

// Verify that countTokens was called with the correct messages including system prompt
expect(mockApiHandler.countTokens).toHaveBeenCalled()

// Check the newContextTokens calculation includes system prompt
expect(result.newContextTokens).toBe(300) // 200 output tokens + 100 from countTokens
expect(result.cost).toBe(0.06)
expect(result.summary).toBe("This is a summary with system prompt")
})
})
48 changes: 38 additions & 10 deletions src/core/condense/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,23 +45,35 @@ Example summary structure:
Output only the summary of the conversation so far, without any additional commentary or explanation.
`

/**
 * Result of `summarizeConversation`.
 *
 * When no summary is produced (not enough messages to summarize, a summary
 * was generated too recently, or the API returned an empty summary),
 * `messages` is the original input array unchanged, `summary` is the empty
 * string, and `newContextTokens` is undefined. `cost` still reflects any
 * API spend incurred (e.g. when the returned summary was empty).
 */
export type SummarizeResponse = {
messages: ApiMessage[] // The messages after summarization
summary: string // The summary text; empty string for no summary
cost: number // The cost of the summarization operation
newContextTokens?: number // The number of tokens in the context for the next API request
}

/**
* Summarizes the conversation messages using an LLM call
*
* @param {ApiMessage[]} messages - The conversation messages
* @param {ApiHandler} apiHandler - The API handler to use for token counting.
* @returns {ApiMessage[]} - The input messages, potentially including a new summary message before the last message.
* @returns {SummarizeResponse} - The result of the summarization operation (see above)
*/
export async function summarizeConversation(messages: ApiMessage[], apiHandler: ApiHandler): Promise<ApiMessage[]> {
export async function summarizeConversation(
messages: ApiMessage[],
apiHandler: ApiHandler,
systemPrompt?: string,
): Promise<SummarizeResponse> {
const response: SummarizeResponse = { messages, cost: 0, summary: "" }
const messagesToSummarize = getMessagesSinceLastSummary(messages.slice(0, -N_MESSAGES_TO_KEEP))
if (messagesToSummarize.length <= 1) {
return messages // Not enough messages to warrant a summary
return response // Not enough messages to warrant a summary
}
const keepMessages = messages.slice(-N_MESSAGES_TO_KEEP)
for (const message of keepMessages) {
if (message.isSummary) {
return messages // We recently summarized these messages; it's too soon to summarize again.
}
// Check if there's a recent summary in the messages we're keeping
const recentSummaryExists = keepMessages.some((message) => message.isSummary)
if (recentSummaryExists) {
return response // We recently summarized these messages; it's too soon to summarize again.
Comment on lines +73 to +76
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hey @canrobins13,

Just a question about the truncateConversationIfNeeded function in sliding-window:

When autoCondenseContext is true and the token limit is hit, it tries to summarize. But if summarizeConversation decides not to create a summary (maybe because there aren't many new messages or there was a recent summary), it looks like truncateConversationIfNeeded still goes ahead and does the simple truncation.

I am aware this behavior was present before your changes, but would it make more sense to use the previous summary considering it might still be fresh rather than using the simple truncation?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When making an API request we only send messages starting from the latest summary if one exists, so the recent summary message would be "used" in the case you highlighted (when making API requests and when calculating the context window usage).
This case should be pretty hard to hit through the existing implementation, but will be more relevant because we're about to add a button for manually triggering context summarization through the UI.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense, if the condense feature becomes manual and the user clicks summarize multiple times, would this trigger the simple truncation? Or is it only triggered when the context is already bigger than the window?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not implemented yet, but we wouldn't trigger simple truncation or dropping of messages from the user button, that would only hit the new condense code

}
const finalRequestMessage: Anthropic.MessageParam = {
role: "user",
Expand All @@ -73,25 +85,41 @@ export async function summarizeConversation(messages: ApiMessage[], apiHandler:
// Note: this doesn't need to be a stream, consider using something like apiHandler.completePrompt
const stream = apiHandler.createMessage(SUMMARY_PROMPT, requestMessages)
let summary = ""
// TODO(canyon): compute usage and cost for this operation and update the global metrics.
let cost = 0
let outputTokens = 0
for await (const chunk of stream) {
if (chunk.type === "text") {
summary += chunk.text
} else if (chunk.type === "usage") {
// Record final usage chunk only
cost = chunk.totalCost ?? 0
outputTokens = chunk.outputTokens ?? 0
}
}
summary = summary.trim()
if (summary.length === 0) {
console.warn("Received empty summary from API")
return messages
return { ...response, cost }
}
const summaryMessage: ApiMessage = {
role: "assistant",
content: summary,
ts: keepMessages[0].ts,
isSummary: true,
}
const newMessages = [...messages.slice(0, -N_MESSAGES_TO_KEEP), summaryMessage, ...keepMessages]

return [...messages.slice(0, -N_MESSAGES_TO_KEEP), summaryMessage, ...keepMessages]
// Count the tokens in the context for the next API request
// We only estimate the tokens in summaryMessage if outputTokens is 0; otherwise outputTokens already accounts for it
const contextMessages = outputTokens ? [...keepMessages] : [summaryMessage, ...keepMessages]
if (systemPrompt) {
contextMessages.unshift({ role: "user", content: systemPrompt })
}
const contextBlocks = contextMessages.flatMap((message) =>
typeof message.content === "string" ? [{ text: message.content, type: "text" as const }] : message.content,
)
const newContextTokens = outputTokens + (await apiHandler.countTokens(contextBlocks))
return { messages: newMessages, summary, cost, newContextTokens }
}

/* Returns the list of all messages since the last summary message, including the summary. Returns all messages if there is no summary. */
Expand Down
Loading