Skip to content

Commit fef3a2d

Browse files
committed
fix: Properly handle CDATA sections in XML output
This commit fixes an issue where the XML export would fail with "unexpected EOF in CDATA section" errors when file content contained the CDATA end marker sequence ']]>'.

The fix implements a proper CDATA handling strategy that:
- Detects all occurrences of ']]>' in file content
- Splits the content around these markers
- Creates consecutive CDATA sections (CDATA sections cannot be nested) to preserve the original content
- Ensures all XML output is well-formed regardless of source content

This approach maintains the efficiency of CDATA for storing large code blocks while ensuring compatibility with all possible file content. It fixes the XML validation error that would occur when processing files containing CDATA end marker sequences.
1 parent bfac458 commit fef3a2d

File tree

2 files changed

+38
-16
lines changed

2 files changed

+38
-16
lines changed

chat.json

Lines changed: 0 additions & 1 deletion
This file was deleted.

prompt/prompt.go

Lines changed: 38 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -214,11 +214,12 @@ func OutputGitRepo(repo *GitRepo, preambleFile string, scrubComments bool) (stri
214214
return output, nil
215215
}
216216

217+
217218
func OutputGitRepoXML(repo *GitRepo, scrubComments bool) (string, error) {
218219
if scrubComments {
219-
for i, file := range repo.Files {
220-
repo.Files[i].Contents = utils.RemoveCodeComments(file.Contents)
221-
}
220+
for i, file := range repo.Files {
221+
repo.Files[i].Contents = utils.RemoveCodeComments(file.Contents)
222+
}
222223
}
223224
var result strings.Builder
224225
result.WriteString("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")
@@ -229,16 +230,38 @@ func OutputGitRepoXML(repo *GitRepo, scrubComments bool) (string, error) {
229230
result.WriteString(" <files>\n")
230231

231232
for _, file := range repo.Files {
232-
result.WriteString(" <file>\n")
233-
result.WriteString(fmt.Sprintf(" <path>%s</path>\n", escapeXML(file.Path)))
234-
result.WriteString(fmt.Sprintf(" <tokens>%d</tokens>\n", file.Tokens))
233+
result.WriteString(" <file>\n")
234+
result.WriteString(fmt.Sprintf(" <path>%s</path>\n", escapeXML(file.Path)))
235+
result.WriteString(fmt.Sprintf(" <tokens>%d</tokens>\n", file.Tokens))
236+
237+
// Split content around CDATA end marker (]]>) and create multiple CDATA sections
238+
contents := file.Contents
239+
result.WriteString(" <contents>")
240+
241+
for {
242+
idx := strings.Index(contents, "]]>")
243+
if idx == -1 {
244+
// No more CDATA end markers, write remaining content in one CDATA section
245+
result.WriteString("<![CDATA[")
246+
result.WriteString(contents)
247+
result.WriteString("]]>")
248+
break
249+
}
250+
251+
// Write content up to the CDATA end marker
252+
result.WriteString("<![CDATA[")
253+
result.WriteString(contents[:idx+2]) // Include the "]]" part
254+
result.WriteString("]]>") // Close this CDATA section
235255

236-
safeContents := strings.ReplaceAll(file.Contents, "]]]]><![CDATA[>", "]]]]]]><![CDATA[><![CDATA[>")
256+
// Start a new CDATA section with the ">" character
257+
result.WriteString("<![CDATA[>")
237258

238-
result.WriteString(" <contents><![CDATA[")
239-
result.WriteString(safeContents)
240-
result.WriteString("]]]]><![CDATA[></contents>\n")
241-
result.WriteString(" </file>\n")
259+
// Move past the "]]>" in the original content
260+
contents = contents[idx+3:]
261+
}
262+
263+
result.WriteString("</contents>\n")
264+
result.WriteString(" </file>\n")
242265
}
243266

244267
result.WriteString(" </files>\n")
@@ -250,10 +273,10 @@ func OutputGitRepoXML(repo *GitRepo, scrubComments bool) (string, error) {
250273
repo.TotalTokens = tokenCount
251274

252275
outputStr = strings.Replace(
253-
outputStr,
254-
"<total_tokens>PLACEHOLDER</total_tokens>",
255-
fmt.Sprintf("<total_tokens>%d</total_tokens>", tokenCount),
256-
1,
276+
outputStr,
277+
"<total_tokens>PLACEHOLDER</total_tokens>",
278+
fmt.Sprintf("<total_tokens>%d</total_tokens>", tokenCount),
279+
1,
257280
)
258281

259282
return outputStr, nil

0 commit comments

Comments
 (0)