@@ -3,7 +3,9 @@ package prompt
33import (
44 "bufio"
55 "encoding/json"
6+ "encoding/xml"
67 "fmt"
8+ "io"
79 "os"
810 "path/filepath"
911 "strings"
@@ -16,16 +18,16 @@ import (
1618
// GitFile is a file in a Git repository. The struct tags give each field the
// same lower-case name in both JSON and XML encodings.
type GitFile struct {
	Path     string `json:"path" xml:"path"`         // path to the file relative to the repository root
	Tokens   int64  `json:"tokens" xml:"tokens"`     // number of tokens in the file
	Contents string `json:"contents" xml:"contents"` // contents of the file
}
2325
2426// GitRepo is a Git repository
2527type GitRepo struct {
26- TotalTokens int64 `json:"total_tokens"`
27- Files []GitFile `json:"files"`
28- FileCount int `json:"file_count"`
28+ TotalTokens int64 `json:"total_tokens" xml:"total_tokens" `
29+ Files []GitFile `json:"files" xml:"files>file" `
30+ FileCount int `json:"file_count" xml:"file_count" `
2931}
3032
3133// contains checks if a string is in a slice of strings
@@ -164,6 +166,80 @@ func OutputGitRepo(repo *GitRepo, preambleFile string, scrubComments bool) (stri
164166 return output , nil
165167}
166168
169+ func OutputGitRepoXML (repo * GitRepo , scrubComments bool ) (string , error ) {
170+ // Prepare XML content
171+ if scrubComments {
172+ for i , file := range repo .Files {
173+ repo .Files [i ].Contents = utils .RemoveCodeComments (file .Contents )
174+ }
175+ }
176+
177+ // Add XML header
178+ var result strings.Builder
179+ result .WriteString ("<?xml version=\" 1.0\" encoding=\" UTF-8\" ?>\n " )
180+
181+ // Use custom marshaling with proper CDATA for code contents
182+ result .WriteString ("<GitRepo>\n " )
183+
184+ // Skip the tokens for now
185+ result .WriteString (" <total_tokens>PLACEHOLDER</total_tokens>\n " )
186+ result .WriteString (fmt .Sprintf (" <file_count>%d</file_count>\n " , repo .FileCount ))
187+ result .WriteString (" <files>\n " )
188+
189+ for _ , file := range repo .Files {
190+ result .WriteString (" <file>\n " )
191+ result .WriteString (fmt .Sprintf (" <path>%s</path>\n " , escapeXML (file .Path )))
192+ result .WriteString (fmt .Sprintf (" <tokens>%d</tokens>\n " , file .Tokens ))
193+ result .WriteString (" <contents><![CDATA[" )
194+ result .WriteString (file .Contents )
195+ result .WriteString ("]]></contents>\n " )
196+ result .WriteString (" </file>\n " )
197+ }
198+
199+ result .WriteString (" </files>\n " )
200+ result .WriteString ("</GitRepo>" )
201+
202+ // Get the output string
203+ outputStr := result .String ()
204+
205+ // Calculate tokens
206+ tokenCount := EstimateTokens (outputStr )
207+ repo .TotalTokens = tokenCount
208+
209+ // Replace the placeholder with the actual token count
210+ outputStr = strings .Replace (outputStr , "<total_tokens>PLACEHOLDER</total_tokens>" ,
211+ fmt .Sprintf ("<total_tokens>%d</total_tokens>" , tokenCount ), 1 )
212+
213+ return outputStr , nil
214+ }
215+
// xmlEscaper maps the five characters that are reserved in XML text and
// attribute values to their predefined entities. A Replacer scans the input
// once, so the "&" produced by one replacement is never escaped a second time.
var xmlEscaper = strings.NewReplacer(
	"&", "&amp;",
	"<", "&lt;",
	">", "&gt;",
	`"`, "&quot;",
	"'", "&apos;",
)

// escapeXML returns s with the XML special characters &, <, >, " and '
// replaced by their predefined entities, making it safe to embed as element
// text or as an attribute value. Strings without those characters are
// returned unchanged.
func escapeXML(s string) string {
	return xmlEscaper.Replace(s)
}
225+
// ValidateXML checks whether xmlString is a well-formed XML document.
//
// The input is tokenized with encoding/xml's (strict) decoder; any syntax
// error it reports — mismatched or unclosed tags, bad entities, and so on —
// is returned wrapped with the prefix "XML validation error". In addition the
// document must contain exactly one root element, so empty input, bare text
// and several sibling top-level elements are rejected as well.
//
// It returns nil when the document is well-formed.
func ValidateXML(xmlString string) error {
	decoder := xml.NewDecoder(strings.NewReader(xmlString))

	depth := 0        // number of currently open elements
	rootSeen := false // whether a top-level element has been opened yet

	for {
		tok, err := decoder.Token()
		if err == io.EOF {
			break
		}
		if err != nil {
			return fmt.Errorf("XML validation error: %w", err)
		}

		switch t := tok.(type) {
		case xml.StartElement:
			if depth == 0 {
				if rootSeen {
					return fmt.Errorf("XML validation error: multiple root elements (second is <%s>)", t.Name.Local)
				}
				rootSeen = true
			}
			depth++
		case xml.EndElement:
			depth--
		}
	}

	// The decoder reaches EOF without complaint on input that has no
	// elements at all, which is not a document.
	if !rootSeen {
		return fmt.Errorf("XML validation error: document has no root element")
	}
	return nil
}
240+
241+
242+
167243func MarshalRepo (repo * GitRepo , scrubComments bool ) ([]byte , error ) {
168244 // run the output function to get the total tokens
169245 _ , err := OutputGitRepo (repo , "" , scrubComments )
0 commit comments