Skip to content

Commit c424f22

Browse files
authored
Add the option to output to XML (#11)
* Add the option to output to XML * feat: add XML validation and custom marshaling for GitRepo output
1 parent e4cbaba commit c424f22

File tree

3 files changed

+117
-6
lines changed

3 files changed

+117
-6
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ By default, your `.git` directory and your `.gitignore` files are ignored. Any f
3434
* `-o`, `--output`: Path to the output file. If not specified, will print to standard output.
3535
* `-e`, `--estimate`: Estimate the tokens of the output file. If not specified, does not estimate.
3636
* `-j`, `--json`: Output to JSON rather than plain text. Use with `-o` to specify the output file.
37+
* `-x`, `--xml`: Output to XML rather than plain text. Use with `-o` to specify the output file.
3738
* `-i`, `--ignore`: Path to the `.gptignore` file. If not specified, will look for a `.gptignore` file in the same directory as the `.gitignore` file.
3839
* `-g`, `--ignore-gitignore`: Ignore the `.gitignore` file.
3940
* `-s`, `--scrub-comments`: Remove comments from the output file to save tokens.

cmd/root.go

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ var estimateTokens bool
1515
var ignoreFilePath string
1616
var ignoreGitignore bool
1717
var outputJSON bool
18+
var outputXML bool
1819
var debug bool
1920
var scrubComments bool
2021

@@ -54,6 +55,37 @@ var rootCmd = &cobra.Command{
5455
}
5556
return
5657
}
58+
if outputXML {
59+
output, err := prompt.OutputGitRepoXML(repo, scrubComments)
60+
if err != nil {
61+
fmt.Printf("Error: %s\n", err)
62+
os.Exit(1)
63+
}
64+
65+
// Validate the XML output
66+
if err := prompt.ValidateXML(output); err != nil {
67+
fmt.Printf("Error: %s\n", err)
68+
os.Exit(1)
69+
}
70+
71+
if outputFile != "" {
72+
// if output file exists, throw error
73+
if _, err := os.Stat(outputFile); err == nil {
74+
fmt.Printf("Error: output file %s already exists\n", outputFile)
75+
os.Exit(1)
76+
}
77+
err = os.WriteFile(outputFile, []byte(output), 0644)
78+
if err != nil {
79+
fmt.Printf("Error: could not write to output file %s\n", outputFile)
80+
os.Exit(1)
81+
}
82+
} else {
83+
if !debug {
84+
fmt.Println(output)
85+
}
86+
}
87+
return
88+
}
5789
output, err := prompt.OutputGitRepo(repo, preambleFile, scrubComments)
5890
if err != nil {
5991
fmt.Printf("Error: %s\n", err)
@@ -93,6 +125,8 @@ func init() {
93125
rootCmd.Flags().BoolVarP(&ignoreGitignore, "ignore-gitignore", "g", false, "ignore .gitignore file")
94126
// output JSON. Should be a bool
95127
rootCmd.Flags().BoolVarP(&outputJSON, "json", "j", false, "output JSON")
128+
// output XML. Should be a bool
129+
rootCmd.Flags().BoolVarP(&outputXML, "xml", "x", false, "output XML")
96130
// debug. Should be a bool
97131
rootCmd.Flags().BoolVarP(&debug, "debug", "d", false, "debug mode. Do not output to standard output")
98132
// scrub comments. Should be a bool

prompt/prompt.go

Lines changed: 82 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@ package prompt
33
import (
44
"bufio"
55
"encoding/json"
6+
"encoding/xml"
67
"fmt"
8+
"io"
79
"os"
810
"path/filepath"
911
"strings"
@@ -16,16 +18,16 @@ import (
1618

1719
// GitFile is a file in a Git repository
1820
type GitFile struct {
19-
Path string `json:"path"` // path to the file relative to the repository root
20-
Tokens int64 `json:"tokens"` // number of tokens in the file
21-
Contents string `json:"contents"` // contents of the file
21+
Path string `json:"path" xml:"path"` // path to the file relative to the repository root
22+
Tokens int64 `json:"tokens" xml:"tokens"` // number of tokens in the file
23+
Contents string `json:"contents" xml:"contents"` // contents of the file
2224
}
2325

2426
// GitRepo is a Git repository
2527
type GitRepo struct {
26-
TotalTokens int64 `json:"total_tokens"`
27-
Files []GitFile `json:"files"`
28-
FileCount int `json:"file_count"`
28+
TotalTokens int64 `json:"total_tokens" xml:"total_tokens"`
29+
Files []GitFile `json:"files" xml:"files>file"`
30+
FileCount int `json:"file_count" xml:"file_count"`
2931
}
3032

3133
// contains checks if a string is in a slice of strings
@@ -164,6 +166,80 @@ func OutputGitRepo(repo *GitRepo, preambleFile string, scrubComments bool) (stri
164166
return output, nil
165167
}
166168

169+
func OutputGitRepoXML(repo *GitRepo, scrubComments bool) (string, error) {
170+
// Prepare XML content
171+
if scrubComments {
172+
for i, file := range repo.Files {
173+
repo.Files[i].Contents = utils.RemoveCodeComments(file.Contents)
174+
}
175+
}
176+
177+
// Add XML header
178+
var result strings.Builder
179+
result.WriteString("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")
180+
181+
// Use custom marshaling with proper CDATA for code contents
182+
result.WriteString("<GitRepo>\n")
183+
184+
// Skip the tokens for now
185+
result.WriteString(" <total_tokens>PLACEHOLDER</total_tokens>\n")
186+
result.WriteString(fmt.Sprintf(" <file_count>%d</file_count>\n", repo.FileCount))
187+
result.WriteString(" <files>\n")
188+
189+
for _, file := range repo.Files {
190+
result.WriteString(" <file>\n")
191+
result.WriteString(fmt.Sprintf(" <path>%s</path>\n", escapeXML(file.Path)))
192+
result.WriteString(fmt.Sprintf(" <tokens>%d</tokens>\n", file.Tokens))
193+
result.WriteString(" <contents><![CDATA[")
194+
result.WriteString(file.Contents)
195+
result.WriteString("]]></contents>\n")
196+
result.WriteString(" </file>\n")
197+
}
198+
199+
result.WriteString(" </files>\n")
200+
result.WriteString("</GitRepo>")
201+
202+
// Get the output string
203+
outputStr := result.String()
204+
205+
// Calculate tokens
206+
tokenCount := EstimateTokens(outputStr)
207+
repo.TotalTokens = tokenCount
208+
209+
// Replace the placeholder with the actual token count
210+
outputStr = strings.Replace(outputStr, "<total_tokens>PLACEHOLDER</total_tokens>",
211+
fmt.Sprintf("<total_tokens>%d</total_tokens>", tokenCount), 1)
212+
213+
return outputStr, nil
214+
}
215+
216+
// xmlEscaper rewrites the five XML special characters to their predefined
// entity references in a single pass over the input.
var xmlEscaper = strings.NewReplacer(
	"&", "&amp;",
	"<", "&lt;",
	">", "&gt;",
	"\"", "&quot;",
	"'", "&apos;",
)

// escapeXML escapes XML special characters in a string.
//
// It replaces &, <, >, " and ' with &amp;, &lt;, &gt;, &quot; and &apos;
// respectively and returns the result. Input that already contains entity
// references is escaped again (for example "&lt;" becomes "&amp;lt;").
func escapeXML(s string) string {
	return xmlEscaper.Replace(s)
}
225+
226+
// ValidateXML checks if the given XML string is a well-formed document.
//
// The input is tokenized with encoding/xml; any error reported by the decoder
// (mismatched or unclosed tags, malformed markup, ...) is wrapped and
// returned. In addition, the document must contain exactly one top-level
// element: empty input and input with several root elements are rejected,
// since the decoder alone accepts both.
//
// It returns nil when the input passes these checks.
func ValidateXML(xmlString string) error {
	decoder := xml.NewDecoder(strings.NewReader(xmlString))

	depth := 0 // current element nesting level
	roots := 0 // number of elements opened at nesting level zero
	for {
		tok, err := decoder.Token()
		if err == io.EOF {
			break
		}
		if err != nil {
			return fmt.Errorf("XML validation error: %w", err)
		}

		switch tok.(type) {
		case xml.StartElement:
			if depth == 0 {
				roots++
			}
			depth++
		case xml.EndElement:
			depth--
		}
	}

	if roots != 1 {
		return fmt.Errorf("XML validation error: expected exactly one root element, found %d", roots)
	}
	return nil
}
240+
241+
242+
167243
func MarshalRepo(repo *GitRepo, scrubComments bool) ([]byte, error) {
168244
// run the output function to get the total tokens
169245
_, err := OutputGitRepo(repo, "", scrubComments)

0 commit comments

Comments
 (0)