diff --git a/README.md b/README.md
index ca02121..a370b13 100644
--- a/README.md
+++ b/README.md
@@ -34,6 +34,7 @@ By default, your `.git` directory and your `.gitignore` files are ignored. Any f
* `-o`, `--output`: Path to the output file. If not specified, will print to standard output.
* `-e`, `--estimate`: Estimate the tokens of the output file. If not specified, does not estimate.
* `-j`, `--json`: Output to JSON rather than plain text. Use with `-o` to specify the output file.
+* `-x`, `--xml`: Output to XML rather than plain text. Use with `-o` to specify the output file.
* `-i`, `--ignore`: Path to the `.gptignore` file. If not specified, will look for a `.gptignore` file in the same directory as the `.gitignore` file.
* `-g`, `--ignore-gitignore`: Ignore the `.gitignore` file.
* `-s`, `--scrub-comments`: Remove comments from the output file to save tokens.
diff --git a/cmd/root.go b/cmd/root.go
index 80cd3ae..5f1aeb6 100644
--- a/cmd/root.go
+++ b/cmd/root.go
@@ -15,6 +15,7 @@ var estimateTokens bool
var ignoreFilePath string
var ignoreGitignore bool
var outputJSON bool
+var outputXML bool
var debug bool
var scrubComments bool
@@ -54,6 +55,37 @@ var rootCmd = &cobra.Command{
}
return
}
+ if outputXML {
+ output, err := prompt.OutputGitRepoXML(repo, scrubComments)
+ if err != nil {
+ fmt.Printf("Error: %s\n", err)
+ os.Exit(1)
+ }
+
+ // Validate the XML output
+ if err := prompt.ValidateXML(output); err != nil {
+ fmt.Printf("Error: %s\n", err)
+ os.Exit(1)
+ }
+
+ if outputFile != "" {
+ // if output file exists, throw error
+ if _, err := os.Stat(outputFile); err == nil {
+ fmt.Printf("Error: output file %s already exists\n", outputFile)
+ os.Exit(1)
+ }
+ err = os.WriteFile(outputFile, []byte(output), 0644)
+ if err != nil {
+ fmt.Printf("Error: could not write to output file %s\n", outputFile)
+ os.Exit(1)
+ }
+ } else {
+ if !debug {
+ fmt.Println(output)
+ }
+ }
+ return
+ }
output, err := prompt.OutputGitRepo(repo, preambleFile, scrubComments)
if err != nil {
fmt.Printf("Error: %s\n", err)
@@ -93,6 +125,8 @@ func init() {
rootCmd.Flags().BoolVarP(&ignoreGitignore, "ignore-gitignore", "g", false, "ignore .gitignore file")
// output JSON. Should be a bool
rootCmd.Flags().BoolVarP(&outputJSON, "json", "j", false, "output JSON")
+ // output XML. Should be a bool
+ rootCmd.Flags().BoolVarP(&outputXML, "xml", "x", false, "output XML")
// debug. Should be a bool
rootCmd.Flags().BoolVarP(&debug, "debug", "d", false, "debug mode. Do not output to standard output")
// scrub comments. Should be a bool
diff --git a/prompt/prompt.go b/prompt/prompt.go
index b1facb6..1c0a462 100644
--- a/prompt/prompt.go
+++ b/prompt/prompt.go
@@ -3,7 +3,9 @@ package prompt
import (
"bufio"
"encoding/json"
+ "encoding/xml"
"fmt"
+ "io"
"os"
"path/filepath"
"strings"
@@ -16,16 +18,16 @@ import (
// GitFile is a file in a Git repository
type GitFile struct {
- Path string `json:"path"` // path to the file relative to the repository root
- Tokens int64 `json:"tokens"` // number of tokens in the file
- Contents string `json:"contents"` // contents of the file
+ Path string `json:"path" xml:"path"` // path to the file relative to the repository root; key/element name "path"
+ Tokens int64 `json:"tokens" xml:"tokens"` // number of tokens in the file; key/element name "tokens"
+ Contents string `json:"contents" xml:"contents"` // contents of the file; key/element name "contents"
}
// GitRepo is a Git repository
type GitRepo struct {
- TotalTokens int64 `json:"total_tokens"`
- Files []GitFile `json:"files"`
- FileCount int `json:"file_count"`
+ TotalTokens int64 `json:"total_tokens" xml:"total_tokens"` // token estimate for the rendered output; OutputGitRepoXML overwrites it
+ Files []GitFile `json:"files" xml:"files>file"` // with encoding/xml, "files>file" nests each entry as a "file" element inside one "files" element
+ FileCount int `json:"file_count" xml:"file_count"` // presumably len(Files); it is set outside this hunk — TODO confirm
}
// contains checks if a string is in a slice of strings
@@ -164,6 +166,80 @@ func OutputGitRepo(repo *GitRepo, preambleFile string, scrubComments bool) (stri
return output, nil
}
+// OutputGitRepoXML renders repo as a hand-assembled text document and returns
+// it. The returned error is always nil.
+//
+// Side effects on repo:
+//   - when scrubComments is true, every repo.Files[i].Contents is overwritten
+//     in place with the result of utils.RemoveCodeComments;
+//   - repo.TotalTokens is set to the token estimate of the rendered output.
+//
+// NOTE(review): the comments below talk about an XML header and CDATA, but the
+// string literals in this copy contain only whitespace and newlines and
+// file.Contents is never written. It looks like the element markup was
+// stripped from the literals before this text was captured — confirm against
+// the real source before relying on the exact output format.
+func OutputGitRepoXML(repo *GitRepo, scrubComments bool) (string, error) {
+ // Optionally strip code comments; this mutates the caller's repo.
+ if scrubComments {
+ for i, file := range repo.Files {
+ repo.Files[i].Contents = utils.RemoveCodeComments(file.Contents)
+ }
+ }
+
+ // Add XML header
+ var result strings.Builder
+ result.WriteString("\n")
+
+ // Use custom marshaling with proper CDATA for code contents
+ result.WriteString("\n")
+
+ // The total token count is not known until the whole document has been
+ // built, so a literal PLACEHOLDER is written here and substituted below.
+ result.WriteString(" PLACEHOLDER\n")
+ result.WriteString(fmt.Sprintf(" %d\n", repo.FileCount))
+ result.WriteString(" \n")
+
+ // One entry per file. Only the path goes through escapeXML; the token
+ // count is a plain integer.
+ for _, file := range repo.Files {
+ result.WriteString(" \n")
+ result.WriteString(fmt.Sprintf(" %s\n", escapeXML(file.Path)))
+ result.WriteString(fmt.Sprintf(" %d\n", file.Tokens))
+ result.WriteString(" \n")
+ result.WriteString(" \n")
+ }
+
+ result.WriteString(" \n")
+ result.WriteString("")
+
+ // Get the output string
+ outputStr := result.String()
+
+ // The estimate is taken while the text still contains PLACEHOLDER, i.e.
+ // before the real number is substituted in.
+ tokenCount := EstimateTokens(outputStr)
+ repo.TotalTokens = tokenCount
+
+ // Replace only the first occurrence (n = 1). The placeholder is written
+ // before any file data, so the first match is the header one.
+ outputStr = strings.Replace(outputStr, "PLACEHOLDER",
+ fmt.Sprintf("%d", tokenCount), 1)
+
+ return outputStr, nil
+}
+
+// escapeXML returns s with the five characters that are special in XML
+// markup replaced by their predefined entity references (amp, lt, gt, quot
+// and apos), so the result can be placed in element text or in a quoted
+// attribute value. An empty string is returned unchanged.
+func escapeXML(s string) string {
+	// The ampersand must be handled first: every later replacement inserts
+	// an ampersand of its own, which must not be escaped a second time.
+	s = strings.ReplaceAll(s, "&", "&amp;")
+	s = strings.ReplaceAll(s, "<", "&lt;")
+	s = strings.ReplaceAll(s, ">", "&gt;")
+	s = strings.ReplaceAll(s, "\"", "&quot;")
+	s = strings.ReplaceAll(s, "'", "&apos;")
+	return s
+}
+
+// ValidateXML tokenizes xmlString with an encoding/xml decoder and reports
+// the first error the decoder raises, wrapped with an "XML validation error"
+// prefix. It returns nil once the input has been consumed to a clean end of
+// input. Only what the decoder itself checks is verified; no schema or
+// particular root element is required.
+func ValidateXML(xmlString string) error {
+	dec := xml.NewDecoder(strings.NewReader(xmlString))
+
+	// Pull tokens until the decoder stops; the tokens themselves are not needed.
+	_, err := dec.Token()
+	for err == nil {
+		_, err = dec.Token()
+	}
+
+	// io.EOF is the normal way for the token stream to end.
+	if err == io.EOF {
+		return nil
+	}
+	return fmt.Errorf("XML validation error: %w", err)
+}
+
+
+
func MarshalRepo(repo *GitRepo, scrubComments bool) ([]byte, error) {
// run the output function to get the total tokens
_, err := OutputGitRepo(repo, "", scrubComments)