diff --git a/README.md b/README.md
index ca02121..a370b13 100644
--- a/README.md
+++ b/README.md
@@ -34,6 +34,7 @@ By default, your `.git` directory and your `.gitignore` files are ignored. Any f
 * `-o`, `--output`: Path to the output file. If not specified, will print to standard output.
 * `-e`, `--estimate`: Estimate the tokens of the output file. If not specified, does not estimate.
 * `-j`, `--json`: Output to JSON rather than plain text. Use with `-o` to specify the output file.
+* `-x`, `--xml`: Output to XML rather than plain text. Use with `-o` to specify the output file.
 * `-i`, `--ignore`: Path to the `.gptignore` file. If not specified, will look for a `.gptignore` file in the same directory as the `.gitignore` file.
 * `-g`, `--ignore-gitignore`: Ignore the `.gitignore` file.
 * `-s`, `--scrub-comments`: Remove comments from the output file to save tokens.
diff --git a/cmd/root.go b/cmd/root.go
index 80cd3ae..5f1aeb6 100644
--- a/cmd/root.go
+++ b/cmd/root.go
@@ -15,6 +15,7 @@ var estimateTokens bool
 var ignoreFilePath string
 var ignoreGitignore bool
 var outputJSON bool
+var outputXML bool
 var debug bool
 var scrubComments bool
 
@@ -54,6 +55,37 @@ var rootCmd = &cobra.Command{
 			}
 			return
 		}
+		if outputXML {
+			output, err := prompt.OutputGitRepoXML(repo, scrubComments)
+			if err != nil {
+				fmt.Printf("Error: %s\n", err)
+				os.Exit(1)
+			}
+
+			// Validate the XML output
+			if err := prompt.ValidateXML(output); err != nil {
+				fmt.Printf("Error: %s\n", err)
+				os.Exit(1)
+			}
+
+			if outputFile != "" {
+				// if output file exists, throw error
+				if _, err := os.Stat(outputFile); err == nil {
+					fmt.Printf("Error: output file %s already exists\n", outputFile)
+					os.Exit(1)
+				}
+				err = os.WriteFile(outputFile, []byte(output), 0644)
+				if err != nil {
+					fmt.Printf("Error: could not write to output file %s\n", outputFile)
+					os.Exit(1)
+				}
+			} else {
+				if !debug {
+					fmt.Println(output)
+				}
+			}
+			return
+		}
 		output, err := prompt.OutputGitRepo(repo, preambleFile, scrubComments)
 		if err != nil {
 			fmt.Printf("Error: %s\n", err)
@@ -93,6 +125,8 @@ func init() {
 	rootCmd.Flags().BoolVarP(&ignoreGitignore, "ignore-gitignore", "g", false, "ignore .gitignore file")
 	// output JSON. Should be a bool
 	rootCmd.Flags().BoolVarP(&outputJSON, "json", "j", false, "output JSON")
+	// output XML. Should be a bool
+	rootCmd.Flags().BoolVarP(&outputXML, "xml", "x", false, "output XML")
 	// debug. Should be a bool
 	rootCmd.Flags().BoolVarP(&debug, "debug", "d", false, "debug mode. Do not output to standard output")
 	// scrub comments. Should be a bool
diff --git a/prompt/prompt.go b/prompt/prompt.go
index b1facb6..1c0a462 100644
--- a/prompt/prompt.go
+++ b/prompt/prompt.go
@@ -3,7 +3,9 @@ package prompt
 import (
 	"bufio"
 	"encoding/json"
+	"encoding/xml"
 	"fmt"
+	"io"
 	"os"
 	"path/filepath"
 	"strings"
@@ -16,16 +18,16 @@ import (
 
 // GitFile is a file in a Git repository
 type GitFile struct {
-	Path     string `json:"path"`     // path to the file relative to the repository root
-	Tokens   int64  `json:"tokens"`   // number of tokens in the file
-	Contents string `json:"contents"` // contents of the file
+	Path     string `json:"path" xml:"path"`         // path to the file relative to the repository root
+	Tokens   int64  `json:"tokens" xml:"tokens"`     // number of tokens in the file
+	Contents string `json:"contents" xml:"contents"` // contents of the file
 }
 
 // GitRepo is a Git repository
 type GitRepo struct {
-	TotalTokens int64     `json:"total_tokens"`
-	Files       []GitFile `json:"files"`
-	FileCount   int       `json:"file_count"`
+	TotalTokens int64     `json:"total_tokens" xml:"total_tokens"`
+	Files       []GitFile `json:"files" xml:"files>file"`
+	FileCount   int       `json:"file_count" xml:"file_count"`
 }
 
 // contains checks if a string is in a slice of strings
@@ -164,6 +166,87 @@ func OutputGitRepo(repo *GitRepo, preambleFile string, scrubComments bool) (stri
 	return output, nil
 }
 
+// OutputGitRepoXML renders the repository as an XML document and returns it.
+// File paths are entity-escaped and file contents are wrapped in CDATA.
+// If scrubComments is true, comments are first removed from every file's
+// contents, modifying repo in place. The token estimate of the rendered
+// document is stored in repo.TotalTokens and written into the output.
+// The returned error is currently always nil.
+func OutputGitRepoXML(repo *GitRepo, scrubComments bool) (string, error) {
+	// Prepare XML content
+	if scrubComments {
+		for i, file := range repo.Files {
+			repo.Files[i].Contents = utils.RemoveCodeComments(file.Contents)
+		}
+	}
+
+	// Add XML header
+	var result strings.Builder
+	result.WriteString("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")
+
+	// Use custom marshaling with proper CDATA for code contents
+	result.WriteString("<root>\n")
+
+	// Skip the tokens for now
+	result.WriteString("  <total_tokens>PLACEHOLDER</total_tokens>\n")
+	result.WriteString(fmt.Sprintf("  <file_count>%d</file_count>\n", repo.FileCount))
+	result.WriteString("  <files>\n")
+
+	for _, file := range repo.Files {
+		result.WriteString("    <file>\n")
+		result.WriteString(fmt.Sprintf("      <path>%s</path>\n", escapeXML(file.Path)))
+		result.WriteString(fmt.Sprintf("      <tokens>%d</tokens>\n", file.Tokens))
+		result.WriteString("      <contents><![CDATA[")
+		// A literal "]]>" would end the CDATA section early, so split it
+		// across two adjacent sections.
+		result.WriteString(strings.ReplaceAll(file.Contents, "]]>", "]]]]><![CDATA[>"))
+		result.WriteString("]]></contents>\n")
+		result.WriteString("    </file>\n")
+	}
+
+	result.WriteString("  </files>\n")
+	result.WriteString("</root>")
+
+	// Get the output string
+	outputStr := result.String()
+
+	// Calculate tokens
+	tokenCount := EstimateTokens(outputStr)
+	repo.TotalTokens = tokenCount
+
+	// Replace the placeholder with the actual token count
+	outputStr = strings.Replace(outputStr, "PLACEHOLDER",
+		fmt.Sprintf("%d", tokenCount), 1)
+
+	return outputStr, nil
+}
+
+// escapeXML escapes XML special characters in a string
+func escapeXML(s string) string {
+	// "&" must be replaced first so the entities added below are not re-escaped.
+	s = strings.ReplaceAll(s, "&", "&amp;")
+	s = strings.ReplaceAll(s, "<", "&lt;")
+	s = strings.ReplaceAll(s, ">", "&gt;")
+	s = strings.ReplaceAll(s, "\"", "&quot;")
+	s = strings.ReplaceAll(s, "'", "&apos;")
+	return s
+}
+
+// ValidateXML checks if the given XML string is well-formed
+func ValidateXML(xmlString string) error {
+	decoder := xml.NewDecoder(strings.NewReader(xmlString))
+	for {
+		_, err := decoder.Token()
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			return fmt.Errorf("XML validation error: %w", err)
+		}
+	}
+	return nil
+}
+
 func MarshalRepo(repo *GitRepo, scrubComments bool) ([]byte, error) {
 	// run the output function to get the total tokens
 	_, err := OutputGitRepo(repo, "", scrubComments)