Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ By default, your `.git` directory and your `.gitignore` files are ignored. Any f
* `-o`, `--output`: Path to the output file. If not specified, will print to standard output.
* `-e`, `--estimate`: Estimate the tokens of the output file. If not specified, does not estimate.
* `-j`, `--json`: Output to JSON rather than plain text. Use with `-o` to specify the output file.
* `-x`, `--xml`: Output to XML rather than plain text. Use with `-o` to specify the output file.
* `-i`, `--ignore`: Path to the `.gptignore` file. If not specified, will look for a `.gptignore` file in the same directory as the `.gitignore` file.
* `-g`, `--ignore-gitignore`: Ignore the `.gitignore` file.
* `-s`, `--scrub-comments`: Remove comments from the output file to save tokens.
Expand Down
34 changes: 34 additions & 0 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ var estimateTokens bool
var ignoreFilePath string
var ignoreGitignore bool
var outputJSON bool
var outputXML bool
var debug bool
var scrubComments bool

Expand Down Expand Up @@ -54,6 +55,37 @@ var rootCmd = &cobra.Command{
}
return
}
if outputXML {
output, err := prompt.OutputGitRepoXML(repo, scrubComments)
if err != nil {
fmt.Printf("Error: %s\n", err)
os.Exit(1)
}

// Validate the XML output
if err := prompt.ValidateXML(output); err != nil {
fmt.Printf("Error: %s\n", err)
os.Exit(1)
}

if outputFile != "" {
// if output file exists, throw error
if _, err := os.Stat(outputFile); err == nil {
fmt.Printf("Error: output file %s already exists\n", outputFile)
os.Exit(1)
}
err = os.WriteFile(outputFile, []byte(output), 0644)
if err != nil {
fmt.Printf("Error: could not write to output file %s\n", outputFile)
os.Exit(1)
}
} else {
if !debug {
fmt.Println(output)
}
}
return
}
output, err := prompt.OutputGitRepo(repo, preambleFile, scrubComments)
if err != nil {
fmt.Printf("Error: %s\n", err)
Expand Down Expand Up @@ -93,6 +125,8 @@ func init() {
rootCmd.Flags().BoolVarP(&ignoreGitignore, "ignore-gitignore", "g", false, "ignore .gitignore file")
// output JSON. Should be a bool
rootCmd.Flags().BoolVarP(&outputJSON, "json", "j", false, "output JSON")
// output XML. Should be a bool
rootCmd.Flags().BoolVarP(&outputXML, "xml", "x", false, "output XML")
// debug. Should be a bool
rootCmd.Flags().BoolVarP(&debug, "debug", "d", false, "debug mode. Do not output to standard output")
// scrub comments. Should be a bool
Expand Down
88 changes: 82 additions & 6 deletions prompt/prompt.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ package prompt
import (
"bufio"
"encoding/json"
"encoding/xml"
"fmt"
"io"
"os"
"path/filepath"
"strings"
Expand All @@ -16,16 +18,16 @@ import (

// GitFile is a file in a Git repository
type GitFile struct {
Path string `json:"path"` // path to the file relative to the repository root
Tokens int64 `json:"tokens"` // number of tokens in the file
Contents string `json:"contents"` // contents of the file
Path string `json:"path" xml:"path"` // path to the file relative to the repository root
Tokens int64 `json:"tokens" xml:"tokens"` // number of tokens in the file
Contents string `json:"contents" xml:"contents"` // contents of the file
}

// GitRepo is a Git repository
type GitRepo struct {
TotalTokens int64 `json:"total_tokens"`
Files []GitFile `json:"files"`
FileCount int `json:"file_count"`
TotalTokens int64 `json:"total_tokens" xml:"total_tokens"`
Files []GitFile `json:"files" xml:"files>file"`
FileCount int `json:"file_count" xml:"file_count"`
}

// contains checks if a string is in a slice of strings
Expand Down Expand Up @@ -164,6 +166,80 @@ func OutputGitRepo(repo *GitRepo, preambleFile string, scrubComments bool) (stri
return output, nil
}

// OutputGitRepoXML renders repo as an XML document and returns it as a string.
//
// When scrubComments is true, each file's Contents is first passed through
// utils.RemoveCodeComments and the result is stored back into repo.Files.
//
// The document has a <GitRepo> root containing <total_tokens>, <file_count>
// and a <files> list with one <file> element (path, tokens, contents) per
// entry in repo.Files. Paths are entity-escaped; contents are emitted as CDATA
// so source code does not need entity escaping.
//
// As a side effect, repo.TotalTokens is set to the value EstimateTokens
// returns for the rendered document. The returned error is currently always
// nil.
func OutputGitRepoXML(repo *GitRepo, scrubComments bool) (string, error) {
	if scrubComments {
		for i := range repo.Files {
			repo.Files[i].Contents = utils.RemoveCodeComments(repo.Files[i].Contents)
		}
	}

	var result strings.Builder
	result.WriteString("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")
	result.WriteString("<GitRepo>\n")

	// The total token count is only known once the whole document exists, so
	// a placeholder is written here and substituted at the end.
	result.WriteString(" <total_tokens>PLACEHOLDER</total_tokens>\n")
	fmt.Fprintf(&result, " <file_count>%d</file_count>\n", repo.FileCount)
	result.WriteString(" <files>\n")

	for _, file := range repo.Files {
		result.WriteString(" <file>\n")
		fmt.Fprintf(&result, " <path>%s</path>\n", escapeXML(file.Path))
		fmt.Fprintf(&result, " <tokens>%d</tokens>\n", file.Tokens)
		result.WriteString(" <contents><![CDATA[")
		// A literal "]]>" inside the contents would terminate the CDATA
		// section early and corrupt the document. Split it across two
		// adjacent CDATA sections; a parser reads them back as the original
		// text.
		result.WriteString(strings.ReplaceAll(file.Contents, "]]>", "]]]]><![CDATA[>"))
		result.WriteString("]]></contents>\n")
		result.WriteString(" </file>\n")
	}

	result.WriteString(" </files>\n")
	result.WriteString("</GitRepo>")

	outputStr := result.String()

	// The estimate is taken over the document with the placeholder still in
	// place.
	tokenCount := EstimateTokens(outputStr)
	repo.TotalTokens = tokenCount

	// The placeholder element is written before any file data, so the first
	// occurrence is always the header one even if a file happens to contain
	// the same text.
	outputStr = strings.Replace(outputStr, "<total_tokens>PLACEHOLDER</total_tokens>",
		fmt.Sprintf("<total_tokens>%d</total_tokens>", tokenCount), 1)

	return outputStr, nil
}

// escapeXML returns s with the five XML special characters (&, <, >, ", ')
// replaced by their predefined entity references.
func escapeXML(s string) string {
	// A single-pass replacer never re-examines its own output, so the
	// ampersands introduced by the entities are not escaped a second time.
	entities := strings.NewReplacer(
		"&", "&amp;",
		"<", "&lt;",
		">", "&gt;",
		"\"", "&quot;",
		"'", "&apos;",
	)
	return entities.Replace(s)
}

// ValidateXML reports whether xmlString can be tokenized by encoding/xml
// without error. It reads tokens until the decoder reports io.EOF and returns
// nil in that case; any other decoder error is returned wrapped with an
// "XML validation error" prefix.
func ValidateXML(xmlString string) error {
	dec := xml.NewDecoder(strings.NewReader(xmlString))
	for {
		switch _, err := dec.Token(); err {
		case nil:
			// Token read successfully; keep draining the input.
			continue
		case io.EOF:
			return nil
		default:
			return fmt.Errorf("XML validation error: %w", err)
		}
	}
}



func MarshalRepo(repo *GitRepo, scrubComments bool) ([]byte, error) {
// run the output function to get the total tokens
_, err := OutputGitRepo(repo, "", scrubComments)
Expand Down