Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gptinclude
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
prompt/
42 changes: 39 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,54 @@ To use the git2gpt utility, run the following command:
git2gpt [flags] /path/to/git/repository
```

### Ignoring Files
### Including and Ignoring Files

By default, your `.git` directory and your `.gitignore` files are ignored. Any files in your `.gitignore` are also skipped. If you want to change this behavior, you should add a `.gptignore` file to your repository. The `.gptignore` file should contain a list of files and directories to ignore, one per line. The `.gptignore` file should be in the same directory as your `.gitignore` file. Please note that this overwrites the default ignore list, so you should include the default ignore list in your `.gptignore` file if you want to keep it.
By default, your `.git` directory and your `.gitignore` files are ignored. Any files matched by the patterns in your `.gitignore` are also skipped. You can customize which files are included or ignored in several ways:

### Flags
### Including Only Specific Files (.gptinclude)

Add a `.gptinclude` file to your repository to specify which files should be included in the output. Each line in the file should contain a glob pattern of files or directories to include. If a `.gptinclude` file is present, only files that match these patterns will be included.

Example `.gptinclude` file:
```
# Include only these file types
*.go
*.js
*.html
*.css

# Include specific directories
src/**
docs/api/**
```

### Ignoring Specific Files (.gptignore)

Add a `.gptignore` file to your repository to specify which files should be ignored. This works similarly to `.gitignore`, but is specific to git2gpt. The `.gptignore` file should contain a list of files and directories to ignore, one per line.

Example `.gptignore` file:
```
# Ignore these file types
*.log
*.tmp
*.bak

# Ignore specific directories
node_modules/**
build/**
```

**Note**: When both `.gptinclude` and `.gptignore` files exist, git2gpt will first include files matching the `.gptinclude` patterns, and then exclude any of those files that also match `.gptignore` patterns.

## Command Line Options

* `-p`, `--preamble`: Path to a text file containing a preamble to include at the beginning of the output file.
* `-o`, `--output`: Path to the output file. If not specified, will print to standard output.
* `-e`, `--estimate`: Estimate the tokens of the output file. If not specified, does not estimate.
* `-j`, `--json`: Output to JSON rather than plain text. Use with `-o` to specify the output file.
* `-x`, `--xml`: Output to XML rather than plain text. Use with `-o` to specify the output file.
* `-i`, `--ignore`: Path to the `.gptignore` file. If not specified, will look for a `.gptignore` file in the same directory as the `.gitignore` file.
* `-I`, `--include`: Path to the `.gptinclude` file. If not specified, will look for a `.gptinclude` file in the repository root.
* `-g`, `--ignore-gitignore`: Ignore the `.gitignore` file.
* `-s`, `--scrub-comments`: Remove comments from the output file to save tokens.

Expand Down
36 changes: 5 additions & 31 deletions cmd/root.go
Original file line number Diff line number Diff line change
@@ -1,49 +1,40 @@
package cmd

import (
"fmt"
"os"

"github.com/chand1012/git2gpt/prompt"
"github.com/spf13/cobra"
)

// Command state shared between flag parsing and the root command's Run
// function.
var (
	// repoPath is the repository path currently being processed; Run
	// reassigns it for every positional argument.
	repoPath string

	// preambleFile is bound to the -p/--preamble flag.
	preambleFile string
	// outputFile is bound to the -o/--output flag; empty means no file was requested.
	outputFile string
	// estimateTokens is bound to the -e/--estimate flag.
	estimateTokens bool

	// ignoreFilePath is bound to the -i/--ignore flag (path to a .gptignore file).
	ignoreFilePath string
	// includeFilePath is bound to the -I/--include flag (path to a .gptinclude file).
	includeFilePath string
	// ignoreGitignore is bound to the -g/--ignore-gitignore flag.
	ignoreGitignore bool

	// outputJSON is bound to the -j/--json flag.
	outputJSON bool
	// outputXML is bound to the -x/--xml flag.
	outputXML bool
	// debug is bound to the -d/--debug flag.
	debug bool
	// scrubComments is bound to the -s/--scrub-comments flag.
	scrubComments bool
)

var rootCmd = &cobra.Command{
Use: "git2gpt [flags] /path/to/git/repository [/path/to/another/repository ...]",
Short: "git2gpt is a utility to convert one or more Git repositories to a text file for input into an LLM",
Args: cobra.MinimumNArgs(1),
Run: func(cmd *cobra.Command, args []string) {
// Create a combined repository to hold all files
combinedRepo := &prompt.GitRepo{
Files: []prompt.GitFile{},
}

// Process each repository path
for _, path := range args {
repoPath = path
ignoreList := prompt.GenerateIgnoreList(repoPath, ignoreFilePath, !ignoreGitignore)
repo, err := prompt.ProcessGitRepo(repoPath, ignoreList)
includeList := prompt.GenerateIncludeList(repoPath, includeFilePath) // New: Generate include list
repo, err := prompt.ProcessGitRepo(repoPath, includeList, ignoreList) // Modified: Pass includeList
if err != nil {
fmt.Printf("Error processing %s: %s\n", repoPath, err)
os.Exit(1)
}

// Add files from this repo to the combined repo
combinedRepo.Files = append(combinedRepo.Files, repo.Files...)
}

// Update the file count
combinedRepo.FileCount = len(combinedRepo.Files)
if outputJSON {
output, err := prompt.MarshalRepo(combinedRepo, scrubComments)
Expand All @@ -52,7 +43,6 @@ var rootCmd = &cobra.Command{
os.Exit(1)
}
if outputFile != "" {
// if output file exists, throw error
if _, err := os.Stat(outputFile); err == nil {
fmt.Printf("Error: output file %s already exists\n", outputFile)
os.Exit(1)
Expand All @@ -75,15 +65,11 @@ var rootCmd = &cobra.Command{
fmt.Printf("Error: %s\n", err)
os.Exit(1)
}

// Validate the XML output
if err := prompt.ValidateXML(output); err != nil {
fmt.Printf("Error: %s\n", err)
os.Exit(1)
}

if outputFile != "" {
// if output file exists, throw error
if _, err := os.Stat(outputFile); err == nil {
fmt.Printf("Error: output file %s already exists\n", outputFile)
os.Exit(1)
Expand All @@ -106,7 +92,6 @@ var rootCmd = &cobra.Command{
os.Exit(1)
}
if outputFile != "" {
// if output file exists, throw error
if _, err := os.Stat(outputFile); err == nil {
fmt.Printf("Error: output file %s already exists\n", outputFile)
os.Exit(1)
Expand All @@ -126,33 +111,22 @@ var rootCmd = &cobra.Command{
}
},
}

// init registers every command-line flag on rootCmd, binding each flag to its
// package-level variable, and sets the usage example shown in the help text.
func init() {
	flags := rootCmd.Flags()

	// String and bool flags controlling what is read and where output goes.
	flags.StringVarP(&preambleFile, "preamble", "p", "", "path to preamble text file")
	flags.StringVarP(&outputFile, "output", "o", "", "path to output file")
	flags.BoolVarP(&estimateTokens, "estimate", "e", false, "estimate the number of tokens in the output")

	// Flags selecting which files are considered.
	flags.StringVarP(&ignoreFilePath, "ignore", "i", "", "path to .gptignore file")
	flags.StringVarP(&includeFilePath, "include", "I", "", "path to .gptinclude file")
	flags.BoolVarP(&ignoreGitignore, "ignore-gitignore", "g", false, "ignore .gitignore file")

	// Flags selecting the output format and post-processing.
	flags.BoolVarP(&outputJSON, "json", "j", false, "output JSON")
	flags.BoolVarP(&outputXML, "xml", "x", false, "output XML")
	flags.BoolVarP(&debug, "debug", "d", false, "debug mode. Do not output to standard output")
	flags.BoolVarP(&scrubComments, "scrub-comments", "s", false, "scrub comments from the output. Decreases token count")

	// The example demonstrates that several repository paths may be passed at once.
	rootCmd.Example = " git2gpt /path/to/repo1 /path/to/repo2\n git2gpt -o output.txt /path/to/repo1 /path/to/repo2"
}

// Execute runs the root command. If the command returns an error, the error
// is printed to standard output and the process exits with status 1.
func Execute() {
	err := rootCmd.Execute()
	if err == nil {
		return
	}

	fmt.Println(err)
	os.Exit(1)
}
}
140 changes: 140 additions & 0 deletions prompt/gptinclude_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
package prompt

import (
"os"
"path/filepath"
"testing"
)

// TestGptIncludeAndIgnore builds a small file tree in a temporary directory
// and, for each case, writes (or removes) the .gptinclude and .gptignore files
// before running GenerateIncludeList, GenerateIgnoreList and ProcessGitRepo
// against it. It then asserts which relative paths appear in the resulting
// repo.Files.
func TestGptIncludeAndIgnore(t *testing.T) {
	// t.TempDir is cleaned up automatically when the test and its subtests finish.
	tempDir := t.TempDir()

	// Fixture tree shared by all cases.
	testFiles := []struct {
		path     string
		contents string
	}{
		{"file1.txt", "Content of file1"},
		{"file2.txt", "Content of file2"},
		{"file3.txt", "Content of file3"},
		{"src/main.go", "package main\nfunc main() {}"},
		{"src/lib/util.go", "package lib\nfunc Util() {}"},
		{"docs/README.md", "# Documentation"},
	}

	for _, tf := range testFiles {
		fullPath := filepath.Join(tempDir, tf.path)
		dir := filepath.Dir(fullPath)
		if err := os.MkdirAll(dir, 0755); err != nil {
			t.Fatalf("Failed to create directory %s: %v", dir, err)
		}
		if err := os.WriteFile(fullPath, []byte(tf.contents), 0644); err != nil {
			t.Fatalf("Failed to write file %s: %v", fullPath, err)
		}
	}

	// setPatternFile writes content to tempDir/name, or removes that file when
	// content is empty. The subtests share tempDir, so a removal that fails for
	// any reason other than "file does not exist" must abort the case:
	// otherwise patterns left over from a previous case would silently apply.
	setPatternFile := func(t *testing.T, name, content string) {
		t.Helper()
		path := filepath.Join(tempDir, name)
		if content == "" {
			if err := os.Remove(path); err != nil && !os.IsNotExist(err) {
				t.Fatalf("Failed to remove %s file: %v", name, err)
			}
			return
		}
		if err := os.WriteFile(path, []byte(content), 0644); err != nil {
			t.Fatalf("Failed to write %s file: %v", name, err)
		}
	}

	testCases := []struct {
		name            string
		includeContent  string
		ignoreContent   string
		expectedFiles   []string
		unexpectedFiles []string
	}{
		{
			name:            "Only include src directory",
			includeContent:  "src/**",
			ignoreContent:   "",
			expectedFiles:   []string{"src/main.go", "src/lib/util.go"},
			unexpectedFiles: []string{"file1.txt", "file2.txt", "file3.txt", "docs/README.md"},
		},
		{
			name:            "Include all, but ignore .txt files",
			includeContent:  "**",
			ignoreContent:   "*.txt",
			expectedFiles:   []string{"src/main.go", "src/lib/util.go", "docs/README.md"},
			unexpectedFiles: []string{"file1.txt", "file2.txt", "file3.txt"},
		},
		{
			name:            "Include src and docs, but ignore lib directory",
			includeContent:  "src/**\ndocs/**",
			ignoreContent:   "src/lib/**",
			expectedFiles:   []string{"src/main.go", "docs/README.md"},
			unexpectedFiles: []string{"file1.txt", "file2.txt", "file3.txt", "src/lib/util.go"},
		},
		{
			name:            "No include file (should include all), ignore .txt files",
			includeContent:  "",
			ignoreContent:   "*.txt",
			expectedFiles:   []string{"src/main.go", "src/lib/util.go", "docs/README.md"},
			unexpectedFiles: []string{"file1.txt", "file2.txt", "file3.txt"},
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			// Bring the pattern files in line with this case before listing.
			setPatternFile(t, ".gptinclude", tc.includeContent)
			setPatternFile(t, ".gptignore", tc.ignoreContent)

			// Empty path arguments: no explicit pattern-file path is supplied.
			includeList := GenerateIncludeList(tempDir, "")
			ignoreList := GenerateIgnoreList(tempDir, "", false)

			repo, err := ProcessGitRepo(tempDir, includeList, ignoreList)
			if err != nil {
				t.Fatalf("Failed to process repository: %v", err)
			}

			// included reports whether path is among the processed files.
			included := func(path string) bool {
				for _, file := range repo.Files {
					if file.Path == path {
						return true
					}
				}
				return false
			}

			for _, expectedFile := range tc.expectedFiles {
				if !included(expectedFile) {
					t.Errorf("Expected file %s to be included, but it wasn't", expectedFile)
				}
			}

			for _, unexpectedFile := range tc.unexpectedFiles {
				if included(unexpectedFile) {
					t.Errorf("File %s should have been excluded, but it was included", unexpectedFile)
				}
			}
		})
	}
}
Loading