+{"total_tokens":3557,"files":[{"path":"prompt/gptinclude_test.go","tokens":1181,"contents":"package prompt\n\nimport (\n\t\"os\"\n\t\"path/filepath\"\n\t\"testing\"\n)\n\nfunc TestGptIncludeAndIgnore(t *testing.T) {\n\t// Create a temporary directory structure for testing\n\ttempDir, err := os.MkdirTemp(\"\", \"git2gpt-test\")\n\tif err != nil {\n\t\tt.Fatalf(\"Failed to create temp directory: %v\", err)\n\t}\n\tdefer os.RemoveAll(tempDir)\n\n\t// Create test files\n\ttestFiles := []struct {\n\t\tpath string\n\t\tcontents string\n\t}{\n\t\t{\"file1.txt\", \"Content of file1\"},\n\t\t{\"file2.txt\", \"Content of file2\"},\n\t\t{\"file3.txt\", \"Content of file3\"},\n\t\t{\"src/main.go\", \"package main\\nfunc main() {}\"},\n\t\t{\"src/lib/util.go\", \"package lib\\nfunc Util() {}\"},\n\t\t{\"docs/README.md\", \"# Documentation\"},\n\t}\n\n\tfor _, tf := range testFiles {\n\t\tfullPath := filepath.Join(tempDir, tf.path)\n\t\t// Create directory if it doesn't exist\n\t\tdir := filepath.Dir(fullPath)\n\t\tif err := os.MkdirAll(dir, 0755); err != nil {\n\t\t\tt.Fatalf(\"Failed to create directory %s: %v\", dir, err)\n\t\t}\n\t\t// Write the file\n\t\tif err := os.WriteFile(fullPath, []byte(tf.contents), 0644); err != nil {\n\t\t\tt.Fatalf(\"Failed to write file %s: %v\", fullPath, err)\n\t\t}\n\t}\n\n\t// Test cases\n\ttestCases := []struct {\n\t\tname string\n\t\tincludeContent string\n\t\tignoreContent string\n\t\texpectedFiles []string\n\t\tunexpectedFiles []string\n\t}{\n\t\t{\n\t\t\tname: \"Only include src directory\",\n\t\t\tincludeContent: \"src/**\",\n\t\t\tignoreContent: \"\",\n\t\t\texpectedFiles: []string{\"src/main.go\", \"src/lib/util.go\"},\n\t\t\tunexpectedFiles: []string{\"file1.txt\", \"file2.txt\", \"file3.txt\", \"docs/README.md\"},\n\t\t},\n\t\t{\n\t\t\tname: \"Include all, but ignore .txt files\",\n\t\t\tincludeContent: \"**\",\n\t\t\tignoreContent: \"*.txt\",\n\t\t\texpectedFiles: []string{\"src/main.go\", \"src/lib/util.go\", \"docs/README.md\"},\n\t\t\tunexpectedFiles: []string{\"file1.txt\", \"file2.txt\", \"file3.txt\"},\n\t\t},\n\t\t{\n\t\t\tname: \"Include src and docs, but ignore lib directory\",\n\t\t\tincludeContent: \"src/**\\ndocs/**\",\n\t\t\tignoreContent: \"src/lib/**\",\n\t\t\texpectedFiles: []string{\"src/main.go\", \"docs/README.md\"},\n\t\t\tunexpectedFiles: []string{\"file1.txt\", \"file2.txt\", \"file3.txt\", \"src/lib/util.go\"},\n\t\t},\n\t\t{\n\t\t\tname: \"No include file (should include all), ignore .txt files\",\n\t\t\tincludeContent: \"\",\n\t\t\tignoreContent: \"*.txt\",\n\t\t\texpectedFiles: []string{\"src/main.go\", \"src/lib/util.go\", \"docs/README.md\"},\n\t\t\tunexpectedFiles: []string{\"file1.txt\", \"file2.txt\", \"file3.txt\"},\n\t\t},\n\t}\n\n\tfor _, tc := range testCases {\n\t\tt.Run(tc.name, func(t *testing.T) {\n\t\t\t// Create .gptinclude file if needed\n\t\t\tincludeFilePath := filepath.Join(tempDir, \".gptinclude\")\n\t\t\tif tc.includeContent != \"\" {\n\t\t\t\tif err := os.WriteFile(includeFilePath, []byte(tc.includeContent), 0644); err != nil {\n\t\t\t\t\tt.Fatalf(\"Failed to write .gptinclude file: %v\", err)\n\t\t\t\t}\n\t\t\t} else {\n\t\t\t\t// Ensure no .gptinclude file exists\n\t\t\t\tos.Remove(includeFilePath)\n\t\t\t}\n\n\t\t\t// Create .gptignore file if needed\n\t\t\tignoreFilePath := filepath.Join(tempDir, \".gptignore\")\n\t\t\tif tc.ignoreContent != \"\" {\n\t\t\t\tif err := os.WriteFile(ignoreFilePath, []byte(tc.ignoreContent), 0644); err != nil {\n\t\t\t\t\tt.Fatalf(\"Failed to write .gptignore file: %v\", err)\n\t\t\t\t}\n\t\t\t} else {\n\t\t\t\t// Ensure no .gptignore file exists\n\t\t\t\tos.Remove(ignoreFilePath)\n\t\t\t}\n\n\t\t\t// Generate include and ignore lists\n\t\t\tincludeList := GenerateIncludeList(tempDir, \"\")\n\t\t\tignoreList := GenerateIgnoreList(tempDir, \"\", false)\n\n\t\t\t// Process the repository\n\t\t\trepo, err := ProcessGitRepo(tempDir, includeList, ignoreList)\n\t\t\tif err != nil {\n\t\t\t\tt.Fatalf(\"Failed to process repository: %v\", err)\n\t\t\t}\n\n\t\t\t// Check if expected files are included\n\t\t\tfor _, expectedFile := range tc.expectedFiles {\n\t\t\t\tfound := false\n\t\t\t\tfor _, file := range repo.Files {\n\t\t\t\t\tif file.Path == expectedFile {\n\t\t\t\t\t\tfound = true\n\t\t\t\t\t\tbreak\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t\tif !found {\n\t\t\t\t\tt.Errorf(\"Expected file %s to be included, but it wasn't\", expectedFile)\n\t\t\t\t}\n\t\t\t}\n\n\t\t\t// Check if unexpected files are excluded\n\t\t\tfor _, unexpectedFile := range tc.unexpectedFiles {\n\t\t\t\tfor _, file := range repo.Files {\n\t\t\t\t\tif file.Path == unexpectedFile {\n\t\t\t\t\t\tt.Errorf(\"File %s should have been excluded, but it was included\", unexpectedFile)\n\t\t\t\t\t\tbreak\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\t\t})\n\t}\n}"},{"path":"prompt/prompt.go","tokens":2521,"contents":"package prompt\n\nimport (\n\t\"bufio\"\n\t\"encoding/json\"\n\t\"encoding/xml\"\n\t\"fmt\"\n\t\"io\"\n\t\"os\"\n\t\"path/filepath\"\n\t\"strings\"\n\t\"unicode/utf8\"\n\t\"github.com/chand1012/git2gpt/utils\"\n\t\"github.com/gobwas/glob\"\n\t\"github.com/pkoukk/tiktoken-go\"\n)\n\ntype GitFile struct {\n\tPath string `json:\"path\" xml:\"path\"` // path to the file relative to the repository root\n\tTokens int64 `json:\"tokens\" xml:\"tokens\"` // number of tokens in the file\n\tContents string `json:\"contents\" xml:\"contents\"` // contents of the file\n}\n\ntype GitRepo struct {\n\tTotalTokens int64 `json:\"total_tokens\" xml:\"total_tokens\"`\n\tFiles []GitFile `json:\"files\" xml:\"files\u003efile\"`\n\tFileCount int `json:\"file_count\" xml:\"file_count\"`\n}\n\nfunc contains(s []string, e string) bool {\n\tfor _, a := range s {\n\t\tif a == e {\n\t\t\treturn true\n\t\t}\n\t}\n\treturn false\n}\n\nfunc getIgnoreList(ignoreFilePath string) ([]string, error) {\n\tvar ignoreList []string\n\tfile, err := os.Open(ignoreFilePath)\n\tif err != nil {\n\t\treturn ignoreList, err\n\t}\n\tdefer file.Close()\n\tscanner := bufio.NewScanner(file)\n\tfor scanner.Scan() {\n\t\tline := strings.TrimSpace(scanner.Text())\n\t\tif line == \"\" || strings.HasPrefix(line, \"#\") {\n\t\t\tcontinue\n\t\t}\n\t\tif strings.HasSuffix(line, \"/\") {\n\t\t\tline = line + \"**\"\n\t\t}\n\t\tline = strings.TrimPrefix(line, \"/\")\n\t\tignoreList = append(ignoreList, line)\n\t}\n\treturn ignoreList, scanner.Err()\n}\n\n// Similar to getIgnoreList, but for .gptinclude files\nfunc getIncludeList(includeFilePath string) ([]string, error) {\n\tvar includeList []string\n\tfile, err := os.Open(includeFilePath)\n\tif err != nil {\n\t\treturn includeList, err\n\t}\n\tdefer file.Close()\n\tscanner := bufio.NewScanner(file)\n\tfor scanner.Scan() {\n\t\tline := strings.TrimSpace(scanner.Text())\n\t\tif line == \"\" || strings.HasPrefix(line, \"#\") {\n\t\t\tcontinue\n\t\t}\n\t\tif strings.HasSuffix(line, \"/\") {\n\t\t\tline = line + \"**\"\n\t\t}\n\t\tline = strings.TrimPrefix(line, \"/\")\n\t\tincludeList = append(includeList, line)\n\t}\n\treturn includeList, scanner.Err()\n}\n\nfunc windowsToUnixPath(windowsPath string) string {\n\tunixPath := strings.ReplaceAll(windowsPath, \"\\\\\", \"/\")\n\treturn unixPath\n}\n\n// This function is kept for backward compatibility\nfunc shouldIgnore(filePath string, ignoreList []string) bool {\n\tfor _, pattern := range ignoreList {\n\t\tg := glob.MustCompile(pattern, '/')\n\t\tif g.Match(windowsToUnixPath(filePath)) {\n\t\t\treturn true\n\t\t}\n\t}\n\treturn false\n}\n\n// Determines if a file should be included in the output\n// First checks if the file matches the include list (if provided)\n// Then checks if the file is excluded by the ignore list\nfunc shouldProcess(filePath string, includeList, ignoreList []string) bool {\n\t// If includeList is provided, check if the file is included\n\tif len(includeList) \u003e 0 {\n\t\tincluded := false\n\t\tfor _, pattern := range includeList {\n\t\t\tg := glob.MustCompile(pattern, '/')\n\t\t\tif g.Match(windowsToUnixPath(filePath)) {\n\t\t\t\tincluded = true\n\t\t\t\tbreak\n\t\t\t}\n\t\t}\n\t\tif !included {\n\t\t\treturn false // If not in the include list, skip it\n\t\t}\n\t}\n\t\n\t// Check if the file is excluded by ignoreList\n\tfor _, pattern := range ignoreList {\n\t\tg := glob.MustCompile(pattern, '/')\n\t\tif g.Match(windowsToUnixPath(filePath)) {\n\t\t\treturn false // If in the ignore list, skip it\n\t\t}\n\t}\n\t\n\treturn true // Process this file\n}\n\nfunc GenerateIgnoreList(repoPath, ignoreFilePath string, useGitignore bool) []string {\n\tif ignoreFilePath == \"\" {\n\t\tignoreFilePath = filepath.Join(repoPath, \".gptignore\")\n\t}\n\tvar ignoreList []string\n\tif _, err := os.Stat(ignoreFilePath); err == nil {\n\t\tignoreList, _ = getIgnoreList(ignoreFilePath)\n\t}\n\tignoreList = append(ignoreList, \".git/**\", \".gitignore\", \".gptignore\", \".gptinclude\")\n\tif useGitignore {\n\t\tgitignorePath := filepath.Join(repoPath, \".gitignore\")\n\t\tif _, err := os.Stat(gitignorePath); err == nil {\n\t\t\tgitignoreList, _ := getIgnoreList(gitignorePath)\n\t\t\tignoreList = append(ignoreList, gitignoreList...)\n\t\t}\n\t}\n\tvar finalIgnoreList []string\n\tfor _, pattern := range ignoreList {\n\t\tif !contains(finalIgnoreList, pattern) {\n\t\t\tinfo, err := os.Stat(filepath.Join(repoPath, pattern))\n\t\t\tif err == nil \u0026\u0026 info.IsDir() {\n\t\t\t\tpattern = filepath.Join(pattern, \"**\")\n\t\t\t}\n\t\t\tfinalIgnoreList = append(finalIgnoreList, pattern)\n\t\t}\n\t}\n\treturn finalIgnoreList\n}\n\n// Generate include list from .gptinclude file\nfunc GenerateIncludeList(repoPath, includeFilePath string) []string {\n\tif includeFilePath == \"\" {\n\t\tincludeFilePath = filepath.Join(repoPath, \".gptinclude\")\n\t}\n\tvar includeList []string\n\tif _, err := os.Stat(includeFilePath); err == nil {\n\t\tincludeList, _ = getIncludeList(includeFilePath)\n\t}\n\t\n\tvar finalIncludeList []string\n\tfor _, pattern := range includeList {\n\t\tif !contains(finalIncludeList, pattern) {\n\t\t\tinfo, err := os.Stat(filepath.Join(repoPath, pattern))\n\t\t\tif err == nil \u0026\u0026 info.IsDir() {\n\t\t\t\tpattern = filepath.Join(pattern, \"**\")\n\t\t\t}\n\t\t\tfinalIncludeList = append(finalIncludeList, pattern)\n\t\t}\n\t}\n\treturn finalIncludeList\n}\n\n// Update the function signature to accept includeList\nfunc ProcessGitRepo(repoPath string, includeList, ignoreList []string) (*GitRepo, error) {\n\tvar repo GitRepo\n\terr := processRepository(repoPath, includeList, ignoreList, \u0026repo)\n\tif err != nil {\n\t\treturn nil, fmt.Errorf(\"error processing repository: %w\", err)\n\t}\n\treturn \u0026repo, nil\n}\n\nfunc OutputGitRepo(repo *GitRepo, preambleFile string, scrubComments bool) (string, error) {\n\tvar repoBuilder strings.Builder\n\tif preambleFile != \"\" {\n\t\tpreambleText, err := os.ReadFile(preambleFile)\n\t\tif err != nil {\n\t\t\treturn \"\", fmt.Errorf(\"error reading preamble file: %w\", err)\n\t\t}\n\t\trepoBuilder.WriteString(fmt.Sprintf(\"%s\\n\", string(preambleText)))\n\t} else {\n\t\trepoBuilder.WriteString(\"The following text is a Git repository with code. The structure of the text are sections that begin with ----, followed by a single line containing the file path and file name, followed by a variable amount of lines containing the file contents. The text representing the Git repository ends when the symbols --END-- are encounted. Any further text beyond --END-- are meant to be interpreted as instructions using the aforementioned Git repository as context.\\n\")\n\t}\n\tfor _, file := range repo.Files {\n\t\trepoBuilder.WriteString(\"----\\n\")\n\t\trepoBuilder.WriteString(fmt.Sprintf(\"%s\\n\", file.Path))\n\t\tif scrubComments {\n\t\t\tfile.Contents = utils.RemoveCodeComments(file.Contents)\n\t\t}\n\t\trepoBuilder.WriteString(fmt.Sprintf(\"%s\\n\", file.Contents))\n\t}\n\trepoBuilder.WriteString(\"--END--\")\n\toutput := repoBuilder.String()\n\trepo.TotalTokens = EstimateTokens(output)\n\treturn output, nil\n}\n\nfunc OutputGitRepoXML(repo *GitRepo, scrubComments bool) (string, error) {\n\tif scrubComments {\n\t\t\tfor i, file := range repo.Files {\n\t\t\t\t\trepo.Files[i].Contents = utils.RemoveCodeComments(file.Contents)\n\t\t\t}\n\t}\n\tvar result strings.Builder\n\tresult.WriteString(\"\u003c?xml version=\\\"1.0\\\" encoding=\\\"UTF-8\\\"?\u003e\\n\")\n\tresult.WriteString(\"\u003croot\u003e\\n\")\n\t\n\tresult.WriteString(\" \u003ctotal_tokens\u003ePLACEHOLDER\u003c/total_tokens\u003e\\n\")\n\tresult.WriteString(fmt.Sprintf(\" \u003cfile_count\u003e%d\u003c/file_count\u003e\\n\", repo.FileCount))\n\tresult.WriteString(\" \u003cfiles\u003e\\n\")\n\t\n\tfor _, file := range repo.Files {\n\t\t\tresult.WriteString(\" \u003cfile\u003e\\n\")\n\t\t\tresult.WriteString(fmt.Sprintf(\" \u003cpath\u003e%s\u003c/path\u003e\\n\", escapeXML(file.Path)))\n\t\t\tresult.WriteString(fmt.Sprintf(\" \u003ctokens\u003e%d\u003c/tokens\u003e\\n\", file.Tokens))\n\t\t\t\n\t\t\tsafeContents := strings.ReplaceAll(file.Contents, \"]]]]\u003e\u003c![CDATA[\u003e\", \"]]]]]]\u003e\u003c![CDATA[\u003e\u003c![CDATA[\u003e\")\n\t\t\t\n\t\t\tresult.WriteString(\" \u003ccontents\u003e\u003c![CDATA[\")\n\t\t\tresult.WriteString(safeContents)\n\t\t\tresult.WriteString(\"]]]]\u003e\u003c![CDATA[\u003e\u003c/contents\u003e\\n\")\n\t\t\tresult.WriteString(\" \u003c/file\u003e\\n\")\n\t}\n\t\n\tresult.WriteString(\" \u003c/files\u003e\\n\")\n\tresult.WriteString(\"\u003c/root\u003e\\n\")\n\t\n\toutputStr := result.String()\n\t\n\ttokenCount := EstimateTokens(outputStr)\n\trepo.TotalTokens = tokenCount\n\t\n\toutputStr = strings.Replace(\n\t\t\toutputStr, \n\t\t\t\"\u003ctotal_tokens\u003ePLACEHOLDER\u003c/total_tokens\u003e\", \n\t\t\tfmt.Sprintf(\"\u003ctotal_tokens\u003e%d\u003c/total_tokens\u003e\", tokenCount), \n\t\t\t1,\n\t)\n\t\n\treturn outputStr, nil\n}\n\nfunc escapeXML(s string) string {\n\ts = strings.ReplaceAll(s, \"\u0026\", \"\u0026amp;\")\n\ts = strings.ReplaceAll(s, \"\u003c\", \"\u0026lt;\")\n\ts = strings.ReplaceAll(s, \"\u003e\", \"\u0026gt;\")\n\ts = strings.ReplaceAll(s, \"\\\"\", \"\u0026quot;\")\n\ts = strings.ReplaceAll(s, \"'\", \"\u0026apos;\")\n\treturn s\n}\n\nfunc ValidateXML(xmlString string) error {\n decoder := xml.NewDecoder(strings.NewReader(xmlString))\n for {\n _, err := decoder.Token()\n if err == io.EOF {\n break\n }\n if err != nil {\n return fmt.Errorf(\"XML validation error: %w\", err)\n }\n }\n return nil\n}\n\nfunc MarshalRepo(repo *GitRepo, scrubComments bool) ([]byte, error) {\n\t_, err := OutputGitRepo(repo, \"\", scrubComments)\n\tif err != nil {\n\t\treturn nil, fmt.Errorf(\"error marshalling repo: %w\", err)\n\t}\n\treturn json.Marshal(repo)\n}\n\n// Update the function signature to accept includeList and use shouldProcess\nfunc processRepository(repoPath string, includeList, ignoreList []string, repo *GitRepo) error {\n\terr := filepath.Walk(repoPath, func(path string, info os.FileInfo, err error) error {\n\t\tif err != nil {\n\t\t\treturn err\n\t\t}\n\t\tif !info.IsDir() {\n\t\t\trelativeFilePath, _ := filepath.Rel(repoPath, path)\n\t\t\tprocess := shouldProcess(relativeFilePath, includeList, ignoreList)\n\t\t\tif process {\n\t\t\t\tcontents, err := os.ReadFile(path)\n\t\t\t\tif !utf8.Valid(contents) {\n\t\t\t\t\treturn nil\n\t\t\t\t}\n\t\t\t\tif err != nil {\n\t\t\t\t\treturn err\n\t\t\t\t}\n\t\t\t\tvar file GitFile\n\t\t\t\tfile.Path = relativeFilePath\n\t\t\t\tfile.Contents = string(contents)\n\t\t\t\tfile.Tokens = EstimateTokens(file.Contents)\n\t\t\t\trepo.Files = append(repo.Files, file)\n\t\t\t}\n\t\t}\n\t\treturn nil\n\t})\n\trepo.FileCount = len(repo.Files)\n\tif err != nil {\n\t\treturn fmt.Errorf(\"error walking the path %q: %w\", repoPath, err)\n\t}\n\treturn nil\n}\n\nfunc EstimateTokens(output string) int64 {\n\ttke, err := tiktoken.GetEncoding(\"cl100k_base\")\n\tif err != nil {\n\t\tfmt.Println(\"Error getting encoding:\", err)\n\t\treturn 0\n\t}\n\ttokens := tke.Encode(output, nil, nil)\n\treturn int64(len(tokens))\n}"}],"file_count":2}
0 commit comments