diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index a64c0c7..87aa85b 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -1,19 +1,13 @@ name: Bug Report description: Report a bug or unexpected behavior -title: "[Bug]: " labels: ["bug"] body: - - type: markdown - attributes: - value: | - Thanks for reporting an issue! Please fill out the sections below. - - type: textarea id: description attributes: label: Description - description: A clear description of what the bug is. - placeholder: Tell us what happened + description: What happened? + placeholder: Describe the issue validations: required: true @@ -21,37 +15,19 @@ body: id: reproduction attributes: label: Steps to Reproduce - description: Steps to reproduce the behavior placeholder: | 1. Load diff with `load_diff("path/to/file.diff")` 2. Run `list_chunks()` 3. See error - validations: - required: true - - - type: textarea - id: expected - attributes: - label: Expected Behavior - description: What you expected to happen - validations: - required: true - type: textarea id: environment attributes: label: Environment - description: Your environment details - value: | - - OS: [e.g., Windows 11, macOS 14, Ubuntu 22.04] - - Python version: [e.g., 3.10.12] - - diffchunk version: [e.g., 0.1.0] - - MCP client: [e.g., Claude Desktop] - validations: - required: true + placeholder: OS, Python version, diffchunk version, etc. - type: textarea id: additional attributes: label: Additional Context - description: Any other context, screenshots, or files related to the problem \ No newline at end of file + description: Screenshots, logs, or other relevant information diff --git a/.gitignore b/.gitignore index ce14b5a..9df717f 100644 --- a/.gitignore +++ b/.gitignore @@ -75,3 +75,6 @@ Thumbs.db *.patch !tests/test_data/*.diff test_output/ + +# example repos folders for diffs +repos/ \ No newline at end of file diff --git a/src/parser.py b/src/parser.py index 30e59ed..134ab44 100644 --- a/src/parser.py +++ b/src/parser.py @@ -19,11 +19,7 @@ def __init__(self): def parse_diff_file(self, file_path: str) -> Iterator[Tuple[List[str], str]]: """Parse a diff file and yield (files, content) tuples.""" - try: - with open(file_path, "r", encoding="utf-8", errors="replace") as f: - lines = f.readlines() - except (IOError, OSError) as e: - raise ValueError(f"Cannot read diff file {file_path}: {e}") + lines = self._read_diff_file(file_path) if not lines: return @@ -126,6 +122,31 @@ def should_include_file( return True # Include by default if no patterns specified + def _read_diff_file(self, file_path: str) -> List[str]: + """Read diff file with proper encoding handling.""" + # Try UTF-8 first (most common) + encodings = ["utf-8", "utf-8-sig", "cp1252", "latin-1"] + + for encoding in encodings: + try: + with open(file_path, "r", encoding=encoding) as f: + content = f.read() + + # Strip BOM if present + if content.startswith("\ufeff"): + content = content[1:] + + lines = content.splitlines(keepends=True) + return lines + + except (UnicodeDecodeError, IOError): + continue + + # If all encodings failed, raise clear error + raise ValueError( + f"Cannot read diff file {file_path}: unable to decode with any common encoding" + ) + def count_lines(self, content: str) -> int: """Count meaningful lines in diff content.""" return len([line for line in content.split("\n") if line.strip()]) diff --git a/tests/test_data/minimal_bom.diff b/tests/test_data/minimal_bom.diff new file mode 100644 index 0000000..0ba54c0 --- /dev/null +++ b/tests/test_data/minimal_bom.diff @@ -0,0 +1,8 @@ +diff --git a/file1.txt b/file1.txt +index 1234567..abcdefg 100644 +--- a/file1.txt ++++ b/file1.txt +@@ -1,2 +1,2 @@ + line 1 +-old line ++new line diff --git a/tests/test_data/minimal_latin1.diff b/tests/test_data/minimal_latin1.diff new file mode 100644 index 0000000..8b154c9 --- /dev/null +++ b/tests/test_data/minimal_latin1.diff @@ -0,0 +1,8 @@ +diff --git a/fil�.txt b/fil�.txt +index 1234567..abcdefg 100644 +--- a/fil�.txt ++++ b/fil�.txt +@@ -1,2 +1,2 @@ + line 1 +-old lin� ++new lin� diff --git a/tests/test_data/minimal_windows.diff b/tests/test_data/minimal_windows.diff new file mode 100644 index 0000000..d51f7d0 --- /dev/null +++ b/tests/test_data/minimal_windows.diff @@ -0,0 +1,8 @@ +diff --git a/file1.txt b/file1.txt +index 1234567..abcdefg 100644 +--- a/file1.txt ++++ b/file1.txt +@@ -1,2 +1,2 @@ + line 1 +-old line ++new line diff --git a/tests/test_data/minimal_working.diff b/tests/test_data/minimal_working.diff new file mode 100644 index 0000000..b07d4fc --- /dev/null +++ b/tests/test_data/minimal_working.diff @@ -0,0 +1,8 @@ +diff --git a/file1.txt b/file1.txt +index 1234567..abcdefg 100644 +--- a/file1.txt ++++ b/file1.txt +@@ -1,2 +1,2 @@ + line 1 +-old line ++new line diff --git a/tests/test_windows_repro.py b/tests/test_windows_repro.py new file mode 100644 index 0000000..3be1f41 --- /dev/null +++ b/tests/test_windows_repro.py @@ -0,0 +1,34 @@ +"""Test to reproduce Windows "No valid diff content found" issue.""" + +import pytest +from pathlib import Path + +from src.tools import DiffChunkTools + + +class TestWindowsEncoding: + """Test Windows encoding issues that cause 'No valid diff content found'.""" + + @pytest.fixture + def test_data_dir(self): + return Path(__file__).parent / "test_data" + + @pytest.fixture + def tools(self): + return DiffChunkTools() + + def test_encoding_scenarios_work_with_fix(self, tools, test_data_dir): + """Test that encoding scenarios work with the fix.""" + working_files = [ + "minimal_working.diff", # UTF-8 baseline + "minimal_windows.diff", # Windows \r\n line endings + "minimal_bom.diff", # UTF-8 BOM (now handled) + "minimal_latin1.diff", # Latin-1 encoding (now handled) + ] + + for filename in working_files: + result = tools.load_diff( + str(test_data_dir / filename), max_chunk_lines=1000 + ) + assert result["chunks"] > 0, f"{filename} should work with fix" + assert result["files"] > 0, f"{filename} should have files"