Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions .codecov.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
coverage:
  status:
    project:
      default:
        target: 85%
        threshold: 3%
    patch:
      default:
        target: 70%
        threshold: 5%

comment:
  layout: "header, diff"
  behavior: default
  require_changes: true
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "diffchunk"
version = "0.1.6"
version = "0.1.7"
description = "MCP server for navigating large diff files with intelligent chunking"
readme = "README.md"
requires-python = ">=3.10"
Expand All @@ -26,6 +26,7 @@ classifiers = [
dependencies = [
"click>=8.2.1",
"mcp>=1.10.0",
"chardet>=4.0.0",
]

[project.urls]
Expand Down
2 changes: 1 addition & 1 deletion src/chunker.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def chunk_diff(
raise ValueError(f"Failed to parse diff: {e}")

if not file_changes:
raise ValueError("No valid diff content found")
raise ValueError("Diff file parsed successfully but contains no changes")

for files, content in file_changes:
# Apply filters
Expand Down
39 changes: 21 additions & 18 deletions src/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,29 +123,32 @@ def should_include_file(
return True # Include by default if no patterns specified

def _read_diff_file(self, file_path: str) -> List[str]:
    """Read a diff file and return its lines, detecting the text encoding.

    The encoding is guessed with ``chardet`` from the first 8192 bytes of
    the file. The guess is used only when chardet names an encoding with a
    confidence above 0.7; otherwise UTF-8 is assumed. If the chosen
    encoding cannot decode the file, or Python has no codec for it, the
    file is re-read as UTF-8 with undecodable bytes replaced.

    Args:
        file_path: Path of the diff file to read.

    Returns:
        The file content split into lines, with line endings kept and a
        leading byte-order mark removed.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    import chardet

    # Detect the encoding from a leading sample rather than the whole file.
    with open(file_path, "rb") as f:
        sample = f.read(8192)
    result = chardet.detect(sample)

    # Guard against a missing/None encoding or confidence so that open()
    # never silently falls back to the platform's locale encoding.
    detected = result.get("encoding")
    confidence = result.get("confidence") or 0
    encoding = detected if detected and confidence > 0.7 else "utf-8"

    try:
        with open(file_path, "r", encoding=encoding) as f:
            content = f.read()
    except (UnicodeDecodeError, LookupError):
        # UnicodeDecodeError: the guess was wrong for bytes past the sample.
        # LookupError: chardet named an encoding Python has no codec for.
        with open(file_path, "r", encoding="utf-8", errors="replace") as f:
            content = f.read()

    # Strip BOM if present
    if content.startswith("\ufeff"):
        content = content[1:]

    return content.splitlines(keepends=True)

def count_lines(self, content: str) -> int:
"""Count meaningful lines in diff content."""
Expand Down
60 changes: 60 additions & 0 deletions tests/test_encodings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
"""Test encoding support for diff files."""

import pytest
from pathlib import Path

from src.tools import DiffChunkTools


class TestEncodings:
    """Test encoding detection and parsing."""

    @pytest.fixture
    def test_data_dir(self):
        """Return the ``test_data`` directory next to this test module."""
        return Path(__file__).parent / "test_data"

    @pytest.fixture
    def tools(self):
        """Return a ``DiffChunkTools`` instance to load diffs with."""
        return DiffChunkTools()

    # Parametrized so each sample file is reported as its own test case and
    # one failing encoding does not hide the results for the others.
    @pytest.mark.parametrize(
        "filename",
        [
            "minimal_working.diff",  # UTF-8
            "minimal_windows.diff",  # Windows line endings
            "minimal_bom.diff",  # UTF-8 BOM
            "minimal_latin1.diff",  # Latin-1
        ],
    )
    def test_encoding_support(self, tools, test_data_dir, filename):
        """Test that various encodings are supported."""
        result = tools.load_diff(str(test_data_dir / filename))
        assert result["chunks"] > 0, f"{filename} should parse successfully"
        assert result["files"] > 0, f"{filename} should contain files"

    def test_encoding_detection(self, tools, tmp_path):
        """Test encoding detection with UTF-16."""
        # Create a minimal UTF-16 diff file. The text is assembled line by
        # line so its content does not depend on source-code indentation.
        content = (
            "diff --git a/test.txt b/test.txt\n"
            "index 1234567..abcdefg 100644\n"
            "--- a/test.txt\n"
            "+++ b/test.txt\n"
            "@@ -1 +1 @@\n"
            "-old line\n"
            "+new line\n"
        )
        utf16_file = tmp_path / "test_utf16.diff"
        utf16_file.write_text(content, encoding="utf-16")

        result = tools.load_diff(str(utf16_file))
        assert result["chunks"] > 0
        assert result["files"] > 0

    def test_empty_diff_error(self, tools, tmp_path):
        """Test error message for empty diff files."""
        empty_file = tmp_path / "empty.diff"
        empty_file.write_text("")

        with pytest.raises(
            ValueError, match="Diff file parsed successfully but contains no changes"
        ):
            tools.load_diff(str(empty_file))
34 changes: 0 additions & 34 deletions tests/test_windows_repro.py

This file was deleted.

13 changes: 12 additions & 1 deletion uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading