Skip to content

Commit 16c4c44

Browse files
ROB: Deal with wrong size for incremental PDF files (#3495)
1 parent 54c0dd7 commit 16c4c44

File tree

2 files changed: 41 additions and 4 deletions.

pypdf/_writer.py

Lines changed: 17 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -78,7 +78,7 @@
7878
from .constants import FieldDictionaryAttributes as FA
7979
from .constants import PageAttributes as PG
8080
from .constants import TrailerKeys as TK
81-
from .errors import PyPdfError
81+
from .errors import PdfReadError, PyPdfError
8282
from .generic import (
8383
PAGE_FIT,
8484
ArrayObject,
@@ -1253,13 +1253,27 @@ def clone_reader_document_root(self, reader: PdfReader) -> None:
12531253
self._root_object = reader.root_object.clone(self)
12541254
self._pages = self._root_object.raw_get("/Pages")
12551255

1256-
assert len(self._objects) <= cast(int, reader.trailer["/Size"]) # for pytest
1256+
if len(self._objects) > cast(int, reader.trailer["/Size"]):
1257+
if self.strict:
1258+
raise PdfReadError(
1259+
f"Object count {len(self._objects)} exceeds defined trailer size {reader.trailer['/Size']}"
1260+
)
1261+
logger_warning(
1262+
f"Object count {len(self._objects)} exceeds defined trailer size {reader.trailer['/Size']}",
1263+
__name__
1264+
)
1265+
12571266
# must be done here before rewriting
12581267
if self.incremental:
12591268
self._original_hash = [
12601269
(obj.hash_bin() if obj is not None else 0) for obj in self._objects
12611270
]
1262-
self._flatten()
1271+
1272+
try:
1273+
self._flatten()
1274+
except IndexError:
1275+
raise PdfReadError("Got index error while flattening.")
1276+
12631277
assert self.flattened_pages is not None
12641278
for p in self.flattened_pages:
12651279
self._replace_object(cast(IndirectObject, p.indirect_reference).idnum, p)

tests/test_writer.py

Lines changed: 24 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@
2020
Transformation,
2121
)
2222
from pypdf.annotations import Link
23-
from pypdf.errors import DeprecationError, PageSizeNotDefinedError, PyPdfError
23+
from pypdf.errors import DeprecationError, PageSizeNotDefinedError, PdfReadError, PyPdfError
2424
from pypdf.generic import (
2525
ArrayObject,
2626
ByteStringObject,
@@ -2851,3 +2851,26 @@ def test_unterminated_object__with_incremental_writer():
28512851
writer.write(fi)
28522852
b = fi.getvalue()
28532853
assert b[-39:] == b"\nendstream\nendobj\nstartxref\n1240\n%%EOF\n"
2854+
2855+
2856+
def test_wrong_size_in_incremental_pdf(caplog):
2857+
source_data = RESOURCE_ROOT.joinpath("crazyones.pdf").read_bytes()
2858+
writer = PdfWriter(BytesIO(source_data), incremental=True)
2859+
writer._add_object(DictionaryObject())
2860+
2861+
incremental_data = BytesIO()
2862+
writer.write(incremental_data)
2863+
modified_data = incremental_data.getvalue().replace(b"/Size 25", b"/Size 2")
2864+
2865+
writer = PdfWriter(BytesIO(modified_data), incremental=False)
2866+
assert "Object count 19 exceeds defined trailer size 2" in caplog.text
2867+
assert len(writer._objects) == 20
2868+
2869+
caplog.clear()
2870+
writer = PdfWriter(incremental=False)
2871+
writer.strict = True
2872+
with pytest.raises(expected_exception=PdfReadError, match=r"^Object count 19 exceeds defined trailer size 2$"):
2873+
writer.clone_reader_document_root(reader=PdfReader(BytesIO(modified_data)))
2874+
2875+
with pytest.raises(expected_exception=PdfReadError, match=r"^Got index error while flattening\.$"):
2876+
PdfWriter(BytesIO(modified_data), incremental=True)

0 commit comments

Comments
 (0)