From 716c0cabcce07e6842db3e8c4cc8499ea8d1a960 Mon Sep 17 00:00:00 2001
From: Sebastian Schuberth <sebastian@doubleopen.org>
Date: Thu, 28 Aug 2025 16:36:05 +0200
Subject: [PATCH 1/2] Fix up my own entry in `AUTHORS.rst`

Something went wrong in the Git history: I was never at FSFE, yet my
past commits

1b428ef3970c82f1ed34b08536381806e74f091d
a0ebb35a69021b68d1245e346042a531beb9a903

were made under an FSFE address. Fix up the address to my real current
one, and remove myself from the header of this file.
---
 AUTHORS.rst | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/AUTHORS.rst b/AUTHORS.rst
index 96fb3f5a..29d0fc86 100644
--- a/AUTHORS.rst
+++ b/AUTHORS.rst
@@ -1,6 +1,5 @@
 ..
   SPDX-FileCopyrightText: 2017 Free Software Foundation Europe e.V. <https://fsfe.org>
-  SPDX-FileCopyrightText: 2017 Sebastian Schuberth <schuberth@fsfe.org>
 
   SPDX-License-Identifier: CC-BY-SA-4.0
 
@@ -20,7 +19,7 @@ Contributors
 - Max Mehl <max.mehl@fsfe.org>
 - Nico Rikken <nico.rikken@fsfe.org>
 - Florian Snow <florian@familysnow.net>
-- Sebastian Schuberth <schuberth@fsfe.org>
+- Sebastian Schuberth <sebastian@doubleopen.org>
 - Samuel Gaist <samuel.gaist@idiap.ch>
 - Diego Elio Pettenò <flameeyes@flameeyes.com>
 - Matija Šuklje <matija@suklje.name>

From 96e1263e78c093e2a7de6e86799e35bd42fd3249 Mon Sep 17 00:00:00 2001
From: Sebastian Schuberth <sebastian@doubleopen.org>
Date: Thu, 28 Aug 2025 16:25:42 +0200
Subject: [PATCH 2/2] Round down decoded text to full lines

This prevents text that was cropped in the middle of a line from
triggering bogus "Bad licenses" errors.

Fixes #793.
---
 changelog.d/fixed/round-down-to-full-lines.md |  4 ++++
 src/reuse/extract.py                          | 12 +++++++++++-
 tests/test_extract.py                         |  9 +++++++++
 3 files changed, 24 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/fixed/round-down-to-full-lines.md

diff --git a/changelog.d/fixed/round-down-to-full-lines.md b/changelog.d/fixed/round-down-to-full-lines.md
new file mode 100644
index 00000000..d393ea32
--- /dev/null
+++ b/changelog.d/fixed/round-down-to-full-lines.md
@@ -0,0 +1,4 @@
+- Text extraction is limited to a certain number of bytes, which could cut a
+  line of text in the middle of a license identifier, triggering bogus "Bad
+  licenses" errors. This is now avoided by "rounding down" the extracted text
+  to full lines.
diff --git a/src/reuse/extract.py b/src/reuse/extract.py
index 34bcd0fa..f8835efc 100644
--- a/src/reuse/extract.py
+++ b/src/reuse/extract.py
@@ -8,6 +8,7 @@
 # SPDX-FileCopyrightText: 2023 Johannes Zarl-Zierl <johannes@zarl-zierl.at>
 # SPDX-FileCopyrightText: 2024 Rivos Inc.
 # SPDX-FileCopyrightText: 2024 Skyler Grey <sky@a.starrysky.fyi>
+# SPDX-FileCopyrightText: 2025 Double Open Oy
 # SPDX-FileCopyrightText: © 2020 Liferay, Inc. <https://liferay.com>
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
@@ -118,7 +119,16 @@ def decoded_text_from_binary(
         size = -1
     rawdata = binary_file.read(size)
     result = rawdata.decode("utf-8", errors="replace")
-    return result.replace("\r\n", "\n")
+    norm_str = result.replace("\r\n", "\n")
+    if len(rawdata) != size:
+        return norm_str
+    # If we have read exactly *size* bytes, we might have cut off in the
+    # middle of a line. To avoid returning a partial line, we only return
+    # complete lines.
+    newline_pos = norm_str.rfind("\n")
+    if newline_pos in (-1, len(norm_str) - 1):
+        return norm_str
+    return norm_str[:newline_pos]
 
 
 def _contains_snippet(binary_file: BinaryIO) -> bool:
diff --git a/tests/test_extract.py b/tests/test_extract.py
index 46bcad68..5ce1e913 100644
--- a/tests/test_extract.py
+++ b/tests/test_extract.py
@@ -4,6 +4,7 @@
 # SPDX-FileCopyrightText: 2022 Nico Rikken <nico.rikken@fsfe.org>
 # SPDX-FileCopyrightText: 2022 Pietro Albini <pietro.albini@ferrous-systems.com>
 # SPDX-FileCopyrightText: 2024 Rivos Inc.
+# SPDX-FileCopyrightText: 2025 Double Open Oy
 # SPDX-FileCopyrightText: © 2020 Liferay, Inc. <https://liferay.com>
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
@@ -391,6 +392,14 @@ def test_decoded_text_from_binary_crlf():
     assert decoded_text_from_binary(BytesIO(encoded)) == "Hello\nworld"
 
 
+def test_decoded_text_from_binary_round_down():
+    """Given a cropped line of text, only take full lines."""
+    text = "Hello\nbeautiful\nworld"
+    encoded = text.encode("utf-8")
+    result = decoded_text_from_binary(BytesIO(encoded), size=len(encoded))
+    assert result == "Hello\nbeautiful"
+
+
 def test_detect_line_endings_windows():
     """Given a CRLF string, detect the line endings."""
     assert detect_line_endings("hello\r\nworld") == "\r\n"