Skip optional breaks that arrive while no row text is open.

When markers are included, opt_break appends "//" to
self._row_texts_stack[-1]. Indexing [-1] on an empty list raises
IndexError, so the handler now returns early when the stack is empty.

The added test parses USFM that has a "//" line between "\c 1" and "\p"
(include_all_text=True, include_markers=True) and expects two rows: an
empty one followed by "This is the first verse.".

diff --git a/machine/corpora/usfm_text_base.py b/machine/corpora/usfm_text_base.py
index c286c001..9afc6c16 100644
--- a/machine/corpora/usfm_text_base.py
+++ b/machine/corpora/usfm_text_base.py
@@ -178,6 +178,8 @@ def end_note(self, state: UsfmParserState, marker: str, closed: bool) -> None:
 
     def opt_break(self, state: UsfmParserState) -> None:
         super().opt_break(state)
+        if len(self._row_texts_stack) == 0:
+            return
         if self._text._include_markers:
             self._row_texts_stack[-1] += "//"
         elif self._current_text_type != ScriptureTextType.VERSE or state.is_verse_text:
diff --git a/tests/corpora/test_usfm_memory_text.py b/tests/corpora/test_usfm_memory_text.py
index 37b87563..367ec721 100644
--- a/tests/corpora/test_usfm_memory_text.py
+++ b/tests/corpora/test_usfm_memory_text.py
@@ -136,6 +136,22 @@ def test_get_rows_verse_para_comment_first() -> None:
     assert len(rows) == 2, str.join(",", [tr.text for tr in rows])
 
 
+def test_get_rows_opt_break_outside_of_segment() -> None:
+    rows: List[TextRow] = get_rows(
+        r"""\id MAT - Test
+\c 1
+//
+\p
+\v 1 This is the first verse.
+""",
+        include_all_text=True,
+        include_markers=True,
+    )
+    assert len(rows) == 2, str.join(",", [tr.text for tr in rows])
+    assert rows[0].text == ""
+    assert rows[1].text == "This is the first verse."
+
+
 def get_rows(usfm: str, include_markers: bool = False, include_all_text: bool = False) -> List[TextRow]:
     text = UsfmMemoryText(
         UsfmStylesheet("usfm.sty"),