sillsdev · isaac091 · Jul 9, 2025 · Jul 8, 2025
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -70,7 +70,7 @@ tqdm = "^4.62.2"
 sacrebleu = "^2.3.1"
 ctranslate2 = "^3.5.1"
 libclang = "14.0.6"
-sil-machine = {extras = ["thot"], version = "1.7.2"}
+sil-machine = {extras = ["thot"], version = "1.7.3"}
 datasets = "^2.7.1"
 torch = {version = "^2.4", source = "torch"}
 sacremoses = "^0.0.53"

diff --git a/silnlp/common/translator.py b/silnlp/common/translator.py
@@ -31,17 +31,6 @@
 nltk.download("punkt")
 
 
-def insert_draft_remarks(usfm: str, remarks: List[str]) -> str:
-    lines = usfm.split("\n")
-    insert_idx = (
-        1
-        + (len(lines) > 1 and (lines[1].startswith("\\ide") or lines[1].startswith("\\usfm")))
-        + (len(lines) > 2 and (lines[2].startswith("\\ide") or lines[2].startswith("\\usfm")))
-    )
-    remarks = [f"\\rem {r}" for r in remarks]
-    return "\n".join(lines[:insert_idx] + remarks + lines[insert_idx:])
-
-
 # A group of multiple translations of a single sentence
 TranslationGroup = List[str]
 
@@ -224,6 +213,12 @@ def translate_usfm(
             postprocess_handler.create_update_block_handlers(vrefs, sentences, translated_draft)
 
             for config in postprocess_handler.configs:
+                # Compile draft remarks
+                draft_src_str = f"project {src_file_text.project}" if src_from_project else f"file {src_file_path.name}"
+                draft_remark = f"This draft of {vrefs[0].book} was machine translated on {date.today()} from {draft_src_str} using model {experiment_ckpt_str}. It should be reviewed and edited carefully."
+                postprocess_remark = config.get_postprocess_remark()
+                remarks = [draft_remark] + ([postprocess_remark] if len(postprocess_remark) > 0 else [])
+
                 # Insert translation into the USFM structure of an existing project
                 # If the target project is not the same as the translated file's original project,
                 # no verses outside of the ones translated will be overwritten
@@ -239,6 +234,7 @@ def translate_usfm(
                         embed_behavior=config.get_embed_behavior(),
                         style_behavior=config.get_style_behavior(),
                         update_block_handlers=config.update_block_handlers,
+                        remarks=remarks,
                     )
 
                     if usfm_out is None:
@@ -256,20 +252,11 @@ def translate_usfm(
                         embed_behavior=config.get_embed_behavior(),
                         style_behavior=config.get_style_behavior(),
                         update_block_handlers=config.update_block_handlers,
+                        remarks=remarks,
                     )
                     parse_usfm(usfm, handler)
                     usfm_out = handler.get_usfm()
 
-                # Insert draft remarks
-                description = f"project {src_file_text.project}" if src_from_project else f"file {src_file_path.name}"
-                remarks = [
-                    f"This draft of {vrefs[0].book} was machine translated on {date.today()} from {description} using model {experiment_ckpt_str}. It should be reviewed and edited carefully."
-                ]
-                postprocess_remark = config.get_postprocess_remark()
-                if len(postprocess_remark) > 0:
-                    remarks.append(postprocess_remark)
-                usfm_out = insert_draft_remarks(usfm_out, remarks)
-
                 # Construct output file name write to file
                 trg_draft_file_path = trg_file_path.with_stem(trg_file_path.stem + config.get_postprocess_suffix())
                 if produce_multiple_translations:

diff --git a/silnlp/nmt/postprocess.py b/silnlp/nmt/postprocess.py
@@ -30,13 +30,6 @@
 LOGGER = logging.getLogger(__package__ + ".postprocess")
 
 
-# NOTE: to be replaced by new machine.py remark functionality
-def insert_draft_remarks(usfm: str, remarks: List[str]) -> str:
-    lines = usfm.split("\n")
-    remark_lines = [f"\\rem {r}" for r in remarks]
-    return "\n".join(lines[:1] + remark_lines + lines[1:])
-
-
 # Takes the path to a USFM file and the relevant info to parse it
 # and returns the text of all non-embed sentences and their respective references,
 # along with any remarks (\rem) that were inserted at the beginning of the file
@@ -48,7 +41,7 @@ def get_sentences(
     draft_remarks = []
     for sent in UsfmFileText(stylesheet, encoding, book, book_path, include_all_text=True):
         marker = sent.ref.path[-1].name if len(sent.ref.path) > 0 else ""
-        if marker == "rem" and len(refs) == 0:  # TODO: \ide and \usfm lines could potentially come before the remark(s)
+        if marker == "rem" and len(refs) == 0:
             draft_remarks.append(sent.text)
             continue
         if (
@@ -154,12 +147,11 @@ def postprocess_draft(
             embed_behavior=config.get_embed_behavior(),
             style_behavior=config.get_style_behavior(),
             update_block_handlers=config.update_block_handlers,
+            remarks=(draft_remarks + [config.get_postprocess_remark()]),
         )
         parse_usfm(usfm, handler)
         usfm_out = handler.get_usfm()
 
-        usfm_out = insert_draft_remarks(usfm_out, draft_remarks + [config.get_postprocess_remark()])
-
         if not out_dir:
             out_dir = draft_path.parent
         out_path = out_dir / f"{draft_path.stem}{config.get_postprocess_suffix()}{draft_path.suffix}"