Skip to content

Commit 61b0ae9

Browse files
authored
Merge pull request #1 from ExtReMLapin/patch-2
Allow markdown serializer to inline OCR text instead of image placeholders
2 parents 07b2703 + af928d4 commit 61b0ae9

File tree

1 file changed

+7
-0
lines changed

1 file changed

+7
-0
lines changed

docling_core/transforms/serializer/markdown.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -421,6 +421,7 @@ def _serialize_image_part(
421421
doc: DoclingDocument,
422422
image_mode: ImageRefMode,
423423
image_placeholder: str,
424+
image_join_text: str = "\n",
424425
**kwargs: Any,
425426
) -> SerializationResult:
426427
error_response = (
@@ -457,6 +458,12 @@ def _serialize_image_part(
457458
text_res = image_placeholder
458459
else:
459460
text_res = f"![Image]({str(item.image.uri)})"
461+
elif image_mode == ImageRefMode.INLINE:
462+
total_text = []
463+
for item, level in doc.iterate_items(root=item, traverse_pictures=True):
464+
if isinstance(item, TextItem):
465+
total_text.append(item.text)
466+
text_res = image_join_text.join(total_text) if total_text else image_placeholder
460467
else:
461468
text_res = image_placeholder
462469

0 commit comments

Comments
 (0)