Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions src/doc_builder/convert_md_to_mdx.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@


# Matches, line by line (MULTILINE), a doctest prompt line that ends with a
# single `# doctest: +FLAG` directive. Group 1 is the `>>> ...` statement up to
# its last non-whitespace character, group 2 is the whitespace separating it
# from the directive. Only one flag made of uppercase letters/underscores is
# accepted by this pattern.
_re_doctest_flags = re.compile(r"^(>>>.*\S)(\s+)# doctest:\s+\+[A-Z_]+\s*$", flags=re.MULTILINE)
# Matches something that looks like a real HTML construct: `<!DOCTYPE ...>`,
# `<!-- ... >`, or an opening/closing tag whose name starts with a letter
# (case-insensitive). Group 1 is everything between `<` and the first `>`.
# Because the tag name must start with a letter, text such as `<5MB` does not
# match. `[^>]` also matches newlines, so multi-line tags/comments are covered
# without needing DOTALL.
_re_lt_html = re.compile(r"<(((!(DOCTYPE|--))|((\/\s*)?[a-z]+))[^>]*?)>", re.IGNORECASE)
# Matches either of two things (DOTALL, so `.` spans newlines):
#   1. the opening of one of the listed components (`<Tip ...>`,
#      `<Question ...>`, ...) through a following `>`, where the span in between
#      may not contain the opening of another listed component (the negative
#      lookahead is checked before every consumed character);
#   2. the escaped-brace literal immediately followed by `#if`, `:else}` or
#      `/if}`.
# NOTE(review): the component names are matched case-sensitively here.
_re_lcub_svelte = re.compile(
r"<(Question|Tip|Added|Changed|Deprecated|DocNotebookDropdown|CourseFloatingBanner|FrameworkSwitch|audio|PipelineIcon|PipelineTag)(((?!<(Question|Tip|Added|Changed|Deprecated|DocNotebookDropdown|CourseFloatingBanner|FrameworkSwitch|audio|PipelineIcon|PipelineTag)).)*)>|&amp;lcub;(#if|:else}|/if})",
re.DOTALL,
)


def convert_md_to_mdx(md_text, page_info):
Expand Down Expand Up @@ -68,18 +73,13 @@ def convert_special_chars(text):
"""
Convert { and < that have special meanings in MDX.
"""
_re_lcub_svelte = re.compile(
r"<(Question|Tip|Added|Changed|Deprecated|DocNotebookDropdown|CourseFloatingBanner|FrameworkSwitch|audio|PipelineIcon|PipelineTag)(((?!<(Question|Tip|Added|Changed|Deprecated|DocNotebookDropdown|CourseFloatingBanner|FrameworkSwitch|audio|PipelineIcon|PipelineTag)).)*)>|&amp;lcub;(#if|:else}|/if})",
re.DOTALL,
)
text = text.replace("{", "&amp;lcub;")
# We don't want to escape `{` that are part of svelte syntax
text = _re_lcub_svelte.sub(lambda match: match[0].replace("&amp;lcub;", "{"), text)
# We don't want the `<` of valid HTML tags to be HTML-escaped, so we temporarily rewrite it as LTHTML
# source is a special tag, it can be standalone (html tag) or closing (doc tag)

# Temporarily replace all valid HTML tags with LTHTML
_re_lt_html = re.compile(r"<(((!(DOCTYPE|--))|((\/\s*)?\w+))[^>]*?)>", re.DOTALL)
text = re.sub(_re_lt_html, r"LTHTML\1>", text)
# Encode remaining < symbols
text = text.replace("<", "&amp;lt;")
Expand Down
10 changes: 10 additions & 0 deletions tests/test_convert_md_to_mdx.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,16 @@ def test_convert_special_chars(self):
comment = "<!-- comment -->"
self.assertEqual(convert_special_chars(comment), comment)

comment = "<!-- multi line\ncomment -->"
self.assertEqual(convert_special_chars(comment), comment)

# Regression test for https://github.com/huggingface/doc-builder/pull/394
# '<' must not be considered an HTML tag before a number
self.assertEqual(
convert_special_chars("something <5MB something else -> here"),
"something &amp;lt;5MB something else -> here",
)

def test_convert_img_links(self):
page_info = {"package_name": "transformers", "version": "v4.10.0", "language": "fr"}

Expand Down