From 23d5b6abc0ef3e622d7309a32946fdc614e0a9ae Mon Sep 17 00:00:00 2001
From: Mishig Davaadorj
Date: Sat, 23 Sep 2023 15:29:21 +0200
Subject: [PATCH 1/7] Remark md escape svelte special chars
---
kit/package-lock.json | 63 ++++++++++++++++++++++++++++++++++++++++++-
kit/package.json | 3 ++-
kit/preprocess.js | 16 +++++++++++
3 files changed, 80 insertions(+), 2 deletions(-)
diff --git a/kit/package-lock.json b/kit/package-lock.json
index de0d4248..fb0f8763 100644
--- a/kit/package-lock.json
+++ b/kit/package-lock.json
@@ -37,7 +37,8 @@
"svelte-preprocess": "^4.10.1",
"tailwindcss": "^3.0.22",
"tslib": "^2.3.1",
- "typescript": "~4.5.4"
+ "typescript": "~4.5.4",
+ "unist-util-visit": "^5.0.0"
}
},
"node_modules/@babel/code-frame": {
@@ -3471,6 +3472,25 @@
"node": ">=4.2.0"
}
},
+ "node_modules/unist-util-is": {
+ "version": "6.0.0",
+ "resolved": "https://registry.npmjs.org/unist-util-is/-/unist-util-is-6.0.0.tgz",
+ "integrity": "sha512-2qCTHimwdxLfz+YzdGfkqNlH0tLi9xjTnHddPmJwtIG9MGsdbutfTc4P+haPD7l7Cjxf/WZj+we5qfVPvvxfYw==",
+ "dev": true,
+ "dependencies": {
+ "@types/unist": "^3.0.0"
+ },
+ "funding": {
+ "type": "opencollective",
+ "url": "https://opencollective.com/unified"
+ }
+ },
+ "node_modules/unist-util-is/node_modules/@types/unist": {
+ "version": "3.0.0",
+ "resolved": "https://registry.npmjs.org/@types/unist/-/unist-3.0.0.tgz",
+ "integrity": "sha512-MFETx3tbTjE7Uk6vvnWINA/1iJ7LuMdO4fcq8UfF0pRbj01aGLduVvQcRyswuACJdpnHgg8E3rQLhaRdNEJS0w==",
+ "dev": true
+ },
"node_modules/unist-util-stringify-position": {
"version": "2.0.3",
"resolved": "https://registry.npmjs.org/unist-util-stringify-position/-/unist-util-stringify-position-2.0.3.tgz",
@@ -3484,6 +3504,47 @@
"url": "https://opencollective.com/unified"
}
},
+ "node_modules/unist-util-visit": {
+ "version": "5.0.0",
+ "resolved": "https://registry.npmjs.org/unist-util-visit/-/unist-util-visit-5.0.0.tgz",
+ "integrity": "sha512-MR04uvD+07cwl/yhVuVWAtw+3GOR/knlL55Nd/wAdblk27GCVt3lqpTivy/tkJcZoNPzTwS1Y+KMojlLDhoTzg==",
+ "dev": true,
+ "dependencies": {
+ "@types/unist": "^3.0.0",
+ "unist-util-is": "^6.0.0",
+ "unist-util-visit-parents": "^6.0.0"
+ },
+ "funding": {
+ "type": "opencollective",
+ "url": "https://opencollective.com/unified"
+ }
+ },
+ "node_modules/unist-util-visit-parents": {
+ "version": "6.0.1",
+ "resolved": "https://registry.npmjs.org/unist-util-visit-parents/-/unist-util-visit-parents-6.0.1.tgz",
+ "integrity": "sha512-L/PqWzfTP9lzzEa6CKs0k2nARxTdZduw3zyh8d2NVBnsyvHjSX4TWse388YrrQKbvI8w20fGjGlhgT96WwKykw==",
+ "dev": true,
+ "dependencies": {
+ "@types/unist": "^3.0.0",
+ "unist-util-is": "^6.0.0"
+ },
+ "funding": {
+ "type": "opencollective",
+ "url": "https://opencollective.com/unified"
+ }
+ },
+ "node_modules/unist-util-visit-parents/node_modules/@types/unist": {
+ "version": "3.0.0",
+ "resolved": "https://registry.npmjs.org/@types/unist/-/unist-3.0.0.tgz",
+ "integrity": "sha512-MFETx3tbTjE7Uk6vvnWINA/1iJ7LuMdO4fcq8UfF0pRbj01aGLduVvQcRyswuACJdpnHgg8E3rQLhaRdNEJS0w==",
+ "dev": true
+ },
+ "node_modules/unist-util-visit/node_modules/@types/unist": {
+ "version": "3.0.0",
+ "resolved": "https://registry.npmjs.org/@types/unist/-/unist-3.0.0.tgz",
+ "integrity": "sha512-MFETx3tbTjE7Uk6vvnWINA/1iJ7LuMdO4fcq8UfF0pRbj01aGLduVvQcRyswuACJdpnHgg8E3rQLhaRdNEJS0w==",
+ "dev": true
+ },
"node_modules/uri-js": {
"version": "4.4.1",
"resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.4.1.tgz",
diff --git a/kit/package.json b/kit/package.json
index 211c010e..c61ab428 100644
--- a/kit/package.json
+++ b/kit/package.json
@@ -38,7 +38,8 @@
"svelte-preprocess": "^4.10.1",
"tailwindcss": "^3.0.22",
"tslib": "^2.3.1",
- "typescript": "~4.5.4"
+ "typescript": "~4.5.4",
+ "unist-util-visit": "^5.0.0"
},
"type": "module",
"dependencies": {
diff --git a/kit/preprocess.js b/kit/preprocess.js
index 74a1f463..cafebdee 100644
--- a/kit/preprocess.js
+++ b/kit/preprocess.js
@@ -3,6 +3,7 @@ import htmlparser2 from "htmlparser2";
import hljs from "highlight.js";
import { mdsvex } from "mdsvex";
import katex from "katex";
+import { visit } from 'unist-util-visit'
// Preprocessor that converts markdown into Docstring
// svelte component using mdsvexPreprocess
@@ -423,7 +424,22 @@ function renderKatex(code, markedKatex) {
});
}
+// Remark plugin: HTML-escapes `{` and `<` in markdown text nodes so Svelte does not read them as syntax.
+function escapeSvelteSpecialChars() {
+	return transform;
+	// Transformer returned to remark: rewrites the value of every `text` node of the tree in place.
+	function transform(tree) {
+		visit(tree, "text", ontext);
+	}
+	// The replacements must be spelled as entities; swapping a character for itself escapes nothing.
+	function ontext(node) {
+		node.value = node.value.replaceAll("{", "&#123;");
+		node.value = node.value.replaceAll("<", "&#60;");
+	}
+}
+
const _mdsvexPreprocess = mdsvex({
+ remarkPlugins: [escapeSvelteSpecialChars],
extensions: ["mdx"],
highlight: {
highlighter: function (code, lang) {
From f20be19fc7826730339831bd2b4e173f6ae43f4f Mon Sep 17 00:00:00 2001
From: Mishig Davaadorj
Date: Sat, 23 Sep 2023 15:29:39 +0200
Subject: [PATCH 2/7] Rm unneeded python escaping code
---
src/doc_builder/convert_md_to_mdx.py | 47 +----------------
tests/test_convert_md_to_mdx.py | 75 ----------------------------
2 files changed, 2 insertions(+), 120 deletions(-)
diff --git a/src/doc_builder/convert_md_to_mdx.py b/src/doc_builder/convert_md_to_mdx.py
index bd6c4c45..9f27b28a 100644
--- a/src/doc_builder/convert_md_to_mdx.py
+++ b/src/doc_builder/convert_md_to_mdx.py
@@ -22,25 +22,6 @@
_re_doctest_flags = re.compile(r"^(>>>.*\S)(\s+)# doctest:\s+\+[A-Z_]+\s*$", flags=re.MULTILINE)
-_re_lt_html = re.compile(
- r"""# This regex is meant to detect any HTML tag or comment, but not standalone '<' characters.
- <( # HTML tag with...
- (
- !DOCTYPE # ... !DOCTYPE
- |
- ((\/\s*)?[a-z]+) # ... or any regular tag (i.e. starts with [a-z]
- )
- [^><]*? # ... followed by anything until next closing ">"
- )>
- |
- <(!--[^>]*?--)> # Or an HTML comment
- """,
- re.IGNORECASE | re.VERBOSE,
-)
-_re_lcub_svelte = re.compile(
- r"<(Question|Tip|Added|Changed|Deprecated|DocNotebookDropdown|CourseFloatingBanner|FrameworkSwitch|audio|PipelineIcon|PipelineTag)(((?!<(Question|Tip|Added|Changed|Deprecated|DocNotebookDropdown|CourseFloatingBanner|FrameworkSwitch|audio|PipelineIcon|PipelineTag)).)*)>|&amp;lcub;(#if|:else}|/if})",
- re.DOTALL,
-)
def convert_md_to_mdx(md_text, page_info):
@@ -83,28 +64,6 @@ def convert_md_to_mdx(md_text, page_info):
)
-def convert_special_chars(text):
- """
- Convert { and < that have special meanings in MDX.
- """
- text = text.replace("{", "&amp;lcub;")
- # We don't want to escape `{` that are part of svelte syntax
- text = _re_lcub_svelte.sub(lambda match: match[0].replace("&amp;lcub;", "{"), text)
- # We don't want to replace those by the HTML code, so we temporarily set them at LTHTML
- # source is a special tag, it can be standalone (html tag) or closing (doc tag)
-
- # Temporarily replace all valid HTML tags with LTHTML
- # Replace with '\1\5' => 2 possible groups to catch the tag but in practice only one is not empty.
- text = re.sub(_re_lt_html, r"LTHTML\1\5>", text)
-
- # Encode remaining < symbols
- text = text.replace("<", "&amp;lt;")
- # Put back the HTML tags
- text = text.replace("LTHTML", "<")
-
- return text
-
-
def convert_img_links(text, page_info):
"""
Convert image links to correct URL paths.
@@ -199,13 +158,11 @@ def process_md(text, page_info):
Processes markdown by:
1. Converting include
2. Converting literalinclude
- 3. Converting special characters
- 4. Clean doctest syntax
- 5. Converting image links
+ 3. Clean doctest syntax
+ 4. Converting image links
"""
text = convert_include(text, page_info)
text = convert_literalinclude(text, page_info)
- text = convert_special_chars(text)
text = clean_doctest_syntax(text)
text = convert_img_links(text, page_info)
return text
diff --git a/tests/test_convert_md_to_mdx.py b/tests/test_convert_md_to_mdx.py
index 1405bd05..4e0e63dc 100644
--- a/tests/test_convert_md_to_mdx.py
+++ b/tests/test_convert_md_to_mdx.py
@@ -22,7 +22,6 @@
convert_include,
convert_literalinclude,
convert_md_to_mdx,
- convert_special_chars,
process_md,
)
@@ -65,80 +64,6 @@ def test_convert_md_to_mdx(self):
Lorem ipsum dolor sit amet, consectetur adipiscing elit"""
self.assertEqual(convert_md_to_mdx(md_text, page_info), expected_conversion)
- def test_convert_special_chars(self):
- self.assertEqual(convert_special_chars("{ lala }"), "{ lala }")
- self.assertEqual(convert_special_chars("< blo"), "< blo")
- self.assertEqual(convert_special_chars(""), "")
- self.assertEqual(convert_special_chars("
"), "
")
- self.assertEqual(convert_special_chars("
"), "
")
- self.assertEqual(convert_special_chars(""), "")
- self.assertEqual(convert_special_chars(""), "")
- self.assertEqual(convert_special_chars(""), "")
- self.assertEqual(convert_special_chars("
"), "
")
- self.assertEqual(convert_special_chars("5 <= 10
"), "5 <= 10
")
- self.assertEqual(
- convert_special_chars("5 <= 10
"), "5 <= 10
"
- )
- self.assertEqual(convert_special_chars("5 <= 10"), "
5 <= 10") # no closing tag
- self.assertEqual(convert_special_chars("5 <= 10
"), "5 <= 10
") # no opening tag
- self.assertEqual(convert_special_chars("test"), "test") # mismatched tags
- self.assertEqual(convert_special_chars("5 < 10
"), "5 < 10
")
- self.assertEqual(convert_special_chars("5 > 10
"), "5 > 10
")
- self.assertEqual(convert_special_chars(""), "") # comment
- self.assertEqual(convert_special_chars(""), "") # comment
- self.assertEqual(convert_special_chars(" 1 < 2"), " 1 < 2")
-
- longer_test = """"""
- self.assertEqual(convert_special_chars(longer_test), longer_test)
-
- nested_test = """
- sometext
-
- sometext
-
-
"""
- self.assertEqual(convert_special_chars(nested_test), nested_test)
-
- html_code = 'some_text'
- self.assertEqual(convert_special_chars(html_code), html_code)
-
- inner_less = """
- sometext 4 < 5
-
"""
- self.assertEqual(convert_special_chars(inner_less), inner_less)
-
- img_code = '
'
- self.assertEqual(convert_special_chars(img_code), img_code)
-
- video_code = '