From 23d5b6abc0ef3e622d7309a32946fdc614e0a9ae Mon Sep 17 00:00:00 2001 From: Mishig Davaadorj Date: Sat, 23 Sep 2023 15:29:21 +0200 Subject: [PATCH 1/7] Remark md escape svelte special chars --- kit/package-lock.json | 63 ++++++++++++++++++++++++++++++++++++++++++- kit/package.json | 3 ++- kit/preprocess.js | 16 +++++++++++ 3 files changed, 80 insertions(+), 2 deletions(-) diff --git a/kit/package-lock.json b/kit/package-lock.json index de0d4248..fb0f8763 100644 --- a/kit/package-lock.json +++ b/kit/package-lock.json @@ -37,7 +37,8 @@ "svelte-preprocess": "^4.10.1", "tailwindcss": "^3.0.22", "tslib": "^2.3.1", - "typescript": "~4.5.4" + "typescript": "~4.5.4", + "unist-util-visit": "^5.0.0" } }, "node_modules/@babel/code-frame": { @@ -3471,6 +3472,25 @@ "node": ">=4.2.0" } }, + "node_modules/unist-util-is": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/unist-util-is/-/unist-util-is-6.0.0.tgz", + "integrity": "sha512-2qCTHimwdxLfz+YzdGfkqNlH0tLi9xjTnHddPmJwtIG9MGsdbutfTc4P+haPD7l7Cjxf/WZj+we5qfVPvvxfYw==", + "dev": true, + "dependencies": { + "@types/unist": "^3.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/unist-util-is/node_modules/@types/unist": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/@types/unist/-/unist-3.0.0.tgz", + "integrity": "sha512-MFETx3tbTjE7Uk6vvnWINA/1iJ7LuMdO4fcq8UfF0pRbj01aGLduVvQcRyswuACJdpnHgg8E3rQLhaRdNEJS0w==", + "dev": true + }, "node_modules/unist-util-stringify-position": { "version": "2.0.3", "resolved": "https://registry.npmjs.org/unist-util-stringify-position/-/unist-util-stringify-position-2.0.3.tgz", @@ -3484,6 +3504,47 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/unist-util-visit": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/unist-util-visit/-/unist-util-visit-5.0.0.tgz", + "integrity": 
"sha512-MR04uvD+07cwl/yhVuVWAtw+3GOR/knlL55Nd/wAdblk27GCVt3lqpTivy/tkJcZoNPzTwS1Y+KMojlLDhoTzg==", + "dev": true, + "dependencies": { + "@types/unist": "^3.0.0", + "unist-util-is": "^6.0.0", + "unist-util-visit-parents": "^6.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/unist-util-visit-parents": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/unist-util-visit-parents/-/unist-util-visit-parents-6.0.1.tgz", + "integrity": "sha512-L/PqWzfTP9lzzEa6CKs0k2nARxTdZduw3zyh8d2NVBnsyvHjSX4TWse388YrrQKbvI8w20fGjGlhgT96WwKykw==", + "dev": true, + "dependencies": { + "@types/unist": "^3.0.0", + "unist-util-is": "^6.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/unist-util-visit-parents/node_modules/@types/unist": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/@types/unist/-/unist-3.0.0.tgz", + "integrity": "sha512-MFETx3tbTjE7Uk6vvnWINA/1iJ7LuMdO4fcq8UfF0pRbj01aGLduVvQcRyswuACJdpnHgg8E3rQLhaRdNEJS0w==", + "dev": true + }, + "node_modules/unist-util-visit/node_modules/@types/unist": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/@types/unist/-/unist-3.0.0.tgz", + "integrity": "sha512-MFETx3tbTjE7Uk6vvnWINA/1iJ7LuMdO4fcq8UfF0pRbj01aGLduVvQcRyswuACJdpnHgg8E3rQLhaRdNEJS0w==", + "dev": true + }, "node_modules/uri-js": { "version": "4.4.1", "resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.4.1.tgz", diff --git a/kit/package.json b/kit/package.json index 211c010e..c61ab428 100644 --- a/kit/package.json +++ b/kit/package.json @@ -38,7 +38,8 @@ "svelte-preprocess": "^4.10.1", "tailwindcss": "^3.0.22", "tslib": "^2.3.1", - "typescript": "~4.5.4" + "typescript": "~4.5.4", + "unist-util-visit": "^5.0.0" }, "type": "module", "dependencies": { diff --git a/kit/preprocess.js b/kit/preprocess.js index 74a1f463..cafebdee 100644 --- a/kit/preprocess.js +++ b/kit/preprocess.js @@ 
-3,6 +3,7 @@ import htmlparser2 from "htmlparser2"; import hljs from "highlight.js"; import { mdsvex } from "mdsvex"; import katex from "katex"; +import { visit } from 'unist-util-visit' // Preprocessor that converts markdown into Docstring // svelte component using mdsvexPreprocess @@ -423,7 +424,22 @@ function renderKatex(code, markedKatex) { }); } + +function escapeSvelteSpecialChars() { + return transform; + + function transform(tree) { + visit(tree, 'text', ontext); + } + + function ontext(node) { + node.value = node.value.replaceAll("{", '&#123;'); + node.value = node.value.replaceAll("<", '&#60;'); + } +} + const _mdsvexPreprocess = mdsvex({ + remarkPlugins: [escapeSvelteSpecialChars], extensions: ["mdx"], highlight: { highlighter: function (code, lang) { From f20be19fc7826730339831bd2b4e173f6ae43f4f Mon Sep 17 00:00:00 2001 From: Mishig Davaadorj Date: Sat, 23 Sep 2023 15:29:39 +0200 Subject: [PATCH 2/7] Rm unneeded python escaping code --- src/doc_builder/convert_md_to_mdx.py | 47 +---------------- tests/test_convert_md_to_mdx.py | 75 ---------------------------- 2 files changed, 2 insertions(+), 120 deletions(-) diff --git a/src/doc_builder/convert_md_to_mdx.py b/src/doc_builder/convert_md_to_mdx.py index bd6c4c45..9f27b28a 100644 --- a/src/doc_builder/convert_md_to_mdx.py +++ b/src/doc_builder/convert_md_to_mdx.py @@ -22,25 +22,6 @@ _re_doctest_flags = re.compile(r"^(>>>.*\S)(\s+)# doctest:\s+\+[A-Z_]+\s*$", flags=re.MULTILINE) -_re_lt_html = re.compile( - r"""# This regex is meant to detect any HTML tag or comment, but not standalone '<' characters. - <( # HTML tag with... - ( - !DOCTYPE # ... !DOCTYPE - | - ((\/\s*)?[a-z]+) # ... or any regular tag (i.e. starts with [a-z] - ) - [^><]*? # ... 
followed by anything until next closing ">" - )> - | - <(!--[^>]*?--)> # Or an HTML comment - """, - re.IGNORECASE | re.VERBOSE, -) -_re_lcub_svelte = re.compile( - r"<(Question|Tip|Added|Changed|Deprecated|DocNotebookDropdown|CourseFloatingBanner|FrameworkSwitch|audio|PipelineIcon|PipelineTag)(((?!<(Question|Tip|Added|Changed|Deprecated|DocNotebookDropdown|CourseFloatingBanner|FrameworkSwitch|audio|PipelineIcon|PipelineTag)).)*)>|&lcub;(#if|:else}|/if})", - re.DOTALL, -) def convert_md_to_mdx(md_text, page_info): @@ -83,28 +64,6 @@ def convert_md_to_mdx(md_text, page_info): ) -def convert_special_chars(text): - """ - Convert { and < that have special meanings in MDX. - """ - text = text.replace("{", "&lcub;") - # We don't want to escape `{` that are part of svelte syntax - text = _re_lcub_svelte.sub(lambda match: match[0].replace("&lcub;", "{"), text) - # We don't want to replace those by the HTML code, so we temporarily set them at LTHTML - # source is a special tag, it can be standalone (html tag) or closing (doc tag) - - # Temporarily replace all valid HTML tags with LTHTML - # Replace with '\1\5' => 2 possible groups to catch the tag but in practice only one is not empty. - text = re.sub(_re_lt_html, r"LTHTML\1\5>", text) - - # Encode remaining < symbols - text = text.replace("<", "&lt;") - # Put back the HTML tags - text = text.replace("LTHTML", "<") - - return text - - def convert_img_links(text, page_info): """ Convert image links to correct URL paths. @@ -199,13 +158,11 @@ def process_md(text, page_info): Processes markdown by: 1. Converting include 2. Converting literalinclude - 3. Converting special characters - 4. Clean doctest syntax - 5. Converting image links + 3. Clean doctest syntax + 4. 
Converting image links """ text = convert_include(text, page_info) text = convert_literalinclude(text, page_info) - text = convert_special_chars(text) text = clean_doctest_syntax(text) text = convert_img_links(text, page_info) return text diff --git a/tests/test_convert_md_to_mdx.py b/tests/test_convert_md_to_mdx.py index 1405bd05..4e0e63dc 100644 --- a/tests/test_convert_md_to_mdx.py +++ b/tests/test_convert_md_to_mdx.py @@ -22,7 +22,6 @@ convert_include, convert_literalinclude, convert_md_to_mdx, - convert_special_chars, process_md, ) @@ -65,80 +64,6 @@ def test_convert_md_to_mdx(self): Lorem ipsum dolor sit amet, consectetur adipiscing elit""" self.assertEqual(convert_md_to_mdx(md_text, page_info), expected_conversion) - def test_convert_special_chars(self): - self.assertEqual(convert_special_chars("{ lala }"), "&lcub; lala }") - self.assertEqual(convert_special_chars("< blo"), "&lt; blo") - self.assertEqual(convert_special_chars(""), "") - self.assertEqual(convert_special_chars("
"), "
") - self.assertEqual(convert_special_chars("
"), "
") - self.assertEqual(convert_special_chars(""), "") - self.assertEqual(convert_special_chars(""), "") - self.assertEqual(convert_special_chars("
"), "
") - self.assertEqual(convert_special_chars("
"), "
") - self.assertEqual(convert_special_chars("

5 <= 10

"), "

5 &lt;= 10

") - self.assertEqual( - convert_special_chars("

5 <= 10

"), "

5 &lt;= 10

" - ) - self.assertEqual(convert_special_chars("

5 <= 10"), "

5 &lt;= 10") # no closing tag - self.assertEqual(convert_special_chars("5 <= 10

"), "5 &lt;= 10

") # no opening tag - self.assertEqual(convert_special_chars("test"), "test") # mismatched tags - self.assertEqual(convert_special_chars("

5 < 10

"), "

5 &lt; 10

") - self.assertEqual(convert_special_chars("

5 > 10

"), "

5 > 10

") - self.assertEqual(convert_special_chars(""), "") # comment - self.assertEqual(convert_special_chars(""), "") # comment - self.assertEqual(convert_special_chars(" 1 < 2"), " 1 &lt; 2") - - longer_test = """""" - self.assertEqual(convert_special_chars(longer_test), longer_test) - - nested_test = """
- sometext -
- sometext -
-
""" - self.assertEqual(convert_special_chars(nested_test), nested_test) - - html_code = '
some_text' - self.assertEqual(convert_special_chars(html_code), html_code) - - inner_less = """
- sometext 4 &lt; 5 -
""" - self.assertEqual(convert_special_chars(inner_less), inner_less) - - img_code = '' - self.assertEqual(convert_special_chars(img_code), img_code) - - video_code = '