From 19a6052a6c9c17f477ad90c24d383c7537b69e66 Mon Sep 17 00:00:00 2001 From: "B. Caller" <73827525+b-c-ds@users.noreply.github.com> Date: Fri, 7 May 2021 15:34:39 +0100 Subject: [PATCH] Fix cubic ReDoS in fenced code and references Two regular expressions were vulnerable to Regular Expression Denial of Service (ReDoS). Crafted strings containing a long sequence of spaces could cause Denial of Service by making markdown take a long time to process. This represents a vulnerability when untrusted user input is processed with the markdown package. ReferenceProcessor: https://github.com/Python-Markdown/markdown/blob/4acb949256adc535d6e6cd8/markdown/blockprocessors.py#L559-L563 e.g.: ```python import markdown markdown.markdown('[]:0' + ' ' * 4321 + '0') ``` FencedBlockPreprocessor (requires fenced_code extension): https://github.com/Python-Markdown/markdown/blob/a11431539d08e14b0bd821c/markdown/extensions/fenced_code.py#L43-L54 e.g.: ```python import markdown markdown.markdown('```' + ' ' * 4321, extensions=['fenced_code']) ``` Both regular expressions had cubic worst-case complexity, so doubling the number of spaces made processing take 8 times as long. The cubic behaviour can be seen as follows: ``` $ time python -c "import markdown; markdown.markdown('[]:0' + ' ' * 1000 + '0')" python -c "import markdown; markdown.markdown('[]:0' + ' ' * 1000 + '0')" 1.25s user 0.02s system 99% cpu 1.271 total $ time python -c "import markdown; markdown.markdown('[]:0' + ' ' * 2000 + '0')" python -c "import markdown; markdown.markdown('[]:0' + ' ' * 2000 + '0')" 9.01s user 0.02s system 99% cpu 9.040 total $ time python -c "import markdown; markdown.markdown('[]:0' + ' ' * 4000 + '0')" python -c "import markdown; markdown.markdown('[]:0' + ' ' * 4000 + '0')" 74.86s user 0.27s system 99% cpu 1:15.38 total ``` Both regexes had three `[ ]*` groups separated by optional groups, in effect making the regex `[ ]*[ ]*[ ]*`. Discovered using [regexploit](https://github.com/doyensec/regexploit). 
--- markdown/blockprocessors.py | 2 +- markdown/extensions/fenced_code.py | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/markdown/blockprocessors.py b/markdown/blockprocessors.py index 8518e50f5..dac3f086a 100644 --- a/markdown/blockprocessors.py +++ b/markdown/blockprocessors.py @@ -559,7 +559,7 @@ def run(self, parent, blocks): class ReferenceProcessor(BlockProcessor): """ Process link references. """ RE = re.compile( - r'^[ ]{0,3}\[([^\]]*)\]:[ ]*\n?[ ]*([^\s]+)[ ]*\n?[ ]*((["\'])(.*)\4|\((.*)\))?[ ]*$', re.MULTILINE + r'^[ ]{0,3}\[([^\]]*)\]:[ ]*\n?[ ]*([^\s]+)[ ]*(?:\n[ ]*)?((["\'])(.*)\4[ ]*|\((.*)\)[ ]*)?$', re.MULTILINE ) def test(self, parent, block): diff --git a/markdown/extensions/fenced_code.py b/markdown/extensions/fenced_code.py index 716b46772..04c249e6d 100644 --- a/markdown/extensions/fenced_code.py +++ b/markdown/extensions/fenced_code.py @@ -42,13 +42,13 @@ def extendMarkdown(self, md): class FencedBlockPreprocessor(Preprocessor): FENCED_BLOCK_RE = re.compile( dedent(r''' - (?P<fence>^(?:~{3,}|`{3,}))[ ]* # opening fence - ((\{(?P<attrs>[^\}\n]*)\})?| # (optional {attrs} or - (\.?(?P<lang>[\w#.+-]*))?[ ]* # optional (.)lang - (hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot))?) # optional hl_lines) - [ ]*\n # newline (end of opening fence) - (?P<code>.*?)(?<=\n) # the code block - (?P=fence)[ ]*$ # closing fence + (?P<fence>^(?:~{3,}|`{3,}))[ ]* # opening fence + ((\{(?P<attrs>[^\}\n]*)\})| # (optional {attrs} or + (\.?(?P<lang>[\w#.+-]*)[ ]*)? # optional (.)lang + (hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot)[ ]*)?) # optional hl_lines) + \n # newline (end of opening fence) + (?P<code>.*?)(?<=\n) # the code block + (?P=fence)[ ]*$ # closing fence '''), re.MULTILINE | re.DOTALL | re.VERBOSE )