From dbdd1349d1972e9a3c788c2af060574c1e36323c Mon Sep 17 00:00:00 2001 From: Jeremy Paige Date: Thu, 1 Apr 2021 20:54:51 -0700 Subject: [PATCH 1/2] Address CVE-2021-23336 for urlparse --- Lib/cgi.py | 23 +++++++++++++++-------- Lib/test/test_cgi.py | 34 +++++++++++++++++++++++++++++----- Lib/test/test_urlparse.py | 24 ++++++++++++++++++++++++ Lib/urlparse.py | 22 +++++++++++++++++----- 4 files changed, 85 insertions(+), 18 deletions(-) diff --git a/Lib/cgi.py b/Lib/cgi.py index 5b903e0347739c..cde28cc91b7883 100755 --- a/Lib/cgi.py +++ b/Lib/cgi.py @@ -121,7 +121,8 @@ def nolog(*allargs): # 0 ==> unlimited input maxlen = 0 -def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0): +def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0, + separator='&'): """Parse a query in the environment or from a file (default stdin) Arguments, all optional: @@ -140,6 +141,9 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0): strict_parsing: flag indicating what to do with parsing errors. If false (the default), errors are silently ignored. If true, errors raise a ValueError exception. + + separator: str. The symbol to use for separating the query arguments. + Defaults to &. """ if fp is None: fp = sys.stdin @@ -148,7 +152,7 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0): if environ['REQUEST_METHOD'] == 'POST': ctype, pdict = parse_header(environ['CONTENT_TYPE']) if ctype == 'multipart/form-data': - return parse_multipart(fp, pdict) + return parse_multipart(fp, pdict, separator=separator) elif ctype == 'application/x-www-form-urlencoded': clength = int(environ['CONTENT_LENGTH']) if maxlen and clength > maxlen: @@ -171,7 +175,7 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0): else: qs = "" environ['QUERY_STRING'] = qs # XXX Shouldn't, really - return urlparse.parse_qs(qs, keep_blank_values, strict_parsing) + return urlparse.parse_qs(qs, keep_blank_values, strict_parsing, separator=separator) # parse query string function called from urlparse, @@ -191,7 +195,7 @@ def parse_qsl(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None): return urlparse.parse_qsl(qs, keep_blank_values, strict_parsing, max_num_fields) -def parse_multipart(fp, pdict): +def parse_multipart(fp, pdict, separator='&'): """Parse multipart input. Arguments: @@ -395,7 +399,7 @@ class FieldStorage: def __init__(self, fp=None, headers=None, outerboundary="", environ=os.environ, keep_blank_values=0, strict_parsing=0, - max_num_fields=None): + max_num_fields=None, separator='&'): """Constructor. Read multipart/* until last part. Arguments, all optional: @@ -430,6 +434,7 @@ def __init__(self, fp=None, headers=None, outerboundary="", self.keep_blank_values = keep_blank_values self.strict_parsing = strict_parsing self.max_num_fields = max_num_fields + self.separator = separator if 'REQUEST_METHOD' in environ: method = environ['REQUEST_METHOD'].upper() self.qs_on_post = None @@ -613,7 +618,8 @@ def read_urlencoded(self): if self.qs_on_post: qs += '&' + self.qs_on_post query = urlparse.parse_qsl(qs, self.keep_blank_values, - self.strict_parsing, self.max_num_fields) + self.strict_parsing, self.max_num_fields, + self.separator) self.list = [MiniFieldStorage(key, value) for key, value in query] self.skip_lines() @@ -629,7 +635,8 @@ def read_multi(self, environ, keep_blank_values, strict_parsing): query = urlparse.parse_qsl(self.qs_on_post, self.keep_blank_values, self.strict_parsing, - self.max_num_fields) + self.max_num_fields, + self.separator) self.list.extend(MiniFieldStorage(key, value) for key, value in query) FieldStorageClass = None @@ -649,7 +656,7 @@ def read_multi(self, environ, keep_blank_values, strict_parsing): headers = rfc822.Message(self.fp) part = klass(self.fp, headers, ib, environ, keep_blank_values, strict_parsing, - max_num_fields) + max_num_fields, self.separator) if max_num_fields is not None: max_num_fields -= 1 diff --git a/Lib/test/test_cgi.py b/Lib/test/test_cgi.py index 743c2afbd4cd24..597260275337f0 100644 --- a/Lib/test/test_cgi.py +++ b/Lib/test/test_cgi.py @@ -61,12 +61,9 @@ def do_test(buf, method): ("", ValueError("bad query field: ''")), ("&", ValueError("bad query field: ''")), ("&&", ValueError("bad query field: ''")), - (";", ValueError("bad query field: ''")), - (";&;", ValueError("bad query field: ''")), # Should the next few really be valid? ("=", {}), ("=&=", {}), - ("=;=", {}), # This rest seem to make sense ("=a", {'': ['a']}), ("&=a", ValueError("bad query field: ''")), @@ -81,8 +78,6 @@ def do_test(buf, method): ("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}), ("a=a+b&a=b+a", {'a': ['a b', 'b a']}), ("x=1&y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}), - ("x=1;y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}), - ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}), ("Hbc5161168c542333633315dee1182227:key_store_seqid=400006&cuyer=r&view=bustomer&order_id=0bb2e248638833d48cb7fed300000f1b&expire=964546263&lobale=en-US&kid=130003.300038&ss=env", {'Hbc5161168c542333633315dee1182227:key_store_seqid': ['400006'], 'cuyer': ['r'], @@ -104,6 +99,18 @@ def do_test(buf, method): }) ] +parse_semicolon_test_cases = [ + ("x=1;y=2.0", {'x': ['1'], 'y': ['2.0']}), + ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}), + (";", ValueError("bad query field: ''")), + (";;", ValueError("bad query field: ''")), + ("=;a", ValueError("bad query field: 'a'")), + (";b=a", ValueError("bad query field: ''")), + ("b;=a", ValueError("bad query field: 'b'")), + ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}), + ("a=a+b;a=b+a", {'a': ['a b', 'b a']}), +] + def first_elts(list): return map(lambda x:x[0], list) @@ -177,6 +184,23 @@ def test_strict(self): self.assertItemsEqual(sd.items(), first_second_elts(expect.items())) + def test_separator(self): + for orig, expect in parse_semicolon_test_cases: + env = {'QUERY_STRING': orig} + try: + fs = cgi.FieldStorage(separator=';', environ=env, strict_parsing=True) + except ValueError as ve: + self.assertEqual(type(ve), type(expect)) + self.assertEqual(ve.args, expect.args) + else: + for key in expect.keys(): + expect_val = expect[key] + self.assertIn(key, fs) + if len(expect_val) > 1: + self.assertEqual(fs.getvalue(key), expect_val) + else: + self.assertEqual(fs.getvalue(key), expect_val[0]) + def test_weird_formcontentdict(self): # Test the weird FormContentDict classes env = {'QUERY_STRING': "x=1&y=2.0&z=2-3.%2b0&1=1abc"} diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index 86c4a0595c4f6b..3c7fc9217f576c 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -24,6 +24,13 @@ ("&a=b", [('a', 'b')]), ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]), ("a=1&a=2", [('a', '1'), ('a', '2')]), + (";a=b", [(';a', 'b')]), + ("a=a+b;b=b+c", [('a', 'a b;b=b c')]), + (b";a=b", [(b';a', b'b')]), + (b"a=a+b;b=b+c", [(b'a', b'a b;b=b c')]), +] + +parse_qsl_semicolon_cases = [ (";", []), (";;", []), (";a=b", [('a', 'b')]), @@ -57,6 +64,13 @@ (b"&a=b", {b'a': [b'b']}), (b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}), (b"a=1&a=2", {b'a': [b'1', b'2']}), + (";a=b", {';a': ['b']}), + ("a=a+b;b=b+c", {'a': ['a b;b=b c']}), + (b";a=b", {b';a': [b'b']}), + (b"a=a+b;b=b+c", {b'a': [b'a b;b=b c']}), +] + +parse_qs_semicolon_cases = [ (";", {}), (";;", {}), (";a=b", {'a': ['b']}), @@ -141,6 +155,16 @@ def test_qs(self): self.assertEqual(result, expect_without_blanks, "Error parsing %r" % orig) + def test_parse_qsl_separator(self): + for orig, expect in parse_qsl_semicolon_cases: + result = urlparse.parse_qsl(orig, separator=';') + self.assertEqual(result, expect, "Error parsing %r" % orig) + + def test_parse_qs_separator(self): + for orig, expect in parse_qs_semicolon_cases: + result = urlparse.parse_qs(orig, separator=';') + self.assertEqual(result, expect, "Error parsing %r" % orig) + def test_roundtrips(self): testcases = [ ('file:///tmp/junk.txt', diff --git a/Lib/urlparse.py b/Lib/urlparse.py index 798b467b605f73..316dc3c6ebec49 100644 --- a/Lib/urlparse.py +++ b/Lib/urlparse.py @@ -382,7 +382,8 @@ def unquote(s): append(item) return ''.join(res) -def parse_qs(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None): +def parse_qs(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None, + separator='&'): """Parse a query given as a string argument. Arguments: @@ -402,17 +403,21 @@ def parse_qs(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None): max_num_fields: int. If set, then throws a ValueError if there are more than n fields read by parse_qsl(). + + separator: str. The symbol to use for separating the query arguments. + Defaults to &. """ dict = {} for name, value in parse_qsl(qs, keep_blank_values, strict_parsing, - max_num_fields): + max_num_fields, separator): if name in dict: dict[name].append(value) else: dict[name] = [value] return dict -def parse_qsl(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None): +def parse_qsl(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None, + separator='&'): """Parse a query given as a string argument. Arguments: @@ -432,17 +437,24 @@ def parse_qsl(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None): max_num_fields: int. If set, then throws a ValueError if there are more than n fields read by parse_qsl(). + separator: str. The symbol to use for separating the query arguments. + Defaults to &. + Returns a list, as G-d intended. """ + if not separator or (not isinstance(separator, str) + and not isinstance(separator, bytes)): + raise ValueError("Separator must be of type string or bytes.") + # If max_num_fields is defined then check that the number of fields # is less than max_num_fields. This prevents a memory exhaustion DOS # attack via post bodies with many fields. if max_num_fields is not None: - num_fields = 1 + qs.count('&') + qs.count(';') + num_fields = 1 + qs.count(separator) if max_num_fields < num_fields: raise ValueError('Max number of fields exceeded') - pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] + pairs = [s for s in qs.split(separator)] r = [] for name_value in pairs: if not name_value and not strict_parsing: From 05fa1caf6c635cfed8ce7d34e83df81842c3400d Mon Sep 17 00:00:00 2001 From: Jeremy Paige Date: Tue, 6 Apr 2021 21:13:35 -0700 Subject: [PATCH 2/2] PY_VERSION reflects platform version --- Include/patchlevel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/patchlevel.h b/Include/patchlevel.h index 0ce6313fa94131..9a9b4002e72db8 100644 --- a/Include/patchlevel.h +++ b/Include/patchlevel.h @@ -27,7 +27,7 @@ #define PY_RELEASE_SERIAL 0 /* Version as a string */ -#define PY_VERSION "2.7.18" +#define PY_VERSION "2.7.18.4" /*--end constants--*/ /* Subversion Revision number of this file (not of the repository). Empty