From dbdd1349d1972e9a3c788c2af060574c1e36323c Mon Sep 17 00:00:00 2001
From: Jeremy Paige <jeremyp@activestate.com>
Date: Thu, 1 Apr 2021 20:54:51 -0700
Subject: [PATCH 1/2] Address CVE-2021-23336 for urlparse

---
 Lib/cgi.py                | 23 +++++++++++++++--------
 Lib/test/test_cgi.py      | 34 +++++++++++++++++++++++++++++-----
 Lib/test/test_urlparse.py | 24 ++++++++++++++++++++++++
 Lib/urlparse.py           | 22 +++++++++++++++++-----
 4 files changed, 85 insertions(+), 18 deletions(-)

diff --git a/Lib/cgi.py b/Lib/cgi.py
index 5b903e0347739c..cde28cc91b7883 100755
--- a/Lib/cgi.py
+++ b/Lib/cgi.py
@@ -121,7 +121,8 @@ def nolog(*allargs):
 # 0 ==> unlimited input
 maxlen = 0
 
-def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
+def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0,
+          separator='&'):
     """Parse a query in the environment or from a file (default stdin)
 
         Arguments, all optional:
@@ -140,6 +141,9 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
         strict_parsing: flag indicating what to do with parsing errors.
             If false (the default), errors are silently ignored.
             If true, errors raise a ValueError exception.
+
+        separator: str. The symbol to use for separating the query arguments.
+            Defaults to &.
     """
     if fp is None:
         fp = sys.stdin
@@ -148,7 +152,7 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
     if environ['REQUEST_METHOD'] == 'POST':
         ctype, pdict = parse_header(environ['CONTENT_TYPE'])
         if ctype == 'multipart/form-data':
-            return parse_multipart(fp, pdict)
+            return parse_multipart(fp, pdict, separator=separator)
         elif ctype == 'application/x-www-form-urlencoded':
             clength = int(environ['CONTENT_LENGTH'])
             if maxlen and clength > maxlen:
@@ -171,7 +175,7 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
         else:
             qs = ""
         environ['QUERY_STRING'] = qs    # XXX Shouldn't, really
-    return urlparse.parse_qs(qs, keep_blank_values, strict_parsing)
+    return urlparse.parse_qs(qs, keep_blank_values, strict_parsing, separator=separator)
 
 
 # parse query string function called from urlparse,
@@ -191,7 +195,7 @@ def parse_qsl(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None):
     return urlparse.parse_qsl(qs, keep_blank_values, strict_parsing,
                               max_num_fields)
 
-def parse_multipart(fp, pdict):
+def parse_multipart(fp, pdict, separator='&'):
     """Parse multipart input.
 
     Arguments:
@@ -395,7 +399,7 @@ class FieldStorage:
 
     def __init__(self, fp=None, headers=None, outerboundary="",
                  environ=os.environ, keep_blank_values=0, strict_parsing=0,
-                 max_num_fields=None):
+                 max_num_fields=None, separator='&'):
         """Constructor.  Read multipart/* until last part.
 
         Arguments, all optional:
@@ -430,6 +434,7 @@ def __init__(self, fp=None, headers=None, outerboundary="",
         self.keep_blank_values = keep_blank_values
         self.strict_parsing = strict_parsing
         self.max_num_fields = max_num_fields
+        self.separator = separator
         if 'REQUEST_METHOD' in environ:
             method = environ['REQUEST_METHOD'].upper()
         self.qs_on_post = None
@@ -613,7 +618,8 @@ def read_urlencoded(self):
         if self.qs_on_post:
             qs += '&' + self.qs_on_post
         query = urlparse.parse_qsl(qs, self.keep_blank_values,
-                                   self.strict_parsing, self.max_num_fields)
+                                   self.strict_parsing, self.max_num_fields,
+                                   self.separator)
         self.list = [MiniFieldStorage(key, value) for key, value in query]
         self.skip_lines()
 
@@ -629,7 +635,8 @@ def read_multi(self, environ, keep_blank_values, strict_parsing):
             query = urlparse.parse_qsl(self.qs_on_post,
                                        self.keep_blank_values,
                                        self.strict_parsing,
-                                       self.max_num_fields)
+                                       self.max_num_fields,
+                                       self.separator)
             self.list.extend(MiniFieldStorage(key, value)
                              for key, value in query)
             FieldStorageClass = None
@@ -649,7 +656,7 @@ def read_multi(self, environ, keep_blank_values, strict_parsing):
             headers = rfc822.Message(self.fp)
             part = klass(self.fp, headers, ib,
                          environ, keep_blank_values, strict_parsing,
-                         max_num_fields)
+                         max_num_fields, self.separator)
 
             if max_num_fields is not None:
                 max_num_fields -= 1
diff --git a/Lib/test/test_cgi.py b/Lib/test/test_cgi.py
index 743c2afbd4cd24..597260275337f0 100644
--- a/Lib/test/test_cgi.py
+++ b/Lib/test/test_cgi.py
@@ -61,12 +61,9 @@ def do_test(buf, method):
     ("", ValueError("bad query field: ''")),
     ("&", ValueError("bad query field: ''")),
     ("&&", ValueError("bad query field: ''")),
-    (";", ValueError("bad query field: ''")),
-    (";&;", ValueError("bad query field: ''")),
     # Should the next few really be valid?
     ("=", {}),
     ("=&=", {}),
-    ("=;=", {}),
     # This rest seem to make sense
     ("=a", {'': ['a']}),
     ("&=a", ValueError("bad query field: ''")),
@@ -81,8 +78,6 @@ def do_test(buf, method):
     ("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
     ("a=a+b&a=b+a", {'a': ['a b', 'b a']}),
     ("x=1&y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
-    ("x=1;y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
-    ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
     ("Hbc5161168c542333633315dee1182227:key_store_seqid=400006&cuyer=r&view=bustomer&order_id=0bb2e248638833d48cb7fed300000f1b&expire=964546263&lobale=en-US&kid=130003.300038&ss=env",
      {'Hbc5161168c542333633315dee1182227:key_store_seqid': ['400006'],
       'cuyer': ['r'],
@@ -104,6 +99,18 @@ def do_test(buf, method):
       })
     ]
 
+parse_semicolon_test_cases = [  # (query, expected) pairs for separator=';'
+    ("x=1;y=2.0", {'x': ['1'], 'y': ['2.0']}),
+    ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
+    (";", ValueError("bad query field: ''")),  # empty fields fail strict parsing
+    (";;", ValueError("bad query field: ''")),
+    ("=;a", ValueError("bad query field: 'a'")),  # 'a' has no '=' at all
+    (";b=a", ValueError("bad query field: ''")),
+    ("b;=a", ValueError("bad query field: 'b'")),
+    ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),  # '+' decodes to a space
+    ("a=a+b;a=b+a", {'a': ['a b', 'b a']}),  # repeated keys collect into a list
+]
+
 def first_elts(list):
     return map(lambda x:x[0], list)
 
@@ -177,6 +184,23 @@ def test_strict(self):
                         self.assertItemsEqual(sd.items(),
                                                 first_second_elts(expect.items()))
 
+    def test_separator(self):
+        """FieldStorage splits QUERY_STRING on a custom ';' separator."""
+        for query, wanted in parse_semicolon_test_cases:
+            try:
+                form = cgi.FieldStorage(separator=';', strict_parsing=True,
+                                        environ={'QUERY_STRING': query})
+            except ValueError as err:
+                # Strict parsing must fail exactly as the table predicts.
+                self.assertEqual(type(err), type(wanted))
+                self.assertEqual(err.args, wanted.args)
+                continue
+            for name, values in wanted.items():
+                self.assertIn(name, form)
+                # A one-element list is compared against its bare value.
+                single = len(values) <= 1
+                self.assertEqual(form.getvalue(name), values[0] if single else values)
+
     def test_weird_formcontentdict(self):
         # Test the weird FormContentDict classes
         env = {'QUERY_STRING': "x=1&y=2.0&z=2-3.%2b0&1=1abc"}
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index 86c4a0595c4f6b..3c7fc9217f576c 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -24,6 +24,13 @@
     ("&a=b", [('a', 'b')]),
     ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
     ("a=1&a=2", [('a', '1'), ('a', '2')]),
+    (";a=b", [(';a', 'b')]),
+    ("a=a+b;b=b+c", [('a', 'a b;b=b c')]),
+    (b";a=b", [(b';a', b'b')]),
+    (b"a=a+b;b=b+c", [(b'a', b'a b;b=b c')]),
+]
+
+parse_qsl_semicolon_cases = [
     (";", []),
     (";;", []),
     (";a=b", [('a', 'b')]),
@@ -57,6 +64,13 @@
     (b"&a=b", {b'a': [b'b']}),
     (b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
     (b"a=1&a=2", {b'a': [b'1', b'2']}),
+    (";a=b", {';a': ['b']}),
+    ("a=a+b;b=b+c", {'a': ['a b;b=b c']}),
+    (b";a=b", {b';a': [b'b']}),
+    (b"a=a+b;b=b+c", {b'a': [b'a b;b=b c']}),
+]
+
+parse_qs_semicolon_cases = [
     (";", {}),
     (";;", {}),
     (";a=b", {'a': ['b']}),
@@ -141,6 +155,16 @@ def test_qs(self):
             self.assertEqual(result, expect_without_blanks,
                     "Error parsing %r" % orig)
 
+    def test_parse_qsl_separator(self):  # parse_qsl honours separator=';'
+        for query, wanted in parse_qsl_semicolon_cases:
+            self.assertEqual(urlparse.parse_qsl(query, separator=';'), wanted,
+                             "Error parsing %r" % query)
+
+    def test_parse_qs_separator(self):  # parse_qs honours separator=';'
+        for query, wanted in parse_qs_semicolon_cases:
+            self.assertEqual(urlparse.parse_qs(query, separator=';'), wanted,
+                             "Error parsing %r" % query)
+
     def test_roundtrips(self):
         testcases = [
             ('file:///tmp/junk.txt',
diff --git a/Lib/urlparse.py b/Lib/urlparse.py
index 798b467b605f73..316dc3c6ebec49 100644
--- a/Lib/urlparse.py
+++ b/Lib/urlparse.py
@@ -382,7 +382,8 @@ def unquote(s):
             append(item)
     return ''.join(res)
 
-def parse_qs(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None):
+def parse_qs(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None,
+             separator='&'):
     """Parse a query given as a string argument.
 
         Arguments:
@@ -402,17 +403,21 @@ def parse_qs(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None):
 
         max_num_fields: int. If set, then throws a ValueError if there
             are more than n fields read by parse_qsl().
+
+        separator: str. The symbol to use for separating the query arguments.
+            Defaults to &.
     """
     dict = {}
     for name, value in parse_qsl(qs, keep_blank_values, strict_parsing,
-                                 max_num_fields):
+                                 max_num_fields, separator):
         if name in dict:
             dict[name].append(value)
         else:
             dict[name] = [value]
     return dict
 
-def parse_qsl(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None):
+def parse_qsl(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None,
+              separator='&'):
     """Parse a query given as a string argument.
 
     Arguments:
@@ -432,17 +437,24 @@ def parse_qsl(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None):
     max_num_fields: int. If set, then throws a ValueError if there
         are more than n fields read by parse_qsl().
 
+    separator: str. The symbol to use for separating the query arguments.
+        Defaults to &.
+
     Returns a list, as G-d intended.
     """
+    if not separator or (not isinstance(separator, str)
+            and not isinstance(separator, bytes)):
+        raise ValueError("Separator must be of type string or bytes.")
+
     # If max_num_fields is defined then check that the number of fields
     # is less than max_num_fields. This prevents a memory exhaustion DOS
     # attack via post bodies with many fields.
     if max_num_fields is not None:
-        num_fields = 1 + qs.count('&') + qs.count(';')
+        num_fields = 1 + qs.count(separator)
         if max_num_fields < num_fields:
             raise ValueError('Max number of fields exceeded')
 
-    pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
+    pairs = [s for s in qs.split(separator)]
     r = []
     for name_value in pairs:
         if not name_value and not strict_parsing:

From 05fa1caf6c635cfed8ce7d34e83df81842c3400d Mon Sep 17 00:00:00 2001
From: Jeremy Paige <jeremyp@activestate.com>
Date: Tue, 6 Apr 2021 21:13:35 -0700
Subject: [PATCH 2/2] PY_VERSION reflects platform version

---
 Include/patchlevel.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Include/patchlevel.h b/Include/patchlevel.h
index 0ce6313fa94131..9a9b4002e72db8 100644
--- a/Include/patchlevel.h
+++ b/Include/patchlevel.h
@@ -27,7 +27,7 @@
 #define PY_RELEASE_SERIAL	0
 
 /* Version as a string */
-#define PY_VERSION      	"2.7.18"
+#define PY_VERSION      	"2.7.18.4"
 /*--end constants--*/
 
 /* Subversion Revision number of this file (not of the repository). Empty