499 lines
22 KiB
Diff
499 lines
22 KiB
Diff
From d0d4d30882fe3ab9b1badbecf5d15d94326fd13e Mon Sep 17 00:00:00 2001
|
|
From: Senthil Kumaran <senthil@uthcode.com>
|
|
Date: Mon, 15 Feb 2021 10:34:14 -0800
|
|
Subject: [PATCH] [3.7] bpo-42967: only use '&' as a query string separator
|
|
(GH-24297) (GH-24531)
|
|
MIME-Version: 1.0
|
|
Content-Type: text/plain; charset=UTF-8
|
|
Content-Transfer-Encoding: 8bit
|
|
|
|
bpo-42967: [security] Address a web cache-poisoning issue reported in
|
|
urllib.parse.parse_qsl().
|
|
|
|
urllib.parse will only use "&" as query string separator by default
|
|
instead of both ";" and "&" as allowed in earlier versions. An optional
|
|
argument separator with default value "&" is added to specify the
|
|
separator.
|
|
|
|
Co-authored-by: Éric Araujo <merwok@netwok.org>
|
|
Co-authored-by: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com>
|
|
Co-authored-by: Adam Goldschmidt <adamgold7@gmail.com>
|
|
(cherry picked from commit fcbe0cb04d35189401c0c880ebfb4311e952d776)
|
|
---
|
|
Doc/library/cgi.rst | 9 ++-
|
|
Doc/library/urllib.parse.rst | 23 ++++++-
|
|
Doc/whatsnew/3.6.rst | 13 ++++
|
|
Doc/whatsnew/3.7.rst | 13 ++++
|
|
Lib/cgi.py | 23 ++++---
|
|
Lib/test/test_cgi.py | 29 ++++++--
|
|
Lib/test/test_urlparse.py | 68 +++++++++++++------
|
|
Lib/urllib/parse.py | 19 ++++--
|
|
.../2021-02-14-15-59-16.bpo-42967.YApqDS.rst | 1 +
|
|
9 files changed, 152 insertions(+), 46 deletions(-)
|
|
create mode 100644 Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst
|
|
|
|
diff --git a/Doc/library/cgi.rst b/Doc/library/cgi.rst
|
|
index 0b1aead9ddf1f..f0ec7e8cc6d04 100644
|
|
--- a/Doc/library/cgi.rst
|
|
+++ b/Doc/library/cgi.rst
|
|
@@ -277,10 +277,10 @@ These are useful if you want more control, or if you want to employ some of the
|
|
algorithms implemented in this module in other circumstances.
|
|
|
|
|
|
-.. function:: parse(fp=None, environ=os.environ, keep_blank_values=False, strict_parsing=False)
|
|
+.. function:: parse(fp=None, environ=os.environ, keep_blank_values=False, strict_parsing=False, separator="&")
|
|
|
|
Parse a query in the environment or from a file (the file defaults to
|
|
- ``sys.stdin``). The *keep_blank_values* and *strict_parsing* parameters are
|
|
+ ``sys.stdin``). The *keep_blank_values*, *strict_parsing* and *separator* parameters are
|
|
passed to :func:`urllib.parse.parse_qs` unchanged.
|
|
|
|
|
|
@@ -296,7 +296,7 @@ algorithms implemented in this module in other circumstances.
|
|
instead. It is maintained here only for backward compatibility.
|
|
|
|
|
|
-.. function:: parse_multipart(fp, pdict, encoding="utf-8", errors="replace")
|
|
+.. function:: parse_multipart(fp, pdict, encoding="utf-8", errors="replace", separator="&")
|
|
|
|
Parse input of type :mimetype:`multipart/form-data` (for file uploads).
|
|
Arguments are *fp* for the input file, *pdict* for a dictionary containing
|
|
@@ -315,6 +315,9 @@ algorithms implemented in this module in other circumstances.
|
|
Added the *encoding* and *errors* parameters. For non-file fields, the
|
|
value is now a list of strings, not bytes.
|
|
|
|
+ .. versionchanged:: 3.7.10
|
|
+ Added the *separator* parameter.
|
|
+
|
|
|
|
.. function:: parse_header(string)
|
|
|
|
diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst
|
|
index f499412144008..e79faf035b06b 100644
|
|
--- a/Doc/library/urllib.parse.rst
|
|
+++ b/Doc/library/urllib.parse.rst
|
|
@@ -165,7 +165,7 @@ or on combining URL components into a URL string.
|
|
now raise :exc:`ValueError`.
|
|
|
|
|
|
-.. function:: parse_qs(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace', max_num_fields=None)
|
|
+.. function:: parse_qs(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace', max_num_fields=None, separator='&')
|
|
|
|
Parse a query string given as a string argument (data of type
|
|
:mimetype:`application/x-www-form-urlencoded`). Data are returned as a
|
|
@@ -190,6 +190,9 @@ or on combining URL components into a URL string.
|
|
read. If set, then throws a :exc:`ValueError` if there are more than
|
|
*max_num_fields* fields read.
|
|
|
|
+ The optional argument *separator* is the symbol to use for separating the
|
|
+ query arguments. It defaults to ``&``.
|
|
+
|
|
Use the :func:`urllib.parse.urlencode` function (with the ``doseq``
|
|
parameter set to ``True``) to convert such dictionaries into query
|
|
strings.
|
|
@@ -200,8 +203,14 @@ or on combining URL components into a URL string.
|
|
.. versionchanged:: 3.7.2
|
|
Added *max_num_fields* parameter.
|
|
|
|
+ .. versionchanged:: 3.7.10
|
|
+ Added *separator* parameter with the default value of ``&``. Python
|
|
+ versions earlier than Python 3.7.10 allowed using both ``;`` and ``&`` as
|
|
+ query parameter separator. This has been changed to allow only a single
|
|
+ separator key, with ``&`` as the default separator.
|
|
+
|
|
|
|
-.. function:: parse_qsl(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace', max_num_fields=None)
|
|
+.. function:: parse_qsl(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace', max_num_fields=None, separator='&')
|
|
|
|
Parse a query string given as a string argument (data of type
|
|
:mimetype:`application/x-www-form-urlencoded`). Data are returned as a list of
|
|
@@ -225,6 +234,9 @@ or on combining URL components into a URL string.
|
|
read. If set, then throws a :exc:`ValueError` if there are more than
|
|
*max_num_fields* fields read.
|
|
|
|
+ The optional argument *separator* is the symbol to use for separating the
|
|
+ query arguments. It defaults to ``&``.
|
|
+
|
|
Use the :func:`urllib.parse.urlencode` function to convert such lists of pairs into
|
|
query strings.
|
|
|
|
@@ -234,6 +246,13 @@ or on combining URL components into a URL string.
|
|
.. versionchanged:: 3.7.2
|
|
Added *max_num_fields* parameter.
|
|
|
|
+ .. versionchanged:: 3.7.10
|
|
+ Added *separator* parameter with the default value of ``&``. Python
|
|
+ versions earlier than Python 3.7.10 allowed using both ``;`` and ``&`` as
|
|
+ query parameter separator. This has been changed to allow only a single
|
|
+ separator key, with ``&`` as the default separator.
|
|
+
|
|
+
|
|
.. function:: urlunparse(parts)
|
|
|
|
Construct a URL from a tuple as returned by ``urlparse()``. The *parts*
|
|
diff --git a/Doc/whatsnew/3.6.rst b/Doc/whatsnew/3.6.rst
|
|
index 04c1f7e71db32..4409a3a596267 100644
|
|
--- a/Doc/whatsnew/3.6.rst
|
|
+++ b/Doc/whatsnew/3.6.rst
|
|
@@ -2443,3 +2443,16 @@ because of the behavior of the socket option ``SO_REUSEADDR`` in UDP. For more
|
|
details, see the documentation for ``loop.create_datagram_endpoint()``.
|
|
(Contributed by Kyle Stanley, Antoine Pitrou, and Yury Selivanov in
|
|
:issue:`37228`.)
|
|
+
|
|
+Notable changes in Python 3.6.13
|
|
+================================
|
|
+
|
|
+Earlier Python versions allowed using both ``;`` and ``&`` as
|
|
+query parameter separators in :func:`urllib.parse.parse_qs` and
|
|
+:func:`urllib.parse.parse_qsl`. Due to security concerns, and to conform with
|
|
+newer W3C recommendations, this has been changed to allow only a single
|
|
+separator key, with ``&`` as the default. This change also affects
|
|
+:func:`cgi.parse` and :func:`cgi.parse_multipart` as they use the affected
|
|
+functions internally. For more details, please see their respective
|
|
+documentation.
|
|
+(Contributed by Adam Goldschmidt, Senthil Kumaran and Ken Jin in :issue:`42967`.)
|
|
diff --git a/Doc/whatsnew/3.7.rst b/Doc/whatsnew/3.7.rst
|
|
index 6dcb006924e77..25e231dcc7dfa 100644
|
|
--- a/Doc/whatsnew/3.7.rst
|
|
+++ b/Doc/whatsnew/3.7.rst
|
|
@@ -2572,3 +2572,16 @@ because of the behavior of the socket option ``SO_REUSEADDR`` in UDP. For more
|
|
details, see the documentation for ``loop.create_datagram_endpoint()``.
|
|
(Contributed by Kyle Stanley, Antoine Pitrou, and Yury Selivanov in
|
|
:issue:`37228`.)
|
|
+
|
|
+Notable changes in Python 3.7.10
|
|
+================================
|
|
+
|
|
+Earlier Python versions allowed using both ``;`` and ``&`` as
|
|
+query parameter separators in :func:`urllib.parse.parse_qs` and
|
|
+:func:`urllib.parse.parse_qsl`. Due to security concerns, and to conform with
|
|
+newer W3C recommendations, this has been changed to allow only a single
|
|
+separator key, with ``&`` as the default. This change also affects
|
|
+:func:`cgi.parse` and :func:`cgi.parse_multipart` as they use the affected
|
|
+functions internally. For more details, please see their respective
|
|
+documentation.
|
|
+(Contributed by Adam Goldschmidt, Senthil Kumaran and Ken Jin in :issue:`42967`.)
|
|
diff --git a/Lib/cgi.py b/Lib/cgi.py
|
|
index 5a001667efca8..51afead1b3136 100755
|
|
--- a/Lib/cgi.py
|
|
+++ b/Lib/cgi.py
|
|
@@ -117,7 +117,8 @@ def closelog():
|
|
# 0 ==> unlimited input
|
|
maxlen = 0
|
|
|
|
-def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
|
|
+def parse(fp=None, environ=os.environ, keep_blank_values=0,
|
|
+ strict_parsing=0, separator='&'):
|
|
"""Parse a query in the environment or from a file (default stdin)
|
|
|
|
Arguments, all optional:
|
|
@@ -136,6 +137,9 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
|
|
strict_parsing: flag indicating what to do with parsing errors.
|
|
If false (the default), errors are silently ignored.
|
|
If true, errors raise a ValueError exception.
|
|
+
|
|
+ separator: str. The symbol to use for separating the query arguments.
|
|
+ Defaults to &.
|
|
"""
|
|
if fp is None:
|
|
fp = sys.stdin
|
|
@@ -156,7 +160,7 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
|
|
if environ['REQUEST_METHOD'] == 'POST':
|
|
ctype, pdict = parse_header(environ['CONTENT_TYPE'])
|
|
if ctype == 'multipart/form-data':
|
|
- return parse_multipart(fp, pdict)
|
|
+ return parse_multipart(fp, pdict, separator=separator)
|
|
elif ctype == 'application/x-www-form-urlencoded':
|
|
clength = int(environ['CONTENT_LENGTH'])
|
|
if maxlen and clength > maxlen:
|
|
@@ -180,7 +184,7 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
|
|
qs = ""
|
|
environ['QUERY_STRING'] = qs # XXX Shouldn't, really
|
|
return urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing,
|
|
- encoding=encoding)
|
|
+ encoding=encoding, separator=separator)
|
|
|
|
|
|
# parse query string function called from urlparse,
|
|
@@ -198,7 +202,7 @@ def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
|
|
DeprecationWarning, 2)
|
|
return urllib.parse.parse_qsl(qs, keep_blank_values, strict_parsing)
|
|
|
|
-def parse_multipart(fp, pdict, encoding="utf-8", errors="replace"):
|
|
+def parse_multipart(fp, pdict, encoding="utf-8", errors="replace", separator='&'):
|
|
"""Parse multipart input.
|
|
|
|
Arguments:
|
|
@@ -222,7 +226,7 @@ def parse_multipart(fp, pdict, encoding="utf-8", errors="replace"):
|
|
except KeyError:
|
|
pass
|
|
fs = FieldStorage(fp, headers=headers, encoding=encoding, errors=errors,
|
|
- environ={'REQUEST_METHOD': 'POST'})
|
|
+ environ={'REQUEST_METHOD': 'POST'}, separator=separator)
|
|
return {k: fs.getlist(k) for k in fs}
|
|
|
|
def _parseparam(s):
|
|
@@ -332,7 +336,7 @@ class FieldStorage:
|
|
def __init__(self, fp=None, headers=None, outerboundary=b'',
|
|
environ=os.environ, keep_blank_values=0, strict_parsing=0,
|
|
limit=None, encoding='utf-8', errors='replace',
|
|
- max_num_fields=None):
|
|
+ max_num_fields=None, separator='&'):
|
|
"""Constructor. Read multipart/* until last part.
|
|
|
|
Arguments, all optional:
|
|
@@ -380,6 +384,7 @@ def __init__(self, fp=None, headers=None, outerboundary=b'',
|
|
self.keep_blank_values = keep_blank_values
|
|
self.strict_parsing = strict_parsing
|
|
self.max_num_fields = max_num_fields
|
|
+ self.separator = separator
|
|
if 'REQUEST_METHOD' in environ:
|
|
method = environ['REQUEST_METHOD'].upper()
|
|
self.qs_on_post = None
|
|
@@ -606,7 +611,7 @@ def read_urlencoded(self):
|
|
query = urllib.parse.parse_qsl(
|
|
qs, self.keep_blank_values, self.strict_parsing,
|
|
encoding=self.encoding, errors=self.errors,
|
|
- max_num_fields=self.max_num_fields)
|
|
+ max_num_fields=self.max_num_fields, separator=self.separator)
|
|
self.list = [MiniFieldStorage(key, value) for key, value in query]
|
|
self.skip_lines()
|
|
|
|
@@ -622,7 +627,7 @@ def read_multi(self, environ, keep_blank_values, strict_parsing):
|
|
query = urllib.parse.parse_qsl(
|
|
self.qs_on_post, self.keep_blank_values, self.strict_parsing,
|
|
encoding=self.encoding, errors=self.errors,
|
|
- max_num_fields=self.max_num_fields)
|
|
+ max_num_fields=self.max_num_fields, separator=self.separator)
|
|
self.list.extend(MiniFieldStorage(key, value) for key, value in query)
|
|
|
|
klass = self.FieldStorageClass or self.__class__
|
|
@@ -666,7 +671,7 @@ def read_multi(self, environ, keep_blank_values, strict_parsing):
|
|
else self.limit - self.bytes_read
|
|
part = klass(self.fp, headers, ib, environ, keep_blank_values,
|
|
strict_parsing, limit,
|
|
- self.encoding, self.errors, max_num_fields)
|
|
+ self.encoding, self.errors, max_num_fields, self.separator)
|
|
|
|
if max_num_fields is not None:
|
|
max_num_fields -= 1
|
|
diff --git a/Lib/test/test_cgi.py b/Lib/test/test_cgi.py
|
|
index 220268e14f032..c2a00a2e45d79 100644
|
|
--- a/Lib/test/test_cgi.py
|
|
+++ b/Lib/test/test_cgi.py
|
|
@@ -55,12 +55,9 @@ def do_test(buf, method):
|
|
("", ValueError("bad query field: ''")),
|
|
("&", ValueError("bad query field: ''")),
|
|
("&&", ValueError("bad query field: ''")),
|
|
- (";", ValueError("bad query field: ''")),
|
|
- (";&;", ValueError("bad query field: ''")),
|
|
# Should the next few really be valid?
|
|
("=", {}),
|
|
("=&=", {}),
|
|
- ("=;=", {}),
|
|
# This rest seem to make sense
|
|
("=a", {'': ['a']}),
|
|
("&=a", ValueError("bad query field: ''")),
|
|
@@ -75,8 +72,6 @@ def do_test(buf, method):
|
|
("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
|
|
("a=a+b&a=b+a", {'a': ['a b', 'b a']}),
|
|
("x=1&y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
|
|
- ("x=1;y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
|
|
- ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
|
|
("Hbc5161168c542333633315dee1182227:key_store_seqid=400006&cuyer=r&view=bustomer&order_id=0bb2e248638833d48cb7fed300000f1b&expire=964546263&lobale=en-US&kid=130003.300038&ss=env",
|
|
{'Hbc5161168c542333633315dee1182227:key_store_seqid': ['400006'],
|
|
'cuyer': ['r'],
|
|
@@ -212,6 +207,30 @@ def test_strict(self):
|
|
else:
|
|
self.assertEqual(fs.getvalue(key), expect_val[0])
|
|
|
|
+ def test_separator(self):
|
|
+ parse_semicolon = [
|
|
+ ("x=1;y=2.0", {'x': ['1'], 'y': ['2.0']}),
|
|
+ ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
|
|
+ (";", ValueError("bad query field: ''")),
|
|
+ (";;", ValueError("bad query field: ''")),
|
|
+ ("=;a", ValueError("bad query field: 'a'")),
|
|
+ (";b=a", ValueError("bad query field: ''")),
|
|
+ ("b;=a", ValueError("bad query field: 'b'")),
|
|
+ ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
|
|
+ ("a=a+b;a=b+a", {'a': ['a b', 'b a']}),
|
|
+ ]
|
|
+ for orig, expect in parse_semicolon:
|
|
+ env = {'QUERY_STRING': orig}
|
|
+ fs = cgi.FieldStorage(separator=';', environ=env)
|
|
+ if isinstance(expect, dict):
|
|
+ for key in expect.keys():
|
|
+ expect_val = expect[key]
|
|
+ self.assertIn(key, fs)
|
|
+ if len(expect_val) > 1:
|
|
+ self.assertEqual(fs.getvalue(key), expect_val)
|
|
+ else:
|
|
+ self.assertEqual(fs.getvalue(key), expect_val[0])
|
|
+
|
|
def test_log(self):
|
|
cgi.log("Testing")
|
|
|
|
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
|
|
index 68f633ca3a7db..e3088b2f39bd7 100644
|
|
--- a/Lib/test/test_urlparse.py
|
|
+++ b/Lib/test/test_urlparse.py
|
|
@@ -32,16 +32,10 @@
|
|
(b"&a=b", [(b'a', b'b')]),
|
|
(b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
|
|
(b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]),
|
|
- (";", []),
|
|
- (";;", []),
|
|
- (";a=b", [('a', 'b')]),
|
|
- ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]),
|
|
- ("a=1;a=2", [('a', '1'), ('a', '2')]),
|
|
- (b";", []),
|
|
- (b";;", []),
|
|
- (b";a=b", [(b'a', b'b')]),
|
|
- (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
|
|
- (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]),
|
|
+ (";a=b", [(';a', 'b')]),
|
|
+ ("a=a+b;b=b+c", [('a', 'a b;b=b c')]),
|
|
+ (b";a=b", [(b';a', b'b')]),
|
|
+ (b"a=a+b;b=b+c", [(b'a', b'a b;b=b c')]),
|
|
]
|
|
|
|
# Each parse_qs testcase is a two-tuple that contains
|
|
@@ -68,16 +62,10 @@
|
|
(b"&a=b", {b'a': [b'b']}),
|
|
(b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
|
|
(b"a=1&a=2", {b'a': [b'1', b'2']}),
|
|
- (";", {}),
|
|
- (";;", {}),
|
|
- (";a=b", {'a': ['b']}),
|
|
- ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
|
|
- ("a=1;a=2", {'a': ['1', '2']}),
|
|
- (b";", {}),
|
|
- (b";;", {}),
|
|
- (b";a=b", {b'a': [b'b']}),
|
|
- (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
|
|
- (b"a=1;a=2", {b'a': [b'1', b'2']}),
|
|
+ (";a=b", {';a': ['b']}),
|
|
+ ("a=a+b;b=b+c", {'a': ['a b;b=b c']}),
|
|
+ (b";a=b", {b';a': [b'b']}),
|
|
+ (b"a=a+b;b=b+c", {b'a':[ b'a b;b=b c']}),
|
|
]
|
|
|
|
class UrlParseTestCase(unittest.TestCase):
|
|
@@ -884,10 +872,46 @@ def test_parse_qsl_encoding(self):
|
|
def test_parse_qsl_max_num_fields(self):
|
|
with self.assertRaises(ValueError):
|
|
urllib.parse.parse_qs('&'.join(['a=a']*11), max_num_fields=10)
|
|
- with self.assertRaises(ValueError):
|
|
- urllib.parse.parse_qs(';'.join(['a=a']*11), max_num_fields=10)
|
|
urllib.parse.parse_qs('&'.join(['a=a']*10), max_num_fields=10)
|
|
|
|
+ def test_parse_qs_separator(self):
|
|
+ parse_qs_semicolon_cases = [
|
|
+ (";", {}),
|
|
+ (";;", {}),
|
|
+ (";a=b", {'a': ['b']}),
|
|
+ ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
|
|
+ ("a=1;a=2", {'a': ['1', '2']}),
|
|
+ (b";", {}),
|
|
+ (b";;", {}),
|
|
+ (b";a=b", {b'a': [b'b']}),
|
|
+ (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
|
|
+ (b"a=1;a=2", {b'a': [b'1', b'2']}),
|
|
+ ]
|
|
+ for orig, expect in parse_qs_semicolon_cases:
|
|
+ with self.subTest(f"Original: {orig!r}, Expected: {expect!r}"):
|
|
+ result = urllib.parse.parse_qs(orig, separator=';')
|
|
+ self.assertEqual(result, expect, "Error parsing %r" % orig)
|
|
+
|
|
+
|
|
+ def test_parse_qsl_separator(self):
|
|
+ parse_qsl_semicolon_cases = [
|
|
+ (";", []),
|
|
+ (";;", []),
|
|
+ (";a=b", [('a', 'b')]),
|
|
+ ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]),
|
|
+ ("a=1;a=2", [('a', '1'), ('a', '2')]),
|
|
+ (b";", []),
|
|
+ (b";;", []),
|
|
+ (b";a=b", [(b'a', b'b')]),
|
|
+ (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
|
|
+ (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]),
|
|
+ ]
|
|
+ for orig, expect in parse_qsl_semicolon_cases:
|
|
+ with self.subTest(f"Original: {orig!r}, Expected: {expect!r}"):
|
|
+ result = urllib.parse.parse_qsl(orig, separator=';')
|
|
+ self.assertEqual(result, expect, "Error parsing %r" % orig)
|
|
+
|
|
+
|
|
def test_urlencode_sequences(self):
|
|
# Other tests incidentally urlencode things; test non-covered cases:
|
|
# Sequence and object values.
|
|
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
|
|
index 94df275c4677e..e67d69db3614b 100644
|
|
--- a/Lib/urllib/parse.py
|
|
+++ b/Lib/urllib/parse.py
|
|
@@ -643,7 +643,7 @@ def unquote(string, encoding='utf-8', errors='replace'):
|
|
|
|
|
|
def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
|
|
- encoding='utf-8', errors='replace', max_num_fields=None):
|
|
+ encoding='utf-8', errors='replace', max_num_fields=None, separator='&'):
|
|
"""Parse a query given as a string argument.
|
|
|
|
Arguments:
|
|
@@ -667,12 +667,15 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
|
|
max_num_fields: int. If set, then throws a ValueError if there
|
|
are more than n fields read by parse_qsl().
|
|
|
|
+ separator: str. The symbol to use for separating the query arguments.
|
|
+ Defaults to &.
|
|
+
|
|
Returns a dictionary.
|
|
"""
|
|
parsed_result = {}
|
|
pairs = parse_qsl(qs, keep_blank_values, strict_parsing,
|
|
encoding=encoding, errors=errors,
|
|
- max_num_fields=max_num_fields)
|
|
+ max_num_fields=max_num_fields, separator=separator)
|
|
for name, value in pairs:
|
|
if name in parsed_result:
|
|
parsed_result[name].append(value)
|
|
@@ -682,7 +685,7 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
|
|
|
|
|
|
def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
|
|
- encoding='utf-8', errors='replace', max_num_fields=None):
|
|
+ encoding='utf-8', errors='replace', max_num_fields=None, separator='&'):
|
|
"""Parse a query given as a string argument.
|
|
|
|
Arguments:
|
|
@@ -705,19 +708,25 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
|
|
max_num_fields: int. If set, then throws a ValueError
|
|
if there are more than n fields read by parse_qsl().
|
|
|
|
+ separator: str. The symbol to use for separating the query arguments.
|
|
+ Defaults to &.
|
|
+
|
|
Returns a list, as G-d intended.
|
|
"""
|
|
qs, _coerce_result = _coerce_args(qs)
|
|
|
|
+ if not separator or (not isinstance(separator, (str, bytes))):
|
|
+ raise ValueError("Separator must be of type string or bytes.")
|
|
+
|
|
# If max_num_fields is defined then check that the number of fields
|
|
# is less than max_num_fields. This prevents a memory exhaustion DOS
|
|
# attack via post bodies with many fields.
|
|
if max_num_fields is not None:
|
|
- num_fields = 1 + qs.count('&') + qs.count(';')
|
|
+ num_fields = 1 + qs.count(separator)
|
|
if max_num_fields < num_fields:
|
|
raise ValueError('Max number of fields exceeded')
|
|
|
|
- pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
|
|
+ pairs = [s1 for s1 in qs.split(separator)]
|
|
r = []
|
|
for name_value in pairs:
|
|
if not name_value and not strict_parsing:
|