Skip to content

Commit b703873

Browse files
Merge pull request #15 from secondlife/SRV-439
SRV-439 - performance optimizations for string handling in xml formatting
2 parents a63abbe + 2432466 commit b703873

File tree

7 files changed: 295 additions and 154 deletions.

.github/workflows/ci.yaml

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -13,21 +13,23 @@ jobs:
1313
matrix:
1414
python-version: ['2.7', '3.7', '3.8', '3.10']
1515
runs-on: [ubuntu-latest]
16+
container:
17+
image: "python:${{ matrix.python-version }}-buster"
1618
env:
1719
PYTHON: ${{ matrix.python-version }}
1820
steps:
1921
- uses: actions/checkout@v3
2022
with:
2123
fetch-depth: 0 # fetch all history for setuptools_scm to be able to read tags
2224

23-
- uses: actions/setup-python@v4
24-
with:
25-
python-version: ${{ matrix.python-version }}
26-
2725
- name: Install python dependencies
2826
run: |
29-
pip install wheel build tox
30-
pip install .[dev]
27+
apt-get update
28+
apt-get -y install sudo
29+
pip install --upgrade pip
30+
sudo chown root .
31+
sudo -H pip install wheel build tox
32+
sudo -H pip install .[dev]
3133
3234
- name: Determine pyenv
3335
id: pyenv

llsd/base.py

Lines changed: 21 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,8 @@
3131

3232
ALL_CHARS = str(bytearray(range(256))) if PY2 else bytes(range(256))
3333

34+
MAX_FORMAT_DEPTH = 200
35+
MAX_PARSE_DEPTH = 200
3436

3537
class _LLSD:
3638
__metaclass__ = abc.ABCMeta
@@ -209,7 +211,7 @@ def _parse_datestr(datestr):
209211
return datetime.datetime(year, month, day, hour, minute, second, usec)
210212

211213

212-
def _bool_to_python(node):
214+
def _bool_to_python(node, depth=0):
213215
"Convert boolean node to a python object."
214216
val = node.text or ''
215217
try:
@@ -220,35 +222,35 @@ def _bool_to_python(node):
220222
return bool(val)
221223

222224

223-
def _int_to_python(node):
225+
def _int_to_python(node, depth=0):
224226
"Convert integer node to a python object."
225227
val = node.text or ''
226228
if not val.strip():
227229
return 0
228230
return int(val)
229231

230232

231-
def _real_to_python(node):
233+
def _real_to_python(node, depth=0):
232234
"Convert floating point node to a python object."
233235
val = node.text or ''
234236
if not val.strip():
235237
return 0.0
236238
return float(val)
237239

238240

239-
def _uuid_to_python(node):
241+
def _uuid_to_python(node, depth=0):
240242
"Convert uuid node to a python object."
241243
if node.text:
242244
return uuid.UUID(hex=node.text)
243245
return uuid.UUID(int=0)
244246

245247

246-
def _str_to_python(node):
248+
def _str_to_python(node, depth=0):
247249
"Convert string node to a python object."
248250
return node.text or ''
249251

250252

251-
def _bin_to_python(node):
253+
def _bin_to_python(node, depth=0):
252254
base = node.get('encoding') or 'base64'
253255
try:
254256
if base == 'base16':
@@ -267,38 +269,38 @@ def _bin_to_python(node):
267269
return LLSDParseError("Bad binary data: " + str(exc))
268270

269271

270-
def _date_to_python(node):
272+
def _date_to_python(node, depth=0):
271273
"Convert date node to a python object."
272274
val = node.text or ''
273275
if not val:
274276
val = "1970-01-01T00:00:00Z"
275277
return _parse_datestr(val)
276278

277279

278-
def _uri_to_python(node):
280+
def _uri_to_python(node, depth=0):
279281
"Convert uri node to a python object."
280282
val = node.text or ''
281283
return uri(val)
282284

283285

284-
def _map_to_python(node):
286+
def _map_to_python(node, depth=0):
285287
"Convert map node to a python object."
286288
result = {}
287289
for index in range(len(node))[::2]:
288290
if node[index].text is None:
289-
result[''] = _to_python(node[index+1])
291+
result[''] = _to_python(node[index+1], depth+1)
290292
else:
291-
result[node[index].text] = _to_python(node[index+1])
293+
result[node[index].text] = _to_python(node[index+1], depth+1)
292294
return result
293295

294296

295-
def _array_to_python(node):
297+
def _array_to_python(node, depth=0):
296298
"Convert array node to a python object."
297-
return [_to_python(child) for child in node]
299+
return [_to_python(child, depth+1) for child in node]
298300

299301

300302
NODE_HANDLERS = dict(
301-
undef=lambda x: None,
303+
undef=lambda x,y: None,
302304
boolean=_bool_to_python,
303305
integer=_int_to_python,
304306
real=_real_to_python,
@@ -312,9 +314,12 @@ def _array_to_python(node):
312314
)
313315

314316

315-
def _to_python(node):
317+
def _to_python(node, depth=0):
316318
"Convert node to a python object."
317-
return NODE_HANDLERS[node.tag](node)
319+
if depth > MAX_PARSE_DEPTH:
320+
raise LLSDParseError("Cannot parse depth of more than %d" % MAX_PARSE_DEPTH)
321+
322+
return NODE_HANDLERS[node.tag](node, depth)
318323

319324

320325
class LLSDBaseFormatter(object):

llsd/serde_binary.py

Lines changed: 22 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
import uuid
66

77
from llsd.base import (_LLSD, LLSDBaseParser, LLSDSerializationError, BINARY_HEADER,
8-
_str_to_bytes, binary, is_integer, is_string, uri)
8+
MAX_FORMAT_DEPTH, MAX_PARSE_DEPTH, _str_to_bytes, binary, is_integer, is_string, uri)
99

1010

1111
try:
@@ -15,14 +15,13 @@
1515
# Python 3: 'range()' is already lazy
1616
pass
1717

18-
1918
class LLSDBinaryParser(LLSDBaseParser):
2019
"""
2120
Parse application/llsd+binary to a python object.
2221
2322
See http://wiki.secondlife.com/wiki/LLSD#Binary_Serialization
2423
"""
25-
__slots__ = ['_dispatch', '_keep_binary']
24+
__slots__ = ['_dispatch', '_keep_binary', '_depth']
2625

2726
def __init__(self):
2827
super(LLSDBinaryParser, self).__init__()
@@ -63,6 +62,7 @@ def __init__(self):
6362
# entries in _dispatch.
6463
for c, func in _dispatch_dict.items():
6564
self._dispatch[ord(c)] = func
65+
self._depth = 0
6666

6767
def parse(self, something, ignore_binary = False):
6868
"""
@@ -82,6 +82,9 @@ def parse(self, something, ignore_binary = False):
8282

8383
def _parse(self):
8484
"The actual parser which is called recursively when necessary."
85+
if self._depth > MAX_PARSE_DEPTH:
86+
self._error("Parse depth exceeded maximum depth of %d." % MAX_PARSE_DEPTH)
87+
8588
cc = self._getc()
8689
try:
8790
func = self._dispatch[ord(cc)]
@@ -97,6 +100,7 @@ def _parse_map(self):
97100
count = 0
98101
cc = self._getc()
99102
key = b''
103+
self._depth += 1
100104
while (cc != b'}') and (count < size):
101105
if cc == b'k':
102106
key = self._parse_string()
@@ -110,16 +114,19 @@ def _parse_map(self):
110114
cc = self._getc()
111115
if cc != b'}':
112116
self._error("invalid map close token")
117+
self._depth -= 1
113118
return rv
114119

115120
def _parse_array(self):
116121
"Parse a single llsd array"
117122
rv = []
123+
self._depth += 1
118124
size = struct.unpack("!i", self._getc(4))[0]
119125
for count in range(size):
120126
rv.append(self._parse())
121127
if self._getc() != b']':
122128
self._error("invalid array close token")
129+
self._depth -= 1
123130
return rv
124131

125132
def _parse_string(self):
@@ -164,15 +171,19 @@ def format_binary(something):
164171

165172
def write_binary(stream, something):
166173
stream.write(b'<?llsd/binary?>\n')
167-
_write_binary_recurse(stream, something)
174+
_write_binary_recurse(stream, something, 0)
168175

169176

170-
def _write_binary_recurse(stream, something):
177+
def _write_binary_recurse(stream, something, depth):
171178
"Binary formatter workhorse."
179+
180+
if depth > MAX_FORMAT_DEPTH:
181+
raise LLSDSerializationError("Cannot serialize depth of more than %d" % MAX_FORMAT_DEPTH)
182+
172183
if something is None:
173184
stream.write(b'!')
174185
elif isinstance(something, _LLSD):
175-
_write_binary_recurse(stream, something.thing)
186+
_write_binary_recurse(stream, something.thing, depth)
176187
elif isinstance(something, bool):
177188
stream.write(b'1' if something else b'0')
178189
elif is_integer(something):
@@ -202,27 +213,27 @@ def _write_binary_recurse(stream, something):
202213
seconds_since_epoch = calendar.timegm(something.timetuple())
203214
stream.writelines([b'd', struct.pack('<d', seconds_since_epoch)])
204215
elif isinstance(something, (list, tuple)):
205-
_write_list(stream, something)
216+
_write_list(stream, something, depth)
206217
elif isinstance(something, dict):
207218
stream.writelines([b'{', struct.pack('!i', len(something))])
208219
for key, value in something.items():
209220
key = _str_to_bytes(key)
210221
stream.writelines([b'k', struct.pack('!i', len(key)), key])
211-
_write_binary_recurse(stream, value)
222+
_write_binary_recurse(stream, value, depth+1)
212223
stream.write(b'}')
213224
else:
214225
try:
215-
return _write_list(stream, list(something))
226+
return _write_list(stream, list(something), depth)
216227
except TypeError:
217228
raise LLSDSerializationError(
218229
"Cannot serialize unknown type: %s (%s)" %
219230
(type(something), something))
220231

221232

222-
def _write_list(stream, something):
233+
def _write_list(stream, something, depth):
223234
stream.writelines([b'[', struct.pack('!i', len(something))])
224235
for item in something:
225-
_write_binary_recurse(stream, item)
236+
_write_binary_recurse(stream, item, depth+1)
226237
stream.write(b']')
227238

228239

llsd/serde_notation.py

Lines changed: 20 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
import uuid
55

66
from llsd.base import (_LLSD, B, LLSDBaseFormatter, LLSDBaseParser, NOTATION_HEADER,
7-
LLSDParseError, LLSDSerializationError, UnicodeType,
7+
MAX_FORMAT_DEPTH, MAX_PARSE_DEPTH, LLSDParseError, LLSDSerializationError, UnicodeType,
88
_format_datestr, _parse_datestr, _str_to_bytes, binary, uri)
99

1010

@@ -70,6 +70,7 @@ def __init__(self):
7070
# Then fill in specific entries based on the dict above.
7171
for c, func in _dispatch_dict.items():
7272
self._dispatch[ord(c)] = func
73+
self._depth = 0
7374

7475
def parse(self, something, ignore_binary = False):
7576
"""
@@ -107,6 +108,8 @@ def _get_until(self, delim):
107108

108109
def _parse(self, cc):
109110
"The notation parser workhorse."
111+
if self._depth > MAX_PARSE_DEPTH:
112+
self._error("Parse depth exceeded max of %d" % MAX_PARSE_DEPTH)
110113
try:
111114
func = self._dispatch[ord(cc)]
112115
except IndexError:
@@ -182,6 +185,7 @@ def _parse_map(self, cc):
182185
rv = {}
183186
key = b''
184187
found_key = False
188+
self._depth += 1
185189
# skip the beginning '{'
186190
cc = self._getc()
187191
while (cc != b'}'):
@@ -207,6 +211,7 @@ def _parse_map(self, cc):
207211
else:
208212
self._error("missing separator")
209213
cc = self._getc()
214+
self._depth -= 1
210215

211216
return rv
212217

@@ -217,6 +222,7 @@ def _parse_array(self, cc):
217222
array: [ object, object, object ]
218223
"""
219224
rv = []
225+
self._depth += 1
220226
# skip the beginning '['
221227
cc = self._getc()
222228
while (cc != b']'):
@@ -227,7 +233,7 @@ def _parse_array(self, cc):
227233
continue
228234
rv.append(self._parse(cc))
229235
cc = self._getc()
230-
236+
self._depth -= 1
231237
return rv
232238

233239
def _parse_uuid(self, cc):
@@ -411,6 +417,11 @@ class LLSDNotationFormatter(LLSDBaseFormatter):
411417
412418
See http://wiki.secondlife.com/wiki/LLSD#Notation_Serialization
413419
"""
420+
421+
def __init__(self):
422+
super(LLSDNotationFormatter, self).__init__()
423+
self._depth = 0
424+
414425
def _LLSD(self, v):
415426
return self._generate(v.thing)
416427
def _UNDEF(self, v):
@@ -443,18 +454,22 @@ def _DATE(self, v):
443454
def _ARRAY(self, v):
444455
self.stream.write(b'[')
445456
delim = b''
457+
self._depth += 1
446458
for item in v:
447459
self.stream.write(delim)
448460
self._generate(item)
449461
delim = b','
462+
self._depth -= 1
450463
self.stream.write(b']')
451464
def _MAP(self, v):
452465
self.stream.write(b'{')
453466
delim = b''
467+
self._depth += 1
454468
for key, value in v.items():
455469
self.stream.writelines([delim, b"'", self._esc(UnicodeType(key)), b"':"])
456470
self._generate(value)
457471
delim = b','
472+
self._depth -= 1
458473
self.stream.write(b'}')
459474

460475
def _esc(self, data, quote=b"'"):
@@ -466,6 +481,9 @@ def _generate(self, something):
466481
467482
:param something: a python object (typically a dict) to be serialized.
468483
"""
484+
if self._depth > MAX_FORMAT_DEPTH:
485+
raise LLSDSerializationError("Cannot serialize depth of more than %d" % MAX_FORMAT_DEPTH)
486+
469487
t = type(something)
470488
handler = self.type_map.get(t)
471489
if handler:

0 commit comments

Comments
 (0)