Skip to content

Commit 0931046

Browse files
Merge pull request #6 from secondlife/sl-18330-perf
SL-18330: Refactor notation parsing to manage a lookahead char.
2 parents 0adae95 + 9249fce commit 0931046

File tree

4 files changed

+194
-144
lines changed

4 files changed

+194
-144
lines changed

llsd/base.py

Lines changed: 11 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -404,13 +404,12 @@ def _reset(self, something):
404404
# Wrap an incoming bytes string into a stream. If the passed bytes
405405
# string is so large that the overhead of copying it into a
406406
# BytesIO is significant, advise caller to pass a stream instead.
407-
# BytesIO has no peek() method, so wrap it in BufferedReader.
408-
self._stream = io.BufferedReader(io.BytesIO(something))
409-
elif hasattr(something, 'peek'):
410-
# 'something' is already a buffered stream, use directly
407+
self._stream = io.BytesIO(something)
408+
elif something.seekable():
409+
# 'something' is already a seekable stream, use directly
411410
self._stream = something
412411
else:
413-
# 'something' isn't buffered, wrap in BufferedReader
412+
# 'something' isn't seekable, wrap in BufferedReader
414413
# (let BufferedReader handle the problem of passing an
415414
# inappropriate object)
416415
self._stream = io.BufferedReader(something)
@@ -482,49 +481,23 @@ def _next_nonblank(self):
482481
c = self._stream.read(1)
483482
return c
484483

485-
def _getc(self, num=1):
484+
def _getc(self, num=1, full=True):
486485
got = self._stream.read(num)
487-
if len(got) < num:
486+
if full and len(got) < num:
488487
self._error("Trying to read past end of stream")
489488
return got
490489

491-
def _peek(self, num=1, full=True):
492-
# full=True means error if we can't peek ahead num bytes
493-
if num < 0:
494-
# There aren't many ways this can happen. The likeliest is that
495-
# we've just read garbage length bytes from a binary input string.
496-
# We happen to know that lengths are encoded as 4 bytes, so back
497-
# off by 4 bytes to try to point the user at the right spot.
498-
self._error("Invalid length field %d" % num, -4)
499-
500-
# Instead of using self._stream.peek() at all, use read(num) and reset
501-
# the read pointer. BufferedReader.peek() does not promise to return
502-
# the requested length, but does not clarify the conditions under
503-
# which it returns fewer bytes.
504-
# https://docs.python.org/3/library/io.html#io.BufferedReader.peek
505-
# In practice, we've seen it fail with an input file up over 100Kb:
506-
# peek() returns only part of what we requested, but because we also
507-
# passed full=False (see LLSDNotationParser._get_re()), we didn't
508-
# notice and the parse failed looking for a map delimiter halfway
509-
# through a large decimal integer. read(num), on the other hand,
510-
# promises to return num bytes until actual EOF.
511-
oldpos = self._stream.tell()
512-
try:
513-
got = self._stream.read(num)
514-
if full and len(got) < num:
515-
self._error("Trying to peek past end of stream")
516-
517-
return got
518-
519-
finally:
520-
self._stream.seek(oldpos)
490+
def _putback(self, cc):
491+
# if this test fails, it's not a user error, it's a coding error
492+
assert self._stream.tell() >= len(cc)
493+
self._stream.seek(-len(cc), io.SEEK_CUR)
521494

522495
def _error(self, message, offset=0):
523496
oldpos = self._stream.tell()
524497
# 'offset' is relative to current pos
525498
self._stream.seek(offset, io.SEEK_CUR)
526499
raise LLSDParseError("%s at byte %d: %r" %
527-
(message, oldpos+offset, self._peek(1, full=False)))
500+
(message, oldpos+offset, self._getc(1, full=False)))
528501

529502
# map char following escape char to corresponding character
530503
_escaped = {

llsd/serde_binary.py

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,14 @@
88
_str_to_bytes, binary, is_integer, is_string, uri)
99

1010

11+
try:
12+
# Python 2: make 'range()' lazy like Python 3
13+
range = xrange
14+
except NameError:
15+
# Python 3: 'range()' is already lazy
16+
pass
17+
18+
1119
class LLSDBinaryParser(LLSDBaseParser):
1220
"""
1321
Parse application/llsd+binary to a python object.
@@ -56,15 +64,16 @@ def __init__(self):
5664
for c, func in _dispatch_dict.items():
5765
self._dispatch[ord(c)] = func
5866

59-
def parse(self, buffer, ignore_binary = False):
67+
def parse(self, something, ignore_binary = False):
6068
"""
6169
This is the basic public interface for parsing.
6270
63-
:param buffer: the binary data to parse in an indexable sequence.
71+
:param something: serialized LLSD to parse: a bytes object, a binary
72+
stream or an LLSDBaseParser subclass.
6473
:param ignore_binary: parser throws away data in llsd binary nodes.
6574
:returns: returns a python object.
6675
"""
67-
self._reset(buffer)
76+
self._reset(something)
6877
self._keep_binary = not ignore_binary
6978
try:
7079
return self._parse()
@@ -107,15 +116,10 @@ def _parse_array(self):
107116
"Parse a single llsd array"
108117
rv = []
109118
size = struct.unpack("!i", self._getc(4))[0]
110-
count = 0
111-
cc = self._peek()
112-
while (cc != b']') and (count < size):
119+
for count in range(size):
113120
rv.append(self._parse())
114-
count += 1
115-
cc = self._peek()
116-
if cc != b']':
121+
if self._getc() != b']':
117122
self._error("invalid array close token")
118-
self._getc()
119123
return rv
120124

121125
def _parse_string(self):

0 commit comments

Comments
 (0)