Skip to content

Commit 1bb6812

Browse files
committed
Speed up hex escape parsing by ~30%
1 parent 232902c commit 1bb6812

File tree

2 files changed

+18
-9
lines changed

2 files changed

+18
-9
lines changed

llsd/base.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -441,18 +441,23 @@ def _parse_string_delim(self, delim):
441441
read_idx += 1
442442
if cc == _X_ORD:
443443
# It's a hex escape. char is the value of the two
444-
# following hex nybbles
444+
# following hex nybbles. This slice may result in
445+
# a short read (0 or 1 bytes), but a
446+
# `ValueError` will be triggered by the first case,
447+
# and the second will cause an `IndexError` on the
448+
# next iteration of the loop.
449+
hex_bytes = buff[read_idx:read_idx + 2]
450+
read_idx += 2
445451
try:
446-
cc = int(chr(buff[read_idx]), 16) << 4
447-
read_idx += 1
448-
cc |= int(chr(buff[read_idx]), 16)
449-
read_idx += 1
452+
# int() can parse a `bytes` containing hex,
453+
# no explicit `bytes.decode("ascii")` required.
454+
cc = int(hex_bytes, 16)
450455
except ValueError as e:
451456
# One of the hex characters was likely invalid.
452457
# Wrap the ValueError so that we can provide a
453458
# byte offset in the error.
454459
self._index = read_idx
455-
self._error(str(e))
460+
self._error(e, offset=-2)
456461
else:
457462
# escape char preceding anything other than the chars
458463
# in _escaped just results in that same char without
@@ -477,9 +482,9 @@ def _parse_string_delim(self, delim):
477482

478483
insert_idx += 1
479484

485+
# Sync our local read index with the canonical one
486+
self._index = read_idx
480487
try:
481-
# Sync our local read index with the canonical one
482-
self._index = read_idx
483488
# Slice off only what we used of the working decode buffer
484489
return decode_buff[:insert_idx].decode('utf-8')
485490
except UnicodeDecodeError as exc:

tests/llsd_test.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -513,12 +513,16 @@ def testParseNotationUnterminatedString(self):
513513
"""
514514
self.assertRaises(llsd.LLSDParseError, self.llsd.parse, b"'foo")
515515

516-
def testParseNotationTruncatedHex(self):
516+
def testParseNotationHexEscapeNoChars(self):
517+
self.assertRaises(llsd.LLSDParseError, self.llsd.parse, b"'\\x")
518+
519+
def testParseNotationHalfTruncatedHex(self):
517520
self.assertRaises(llsd.LLSDParseError, self.llsd.parse, b"'\\xf")
518521

519522
def testParseNotationInvalidHex(self):
520523
self.assertRaises(llsd.LLSDParseError, self.llsd.parse, b"'\\xzz'")
521524

525+
522526
class LLSDBinaryUnitTest(unittest.TestCase):
523527
"""
524528
This class aggregates all the tests for parse_binary and LLSD.as_binary

0 commit comments

Comments
 (0)