Skip to content

Commit 00a4c7d

Browse files
committed
Remove iterative xml parsing
1 parent 1994a95 commit 00a4c7d

File tree

1 file changed

+2
-169
lines changed

1 file changed

+2
-169
lines changed

llsd/serde_xml.py

Lines changed: 2 additions & 169 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,10 @@
11
import base64
2-
import binascii
3-
from collections import deque
42
import io
53
import re
6-
import uuid
74

85
from llsd.base import (_LLSD, ALL_CHARS, LLSDBaseParser, LLSDBaseFormatter, XML_HEADER,
96
LLSDParseError, LLSDSerializationError, UnicodeType,
10-
_format_datestr, _str_to_bytes, is_unicode, PY2, uri, binary, _parse_datestr)
7+
_format_datestr, _str_to_bytes, _to_python, is_unicode, PY2)
118
from llsd.fastest_elementtree import ElementTreeError, fromstring, parse as _parse
129

1310
INVALID_XML_BYTES = b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c'\
@@ -237,168 +234,6 @@ def write_pretty_xml(stream, something):
237234
return LLSDXMLPrettyFormatter().write(stream, something)
238235

239236

240-
class LLSDXMLParser:
    """
    Convert an ElementTree representation of llsd+xml into python objects.

    Scalar elements are converted directly by the matching entry in
    ``NODE_HANDLERS``. Container elements (``map`` and ``array``) are walked
    iteratively with an explicit work stack (``parse_stack``) instead of by
    recursion.
    """

    def __init__(self):
        "Construct an xml node parser."

        # Maps an element tag to the method that converts that element.
        self.NODE_HANDLERS = {
            "undef": lambda x: None,
            "boolean": self._bool_to_python,
            "integer": self._int_to_python,
            "real": self._real_to_python,
            "uuid": self._uuid_to_python,
            "string": self._str_to_python,
            "binary": self._bin_to_python,
            "date": self._date_to_python,
            "uri": self._uri_to_python,
            "map": self._map_to_python,
            "array": self._array_to_python,
        }

        # Work stack used by parse_node(). Each entry is a list of
        # [child iterator, element, result container]; the entry currently
        # being filled is at the left end.
        self.parse_stack = deque()

    def _bool_to_python(self, node):
        "Convert boolean node to a python object."
        val = node.text or ''
        try:
            # string value, accept 'true' or 'True' or whatever
            return (val.lower() in ('true', '1', '1.0'))
        except AttributeError:
            # not a string (no lower() method), use normal Python rules
            return bool(val)

    def _int_to_python(self, node):
        "Convert integer node to a python object. Empty text yields 0."
        val = node.text or ''
        if not val.strip():
            return 0
        return int(val)

    def _real_to_python(self, node):
        "Convert floating point node to a python object. Empty text yields 0.0."
        val = node.text or ''
        if not val.strip():
            return 0.0
        return float(val)

    def _uuid_to_python(self, node):
        "Convert uuid node to a python object. Empty text yields the nil UUID."
        if node.text:
            return uuid.UUID(hex=node.text)
        return uuid.UUID(int=0)

    def _str_to_python(self, node):
        "Convert string node to a python object."
        return node.text or ''

    def _bin_to_python(self, node):
        """
        Convert binary node to a python object.

        The ``encoding`` attribute selects ``base16`` or ``base64``; when it
        is absent ``base64`` is assumed.

        :raises LLSDParseError: if the encoding is unsupported or the
            payload cannot be decoded.
        """
        base = node.get('encoding') or 'base64'
        try:
            if base == 'base16':
                # parse base16 encoded data
                return binary(base64.b16decode(node.text or ''))
            if base == 'base64':
                # parse base64 encoded data
                return binary(base64.b64decode(node.text or ''))
            raise LLSDParseError("Parser doesn't support %s encoding" % base)

        except binascii.Error as exc:
            # convert exception class so it's more catchable
            raise LLSDParseError("Encoded binary data: " + str(exc))
        except TypeError as exc:
            # convert exception class so it's more catchable
            raise LLSDParseError("Bad binary data: " + str(exc))

    def _date_to_python(self, node):
        "Convert date node to a python object. Empty text means the epoch."
        val = node.text or ''
        if not val:
            val = "1970-01-01T00:00:00Z"
        return _parse_datestr(val)

    def _uri_to_python(self, node):
        "Convert uri node to a python object."
        val = node.text or ''
        return uri(val)

    def _map_to_python(self, node):
        """
        Convert map node to a python object.

        Returns the (still empty) dict immediately and pushes a stack entry
        so that parse_node() fills it in on later iterations.
        """
        new_result = {}
        self.parse_stack.appendleft([iter(node), node, new_result])
        return new_result

    def _array_to_python(self, node):
        """
        Convert array node to a python object.

        Returns the (still empty) list immediately and pushes a stack entry
        so that parse_node() fills it in on later iterations.
        """
        new_result = []
        self.parse_stack.appendleft([iter(node), node, new_result])
        return new_result

    def parse_node(self, something):
        """
        Parse an ElementTree tree.

        This parser is iterative instead of recursive. It uses an explicit
        stack (``self.parse_stack``): each entry holds an iterator over the
        children of a map or array element, the element itself, and the dict
        or list being built for it. This limits depth by size of free memory
        instead of size of the function call stack, allowing us to render
        deeper trees than a recursive model.

        The stack is emptied before and after every call, so a parser whose
        previous call raised can safely be reused.

        :param something: The xml node to parse.
        :returns: Returns a python object.
        :raises LLSDParseError: on an unknown element tag, a map child that
            is not a ``key``, or a key without a following value.
        """

        # if the passed in element is not a map or array, simply return
        # its value. Otherwise, create a dict or array to receive
        # child/leaf elements.
        if something.tag == "map":
            root_result = {}
        elif something.tag == "array":
            root_result = []
        else:
            if something.tag not in self.NODE_HANDLERS:
                raise LLSDParseError("Unknown value type %s" % something.tag)
            return self.NODE_HANDLERS[something.tag](something)

        stack = self.parse_stack
        # Discard frames left behind by an earlier call that raised part way
        # through; otherwise they would be drained below and their stale
        # result returned in place of this document's.
        stack.clear()

        # start by pushing the current element iterator data onto
        # the stack
        # 0 - iterator indicating the current position in the given level
        #     of the tree
        # 1 - the actual element object.
        # 2 - the result for this level in the tree, onto which
        #     children or leafs will be appended/set
        stack.appendleft([iter(something), something, root_result])
        try:
            while stack:
                node_iter, iterable, cur_result = stack[0]
                try:
                    value = next(node_iter)
                except StopIteration:
                    # this level is finished; resume its parent (if any)
                    stack.popleft()
                    continue

                if iterable.tag == "map":
                    # map children come in <key>, <value> pairs
                    if value.tag != "key":
                        raise LLSDParseError("Expected 'key', got %s" % value.tag)
                    key = value.text
                    if key is None:
                        key = ''
                    try:
                        value = next(node_iter)
                    except StopIteration:
                        raise LLSDParseError("No value for map item %s" % key)
                    try:
                        cur_result[key] = self.NODE_HANDLERS[value.tag](value)
                    except KeyError as err:
                        raise LLSDParseError("Unknown value type: " + str(err))
                elif iterable.tag == "array":
                    try:
                        cur_result.append(self.NODE_HANDLERS[value.tag](value))
                    except KeyError as err:
                        raise LLSDParseError("Unknown value type: " + str(err))
        finally:
            # never leave partial state on the instance, even on error
            stack.clear()
        return root_result
401-
402237
def parse_xml(something):
403238
"""
404239
This is the basic public interface for parsing llsd+xml.
@@ -414,8 +249,6 @@ def parse_xml(something):
414249
return parse_xml_nohdr(parser)
415250

416251

417-
418-
419252
def parse_xml_nohdr(baseparser):
420253
"""
421254
Parse llsd+xml known to be without an <? LLSD/XML ?> header. May still
@@ -444,7 +277,7 @@ def parse_xml_nohdr(baseparser):
444277
if element.tag != 'llsd':
445278
raise LLSDParseError("Invalid XML Declaration")
446279
# Extract its contents.
447-
return LLSDXMLParser().parse_node(element[0])
280+
return _to_python(element[0])
448281

449282

450283
def format_xml(something):

0 commit comments

Comments
 (0)