Skip to content

Commit 324d5af

Browse files
committed
SL-19707 - use iterators instead of recursion to generate XML
This will allow us to handle much deeper hierarchies. It also reduces the number of function calls required to render values, which improves performance.
1 parent a63abbe commit 324d5af

File tree

2 files changed

+128
-73
lines changed

2 files changed

+128
-73
lines changed

llsd/serde_xml.py

Lines changed: 94 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -37,20 +37,6 @@ class LLSDXMLFormatter(LLSDBaseFormatter):
3737
this class since the module level format_xml() is the most convenient
3838
interface to this functionality.
3939
"""
40-
def _elt(self, name, contents=None):
41-
"""
42-
Serialize a single element.
43-
44-
If 'contents' is omitted, write <name/>.
45-
If 'contents' is bytes, write <name>contents</name>.
46-
If 'contents' is str, write <name>contents.encode('utf8')</name>.
47-
"""
48-
if not contents:
49-
self.stream.writelines([b"<", name, b" />"])
50-
else:
51-
self.stream.writelines([b"<", name, b">",
52-
_str_to_bytes(contents),
53-
b"</", name, b">"])
5440

5541
def xml_esc(self, v):
5642
"Escape string or unicode object v for xml output"
@@ -70,53 +56,35 @@ def xml_esc(self, v):
7056
return v.replace(b'&',b'&amp;').replace(b'<',b'&lt;').replace(b'>',b'&gt;')
7157

7258
def _LLSD(self, v):
73-
return self._generate(v.thing)
59+
raise LLSDSerializationError("We should never end up here")
7460
def _UNDEF(self, _v):
75-
return self._elt(b'undef')
61+
return b'<undef/>'
7662
def _BOOLEAN(self, v):
7763
if v:
78-
return self._elt(b'boolean', b'true')
64+
return b'<boolean>true</boolean>'
7965
else:
80-
return self._elt(b'boolean', b'false')
66+
return b'<boolean>false</boolean>'
8167
def _INTEGER(self, v):
82-
return self._elt(b'integer', str(v))
68+
return b'<integer>' + _str_to_bytes(str(v)) + b'</integer>'
8369
def _REAL(self, v):
84-
return self._elt(b'real', repr(v))
70+
return b'<real>' + _str_to_bytes(str(v)) + b'</real>'
8571
def _UUID(self, v):
8672
if v.int == 0:
87-
return self._elt(b'uuid')
73+
return b'<uuid/>'
8874
else:
89-
return self._elt(b'uuid', str(v))
75+
return b'<uuid>' + _str_to_bytes(str(v)) + b'</uuid>'
9076
def _BINARY(self, v):
91-
return self._elt(b'binary', base64.b64encode(v).strip())
77+
return b'<binary>' + base64.b64encode(v).strip() + b'</binary>'
9278
def _STRING(self, v):
93-
return self._elt(b'string', self.xml_esc(v))
79+
return b'<string>' + self.xml_esc(v) + b'</string>'
9480
def _URI(self, v):
95-
return self._elt(b'uri', self.xml_esc(str(v)))
81+
return b'<uri>' + self.xml_esc(v) + b'</uri>'
9682
def _DATE(self, v):
97-
return self._elt(b'date', _format_datestr(v))
83+
return b'<date>' + _format_datestr(v) + b'</date>'
9884
def _ARRAY(self, v):
99-
self.stream.write(b'<array>')
100-
for item in v:
101-
self._generate(item)
102-
self.stream.write(b'</array>')
85+
raise LLSDSerializationError("We should never end up here")
10386
def _MAP(self, v):
104-
self.stream.write(b'<map>')
105-
for key, value in v.items():
106-
self._elt(b'key', self.xml_esc(UnicodeType(key)))
107-
self._generate(value)
108-
self.stream.write(b'</map>')
109-
110-
def _generate(self, something):
111-
"Generate xml from a single python object."
112-
t = type(something)
113-
if t in self.type_map:
114-
return self.type_map[t](something)
115-
elif isinstance(something, _LLSD):
116-
return self.type_map[_LLSD](something)
117-
else:
118-
raise LLSDSerializationError(
119-
"Cannot serialize unknown type: %s (%s)" % (t, something))
87+
raise LLSDSerializationError("We should never end up here")
12088

12189
def _write(self, something):
12290
"""
@@ -126,7 +94,36 @@ def _write(self, something):
12694
"""
12795
self.stream.write(b'<?xml version="1.0" ?>'
12896
b'<llsd>')
129-
self._generate(something)
97+
98+
iter_stack = [(iter([something]), b"")]
99+
while True:
100+
cur_iter, iter_type = iter_stack[-1]
101+
try:
102+
item = next(cur_iter)
103+
if iter_type == b"map":
104+
self.stream.write(b'<key>' + _str_to_bytes(self.xml_esc(UnicodeType(item[0]))) + b'</key>')
105+
item = item[1]
106+
if isinstance(item, _LLSD):
107+
item = item.thing
108+
t = type(item)
109+
if not t in self.type_map:
110+
raise LLSDSerializationError(
111+
"Cannot serialize unknown type: %s (%s)" % (t, item))
112+
tf = self.type_map[t]
113+
114+
if tf == self._MAP:
115+
self.stream.write(b'<map>')
116+
iter_stack.append((iter(item.items()), b"map"))
117+
elif tf == self._ARRAY:
118+
self.stream.write(b'<array>')
119+
iter_stack.append((iter(item), b"array"))
120+
else:
121+
self.stream.write(tf(item))
122+
except StopIteration:
123+
self.stream.write(b'</' + iter_type + b'>')
124+
iter_stack.pop()
125+
if len(iter_stack) == 1:
126+
break
130127
self.stream.write(b'</llsd>')
131128

132129

@@ -161,40 +158,64 @@ def _indent(self):
161158

162159
def _ARRAY(self, v):
163160
"Recursively format an array with pretty turned on."
164-
self.stream.write(b'<array>\n')
165-
self._indent_level += 1
166-
for item in v:
167-
self._indent()
168-
self._generate(item)
169-
self.stream.write(b'\n')
170-
self._indent_level -= 1
171-
self._indent()
172-
self.stream.write(b'</array>')
161+
raise LLSDSerializationError("We should never end up here")
173162

174163
def _MAP(self, v):
175164
"Recursively format a map with pretty turned on."
176-
self.stream.write(b'<map>\n')
177-
self._indent_level += 1
178-
# sorted list of keys
179-
for key in sorted(v):
180-
self._indent()
181-
self._elt(b'key', UnicodeType(key))
182-
self.stream.write(b'\n')
183-
self._indent()
184-
self._generate(v[key])
185-
self.stream.write(b'\n')
186-
self._indent_level -= 1
187-
self._indent()
188-
self.stream.write(b'</map>')
165+
raise LLSDSerializationError("We should never end up here")
189166

190167
def _write(self, something):
191168
"""
192-
Serialize a python object to self.stream as 'pretty' application/llsd+xml.
169+
Serialize a python object to self.stream as application/llsd+xml.
170+
171+
:param something: A python object (typically a dict) to be serialized.
193172
194-
:param something: a python object (typically a dict) to be serialized.
173+
NOTE: This is nearly identical to the above _write with the exception
174+
that this one includes newlines and indentation. Doing something clever
175+
for the above may decrease performance for the common case, so it's been
176+
split out. We can probably revisit this, though.
195177
"""
196-
self.stream.write(b'<?xml version="1.0" ?>\n<llsd>')
197-
self._generate(something)
178+
self.stream.write(b'<?xml version="1.0" ?>\n'
179+
b'<llsd>\n')
180+
181+
iter_stack = [(iter([something]), b"")]
182+
while True:
183+
cur_iter, iter_type = iter_stack[-1]
184+
try:
185+
item = next(cur_iter)
186+
if iter_type == b"map":
187+
self._indent()
188+
self.stream.write(b'<key>' + _str_to_bytes(self.xml_esc(UnicodeType(item[0]))) + b'</key>\n')
189+
item = item[1]
190+
if isinstance(item, _LLSD):
191+
item = item.thing
192+
t = type(item)
193+
if not t in self.type_map:
194+
raise LLSDSerializationError(
195+
"Cannot serialize unknown type: %s (%s)" % (t, item))
196+
tf = self.type_map[t]
197+
198+
if tf == self._MAP:
199+
self._indent()
200+
self.stream.write(b'<map>\n')
201+
self._indent_level += 1
202+
iter_stack.append((iter(item.items()), b"map"))
203+
elif tf == self._ARRAY:
204+
self._indent()
205+
self.stream.write(b'<array>\n')
206+
self._indent_level += 1
207+
iter_stack.append((iter(item), b"array"))
208+
else:
209+
self._indent()
210+
self.stream.write(tf(item))
211+
self.stream.write(b'\n')
212+
except StopIteration:
213+
self._indent_level -= 1
214+
self._indent()
215+
self.stream.write(b'</' + iter_type + b'>\n')
216+
iter_stack.pop()
217+
if len(iter_stack) == 1:
218+
break
198219
self.stream.write(b'</llsd>\n')
199220

200221

tests/bench.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@
4545
</llsd>"""
4646

4747
_bench_data = llsd.parse_xml(BENCH_DATA_XML)
48+
49+
50+
4851
BENCH_DATA_BINARY = llsd.format_binary(_bench_data)
4952
BENCH_DATA_NOTATION = llsd.format_notation(_bench_data)
5053

@@ -78,6 +81,31 @@ def binary_stream():
7881
f.seek(0)
7982
yield f
8083

84+
def build_deep_xml():
85+
86+
deep_data = {}
87+
curr_data = deep_data
88+
for i in range(250):
89+
curr_data["curr_data"] = {}
90+
curr_data["integer"] = 7
91+
curr_data["string"] = "string"
92+
curr_data = curr_data["curr_data"]
93+
94+
return deep_data
95+
_deep_bench_data = build_deep_xml()
96+
97+
def build_wide_xml():
98+
wide_xml = b"""
99+
<?xml version="1.0" encoding="UTF-8"?><llsd><map><key>wide_array</key><array>"
100+
"""
101+
102+
for i in range(100000):
103+
wide_xml += b"""
104+
<real>5000</real>"""
105+
wide_xml += b"</array></map></llsd>"
106+
107+
return llsd.parse_xml(wide_xml)
108+
_wide_bench_data = build_wide_xml()
81109

82110
def bench_stream(parse, stream):
83111
ret = parse(stream)
@@ -125,3 +153,9 @@ def test_format_notation(benchmark):
125153

126154
def test_format_binary(benchmark):
127155
benchmark(llsd.format_binary, _bench_data)
156+
157+
def test_format_xml_deep(benchmark):
158+
benchmark(llsd.format_xml, _deep_bench_data)
159+
160+
def test_format_xml_wide(benchmark):
161+
benchmark(llsd.format_xml, _wide_bench_data)

0 commit comments

Comments
 (0)