Skip to content

Commit 48322c8

Browse files
committed
Make Result.serialize work more like Graph.serialize
This patch makes the following changes to `Result.serialize`.

* Return str by default instead of bytes.
* Use "txt" as the default tabular serialization format.
* Use "turtle" as the default graph serialization format.
* Support both typing.IO[bytes] and typing.TextIO destinations.

Corresponding changes are made to the specific serializers also.

This patch also changes how text is written to typing.IO[bytes] in serializers to ensure that the buffer is flushed and detached from the TextIOWrapper once the serialization function completes so it can be used normally afterwards.

This patch further includes a bunch of additional type hints.
1 parent 1cba9d8 commit 48322c8

File tree

20 files changed

+1291
-192
lines changed

20 files changed

+1291
-192
lines changed

rdflib/graph.py

Lines changed: 38 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -351,15 +351,15 @@ def __init__(
351351
self.default_union = False
352352

353353
@property
354-
def store(self):
354+
def store(self) -> Store: # read-only attr
355355
return self.__store
356356

357357
@property
358-
def identifier(self):
358+
def identifier(self) -> Node: # read-only attr
359359
return self.__identifier
360360

361361
@property
362-
def namespace_manager(self):
362+
def namespace_manager(self) -> NamespaceManager:
363363
"""
364364
this graph's namespace-manager
365365
"""
@@ -368,8 +368,9 @@ def namespace_manager(self):
368368
return self.__namespace_manager
369369

370370
@namespace_manager.setter
371-
def namespace_manager(self, nm):
372-
self.__namespace_manager = nm
371+
def namespace_manager(self, value: NamespaceManager):
372+
"""this graph's namespace-manager"""
373+
self.__namespace_manager = value
373374

374375
def __repr__(self):
375376
return "<Graph identifier=%s (%s)>" % (self.identifier, type(self))
@@ -1096,18 +1097,37 @@ def serialize(
10961097
encoding: Optional[str] = None,
10971098
**args: Any,
10981099
) -> Union[bytes, str, "Graph"]:
1099-
"""Serialize the Graph to destination
1100-
1101-
If destination is None serialize method returns the serialization as
1102-
bytes or string.
1103-
1104-
If encoding is None and destination is None, returns a string
1105-
If encoding is set, and Destination is None, returns bytes
1106-
1107-
Format defaults to turtle.
1108-
1109-
Format support can be extended with plugins,
1110-
but "xml", "n3", "turtle", "nt", "pretty-xml", "trix", "trig" and "nquads" are built in.
1100+
"""
1101+
Serialize the graph.
1102+
1103+
:param destination:
1104+
The destination to serialize the graph to. This can be a path as a
1105+
:class:`str` or :class:`~pathlib.PurePath` object, or it can be a
1106+
:class:`~typing.IO[bytes]` like object. If this parameter is not
1107+
supplied the serialized graph will be returned.
1108+
:type destination: Optional[Union[str, typing.IO[bytes], pathlib.PurePath]]
1109+
:param format:
1110+
The format that the output should be written in. This value
1111+
references a :class:`~rdflib.serializer.Serializer` plugin. Format
1112+
support can be extended with plugins, but `"xml"`, `"n3"`,
1113+
`"turtle"`, `"nt"`, `"pretty-xml"`, `"trix"`, `"trig"`, `"nquads"`
1114+
and `"json-ld"` are built in. Defaults to `"turtle"`.
1115+
:type format: str
1116+
:param base:
1117+
The base IRI for formats that support it. For the turtle format this
1118+
will be used as the `@base` directive.
1119+
:type base: Optional[str]
1120+
:param encoding: Encoding of output.
1121+
:type encoding: Optional[str]
1122+
:param **args:
1123+
Additional arguments to pass to the
1124+
:class:`~rdflib.serializer.Serializer` that will be used.
1125+
:type **args: Any
1126+
:return: The serialized graph if `destination` is `None`.
1127+
:rtype: :class:`bytes` if `destination` is `None` and `encoding` is not `None`.
1128+
:rtype: :class:`str` if `destination` is `None` and `encoding` is `None`.
1129+
:return: `self` (i.e. the :class:`~rdflib.graph.Graph` instance) if `destination` is not None.
1130+
:rtype: :class:`~rdflib.graph.Graph` if `destination` is not None.
11111131
"""
11121132

11131133
# if base is not given as attribute use the base set for the graph
@@ -1298,7 +1318,7 @@ def query(
12981318
if none are given, the namespaces from the graph's namespace manager
12991319
are used.
13001320
1301-
:returntype: rdflib.query.Result
1321+
:returntype: :class:`~rdflib.query.Result`
13021322
13031323
"""
13041324

rdflib/plugins/serializers/n3.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ def p_clause(self, node, position):
109109
self.write("{")
110110
self.depth += 1
111111
serializer = N3Serializer(node, parent=self)
112-
serializer.serialize(self.stream)
112+
serializer.serialize(self.stream.buffer)
113113
self.depth -= 1
114114
self.write(self.indent() + "}")
115115
return True

rdflib/plugins/serializers/nt.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
import warnings
1313
import codecs
1414

15+
from rdflib.util import as_textio
16+
1517
__all__ = ["NTSerializer"]
1618

1719

@@ -38,9 +40,15 @@ def serialize(
3840
f"Given encoding was: {encoding}"
3941
)
4042

41-
for triple in self.store:
42-
stream.write(_nt_row(triple).encode())
43-
stream.write("\n".encode())
43+
with as_textio(
44+
stream,
45+
encoding=encoding, # TODO: CHECK: self.encoding set removed, why?
46+
errors="_rdflib_nt_escape",
47+
write_through=True,
48+
) as text_stream:
49+
for triple in self.store:
50+
text_stream.write(_nt_row(triple))
51+
text_stream.write("\n")
4452

4553

4654
class NT11Serializer(NTSerializer):

rdflib/plugins/serializers/rdfxml.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import IO, Dict, Optional, Set
1+
from typing import IO, Dict, Optional, Set, cast
22
from rdflib.plugins.serializers.xmlwriter import XMLWriter
33

44
from rdflib.namespace import Namespace, RDF, RDFS # , split_uri
@@ -173,6 +173,8 @@ def serialize(
173173
encoding: Optional[str] = None,
174174
**args,
175175
):
176+
# TODO FIXME: this should be Optional, but it's not because nothing
177+
# treats it as such.
176178
self.__serialized: Dict[Identifier, int] = {}
177179
store = self.store
178180
# if base is given here, use that, if not and a base is set for the graph use that
@@ -241,6 +243,7 @@ def subject(self, subject: IdentifiedNode, depth: int = 1):
241243
writer = self.writer
242244

243245
if subject in self.forceRDFAbout:
246+
subject = cast(URIRef, subject)
244247
writer.push(RDFVOC.Description)
245248
writer.attribute(RDFVOC.about, self.relativize(subject))
246249
writer.pop(RDFVOC.Description)
@@ -282,6 +285,7 @@ def subj_as_obj_more_than(ceil):
282285

283286
elif subject in self.forceRDFAbout:
284287
# TODO FIXME?: this looks like a duplicate of first condition
288+
subject = cast(URIRef, subject)
285289
writer.push(RDFVOC.Description)
286290
writer.attribute(RDFVOC.about, self.relativize(subject))
287291
writer.pop(RDFVOC.Description)

rdflib/plugins/serializers/trig.py

Lines changed: 40 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -62,53 +62,45 @@ def serialize(
6262
spacious: Optional[bool] = None,
6363
**args,
6464
):
65-
self.reset()
66-
self.stream = stream
67-
# if base is given here, use that, if not and a base is set for the graph use that
68-
if base is not None:
69-
self.base = base
70-
elif self.store.base is not None:
71-
self.base = self.store.base
72-
73-
if spacious is not None:
74-
self._spacious = spacious
75-
76-
self.preprocess()
77-
78-
self.startDocument()
79-
80-
firstTime = True
81-
for store, (ordered_subjects, subjects, ref) in self._contexts.items():
82-
if not ordered_subjects:
83-
continue
84-
85-
self._references = ref
86-
self._serialized = {}
87-
self.store = store
88-
self._subjects = subjects
89-
90-
if self.default_context and store.identifier == self.default_context:
91-
self.write(self.indent() + "\n{")
92-
else:
93-
iri: Optional[str]
94-
if isinstance(store.identifier, BNode):
95-
iri = store.identifier.n3()
96-
else:
97-
iri = self.getQName(store.identifier)
98-
if iri is None:
99-
iri = store.identifier.n3()
100-
self.write(self.indent() + "\n%s {" % iri)
65+
self._serialize_init(stream, base, encoding, spacious)
66+
try:
67+
self.preprocess()
10168

102-
self.depth += 1
103-
for subject in ordered_subjects:
104-
if self.isDone(subject):
69+
self.startDocument()
70+
71+
firstTime = True
72+
for store, (ordered_subjects, subjects, ref) in self._contexts.items():
73+
if not ordered_subjects:
10574
continue
106-
if firstTime:
107-
firstTime = False
108-
if self.statement(subject) and not firstTime:
109-
self.write("\n")
110-
self.depth -= 1
111-
self.write("}\n")
112-
113-
self.endDocument()
114-
stream.write("\n".encode("latin-1"))
75+
76+
self._references = ref
77+
self._serialized = {}
78+
self.store = store
79+
self._subjects = subjects
80+
81+
if self.default_context and store.identifier == self.default_context:
82+
self.write(self.indent() + "\n{")
83+
else:
84+
if isinstance(store.identifier, BNode):
85+
iri = store.identifier.n3()
86+
else:
87+
iri = self.getQName(store.identifier)
88+
if iri is None:
89+
iri = store.identifier.n3()
90+
self.write(self.indent() + "\n%s {" % iri)
91+
92+
self.depth += 1
93+
for subject in ordered_subjects:
94+
if self.isDone(subject):
95+
continue
96+
if firstTime:
97+
firstTime = False
98+
if self.statement(subject) and not firstTime:
99+
self.write("\n")
100+
self.depth -= 1
101+
self.write("}\n")
102+
103+
self.endDocument()
104+
self.write("\n")
105+
finally:
106+
self._serialize_end()

rdflib/plugins/serializers/turtle.py

Lines changed: 60 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,13 @@
66
from collections import defaultdict
77
from functools import cmp_to_key
88

9+
from rdflib.graph import Graph
910
from rdflib.term import BNode, Literal, URIRef
1011
from rdflib.exceptions import Error
1112
from rdflib.serializer import Serializer
1213
from rdflib.namespace import RDF, RDFS
14+
from io import TextIOWrapper
15+
from typing import IO, Dict, Optional
1316

1417
__all__ = ["RecursiveSerializer", "TurtleSerializer"]
1518

@@ -44,10 +47,13 @@ class RecursiveSerializer(Serializer):
4447
indentString = " "
4548
roundtrip_prefixes = ()
4649

47-
def __init__(self, store):
50+
def __init__(self, store: Graph):
4851

4952
super(RecursiveSerializer, self).__init__(store)
50-
self.stream = None
53+
# TODO FIXME: Ideally stream should be optional, but nothing treats it
54+
# as such, so least weird solution is to just type it as not optional
55+
# even though it can sometimes be null.
56+
self.stream: IO[str] = None # type: ignore[assignment]
5157
self.reset()
5258

5359
def addNamespace(self, prefix, uri):
@@ -166,9 +172,9 @@ def indent(self, modifier=0):
166172
"""Returns indent string multiplied by the depth"""
167173
return (self.depth + modifier) * self.indentString
168174

169-
def write(self, text):
170-
"""Write text in given encoding."""
171-
self.stream.write(text.encode(self.encoding, "replace"))
175+
def write(self, text: str):
176+
"""Write text"""
177+
self.stream.write(text)
172178

173179

174180
SUBJECT = 0
@@ -184,15 +190,15 @@ class TurtleSerializer(RecursiveSerializer):
184190
short_name = "turtle"
185191
indentString = " "
186192

187-
def __init__(self, store):
188-
self._ns_rewrite = {}
193+
def __init__(self, store: Graph):
194+
self._ns_rewrite: Dict[str, str] = {}
189195
super(TurtleSerializer, self).__init__(store)
190196
self.keywords = {RDF.type: "a"}
191197
self.reset()
192-
self.stream = None
198+
self.stream: TextIOWrapper = None # type: ignore[assignment]
193199
self._spacious = _SPACIOUS_OUTPUT
194200

195-
def addNamespace(self, prefix, namespace):
201+
def addNamespace(self, prefix: str, namespace: str):
196202
# Turtle does not support prefix that start with _
197203
# if they occur in the graph, rewrite to p_blah
198204
# this is more complicated since we need to make sure p_blah
@@ -223,36 +229,60 @@ def reset(self):
223229
self._started = False
224230
self._ns_rewrite = {}
225231

226-
def serialize(self, stream, base=None, encoding=None, spacious=None, **args):
232+
def _serialize_init(
233+
self,
234+
stream: IO[bytes],
235+
base: Optional[str],
236+
encoding: Optional[str],
237+
spacious: Optional[bool],
238+
) -> None:
227239
self.reset()
228-
self.stream = stream
240+
if encoding is not None:
241+
self.encoding = encoding
242+
self.stream = TextIOWrapper(
243+
stream, self.encoding, errors="replace", write_through=True
244+
)
229245
# if base is given here, use that, if not and a base is set for the graph use that
230246
if base is not None:
231247
self.base = base
232248
elif self.store.base is not None:
233249
self.base = self.store.base
234-
235250
if spacious is not None:
236251
self._spacious = spacious
237252

238-
self.preprocess()
239-
subjects_list = self.orderSubjects()
240-
241-
self.startDocument()
242-
243-
firstTime = True
244-
for subject in subjects_list:
245-
if self.isDone(subject):
246-
continue
247-
if firstTime:
248-
firstTime = False
249-
if self.statement(subject) and not firstTime:
250-
self.write("\n")
251-
252-
self.endDocument()
253-
stream.write("\n".encode("latin-1"))
254-
255-
self.base = None
253+
def _serialize_end(self) -> None:
254+
self.stream.flush()
255+
self.stream.detach()
256+
self.stream = None # type: ignore[assignment]
257+
258+
def serialize(
259+
self,
260+
stream: IO[bytes],
261+
base: Optional[str] = None,
262+
encoding: Optional[str] = None,
263+
spacious: Optional[bool] = None,
264+
**args,
265+
):
266+
self._serialize_init(stream, base, encoding, spacious)
267+
try:
268+
self.preprocess()
269+
subjects_list = self.orderSubjects()
270+
271+
self.startDocument()
272+
273+
firstTime = True
274+
for subject in subjects_list:
275+
if self.isDone(subject):
276+
continue
277+
if firstTime:
278+
firstTime = False
279+
if self.statement(subject) and not firstTime:
280+
self.write("\n")
281+
282+
self.endDocument()
283+
self.stream.write("\n")
284+
finally:
285+
self._serialize_end()
256286

257287
def preprocessTriple(self, triple):
258288
super(TurtleSerializer, self).preprocessTriple(triple)

0 commit comments

Comments
 (0)