diff --git a/rdflib/graph.py b/rdflib/graph.py
index 857491a2e..f9e00f2f9 100644
--- a/rdflib/graph.py
+++ b/rdflib/graph.py
@@ -355,6 +355,11 @@
_TripleOrQuadSelectorType = Union["_TripleSelectorType", "_QuadSelectorType"]
_TriplePathType = Tuple["_SubjectType", Path, "_ObjectType"]
_TripleOrTriplePathType = Union["_TripleType", "_TriplePathType"]
+_TripleChoiceType = Union[
+ Tuple[List[_SubjectType], Optional[_PredicateType], Optional[_ObjectType]],
+ Tuple[Optional[_SubjectType], List[_PredicateType], Optional[_ObjectType]],
+ Tuple[Optional[_SubjectType], Optional[_PredicateType], List[_ObjectType]],
+]
_GraphT = TypeVar("_GraphT", bound="Graph")
_ConjunctiveGraphT = TypeVar("_ConjunctiveGraphT", bound="ConjunctiveGraph")
@@ -994,11 +999,7 @@ def predicate_objects(
def triples_choices(
self,
- triple: Union[
- Tuple[List[_SubjectType], _PredicateType, _ObjectType],
- Tuple[_SubjectType, List[_PredicateType], _ObjectType],
- Tuple[_SubjectType, _PredicateType, List[_ObjectType]],
- ],
+ triple: _TripleChoiceType,
context: Optional[_ContextType] = None,
) -> Generator[_TripleType, None, None]:
subject, predicate, object_ = triple
@@ -2196,11 +2197,7 @@ def quads(
def triples_choices(
self,
- triple: Union[
- Tuple[List[_SubjectType], _PredicateType, _ObjectType],
- Tuple[_SubjectType, List[_PredicateType], _ObjectType],
- Tuple[_SubjectType, _PredicateType, List[_ObjectType]],
- ],
+ triple: _TripleChoiceType,
context: Optional[_ContextType] = None,
) -> Generator[_TripleType, None, None]:
"""Iterate over all the triples in the entire conjunctive graph"""
@@ -2946,11 +2943,7 @@ def __isub__(self: _GraphT, other: Iterable[_TripleType]) -> NoReturn:
def triples_choices(
self,
- triple: Union[
- Tuple[List[_SubjectType], _PredicateType, _ObjectType],
- Tuple[_SubjectType, List[_PredicateType], _ObjectType],
- Tuple[_SubjectType, _PredicateType, List[_ObjectType]],
- ],
+ triple: _TripleChoiceType,
context: Optional[_ContextType] = None,
) -> Generator[_TripleType, None, None]:
subject, predicate, object_ = triple
diff --git a/rdflib/plugins/parsers/rdfxml.py b/rdflib/plugins/parsers/rdfxml.py
index e0f6e05fa..54fc69567 100644
--- a/rdflib/plugins/parsers/rdfxml.py
+++ b/rdflib/plugins/parsers/rdfxml.py
@@ -298,7 +298,8 @@ def document_element_start(
self, name: Tuple[str, str], qname, attrs: AttributesImpl
) -> None:
if name[0] and URIRef("".join(name)) == RDFVOC.RDF:
- next = self.next
+ # Cheap hack so 2to3 doesn't turn it into __next__
+ next = getattr(self, "next")
next.start = self.node_element_start
next.end = self.node_element_end
else:
@@ -315,7 +316,8 @@ def node_element_start(
current = self.current
absolutize = self.absolutize
- next = self.next
+ # Cheap hack so 2to3 doesn't turn it into __next__
+ next = getattr(self, "next")
next.start = self.property_element_start
next.end = self.property_element_end
@@ -408,7 +410,8 @@ def property_element_start(
current = self.current
absolutize = self.absolutize
- next = self.next
+ # Cheap hack so 2to3 doesn't turn it into __next__
+ next = getattr(self, "next")
object: Optional[_ObjectType] = None
current.data = None
current.list = None
diff --git a/rdflib/plugins/serializers/longturtle.py b/rdflib/plugins/serializers/longturtle.py
index 8de1e52a2..2aaed36e6 100644
--- a/rdflib/plugins/serializers/longturtle.py
+++ b/rdflib/plugins/serializers/longturtle.py
@@ -39,21 +39,20 @@
class LongTurtleSerializer(RecursiveSerializer):
+ """LongTurtle, a Turtle serialization format.
+
+ When the optional parameter ``canon`` is set to :py:obj:`True`, the graph is canonicalized
+ before serialization. This normalizes blank node identifiers and allows for
+ deterministic serialization of the graph. Useful when consistent outputs are required.
+ """
+
short_name = "longturtle"
indentString = " "
def __init__(self, store):
self._ns_rewrite = {}
- store = to_canonical_graph(store)
- content = store.serialize(format="application/n-triples")
- lines = content.split("\n")
- lines.sort()
- graph = Graph()
- graph.parse(
- data="\n".join(lines), format="application/n-triples", skolemize=True
- )
- graph = graph.de_skolemize()
- super(LongTurtleSerializer, self).__init__(graph)
+ self._canon = False
+ super(LongTurtleSerializer, self).__init__(store)
self.keywords = {RDF.type: "a"}
self.reset()
self.stream = None
@@ -83,11 +82,34 @@ def addNamespace(self, prefix, namespace):
super(LongTurtleSerializer, self).addNamespace(prefix, namespace)
return prefix
+ def canonize(self):
+ """Apply canonicalization to the store.
+
+ A no-op unless ``self._canon`` is true; otherwise ``self.store`` is replaced with a copy
+ that has normalized blank node identifiers, allowing deterministic serialization.
+ """
+ if not self._canon:
+ return
+
+ namespace_manager = self.store.namespace_manager
+ store = to_canonical_graph(self.store)
+ content = store.serialize(format="application/n-triples")
+ lines = content.split("\n")
+ lines.sort()
+ graph = Graph()
+ graph.parse(
+ data="\n".join(lines), format="application/n-triples", skolemize=True
+ )
+ graph = graph.de_skolemize()
+ graph.namespace_manager = namespace_manager
+ self.store = graph
+
def reset(self):
super(LongTurtleSerializer, self).reset()
self._shortNames = {}
self._started = False
self._ns_rewrite = {}
+ self.canonize()
def serialize(
self,
@@ -97,6 +119,7 @@ def serialize(
spacious: Optional[bool] = None,
**kwargs: Any,
) -> None:
+ self._canon = kwargs.get("canon", False)
self.reset()
self.stream = stream
# if base is given here, use, if not and a base is set for the graph use that
diff --git a/rdflib/plugins/stores/berkeleydb.py b/rdflib/plugins/stores/berkeleydb.py
index 872dc368e..12009787c 100644
--- a/rdflib/plugins/stores/berkeleydb.py
+++ b/rdflib/plugins/stores/berkeleydb.py
@@ -428,7 +428,8 @@ def remove( # type: ignore[override]
cursor = index.cursor(txn=txn)
try:
cursor.set_range(key)
- current = cursor.next
+ # Hack to stop 2to3 converting this to next(cursor)
+ current = getattr(cursor, "next")()
except db.DBNotFoundError:
current = None
cursor.close()
@@ -505,7 +506,8 @@ def triples(
cursor = index.cursor(txn=txn)
try:
cursor.set_range(key)
- current = cursor.next
+ # Cheap hack so 2to3 doesn't convert this to next(cursor)
+ current = getattr(cursor, "next")()
except db.DBNotFoundError:
current = None
cursor.close()
@@ -537,7 +539,8 @@ def __len__(self, context: Optional[_ContextType] = None) -> int:
key, value = current
if key.startswith(prefix):
count += 1
- current = cursor.next
+ # Hack to stop 2to3 converting this to next(cursor)
+ current = getattr(cursor, "next")()
else:
break
cursor.close()
@@ -590,7 +593,8 @@ def namespaces(self) -> Generator[Tuple[str, URIRef], None, None]:
while current:
prefix, namespace = current
results.append((prefix.decode("utf-8"), namespace.decode("utf-8")))
- current = cursor.next
+ # Hack to stop 2to3 converting this to next(cursor)
+ current = getattr(cursor, "next")()
cursor.close()
for prefix, namespace in results:
yield prefix, URIRef(namespace)
@@ -633,7 +637,8 @@ def contexts(
cursor = index.cursor()
try:
cursor.set_range(key)
- current = cursor.next
+ # Hack to stop 2to3 converting this to next(cursor)
+ current = getattr(cursor, "next")()
except db.DBNotFoundError:
current = None
cursor.close()
diff --git a/rdflib/plugins/stores/sparqlstore.py b/rdflib/plugins/stores/sparqlstore.py
index f9827cf94..e7a9723e8 100644
--- a/rdflib/plugins/stores/sparqlstore.py
+++ b/rdflib/plugins/stores/sparqlstore.py
@@ -35,6 +35,7 @@
_TripleType,
_ContextType,
_QuadType,
+ _TripleChoiceType,
_TriplePatternType,
_SubjectType,
_PredicateType,
@@ -367,11 +368,7 @@ def triples( # type: ignore[override]
def triples_choices(
self,
- _: Tuple[
- Union[_SubjectType, List[_SubjectType]],
- Union[_PredicateType, List[_PredicateType]],
- Union[_ObjectType, List[_ObjectType]],
- ],
+ _: _TripleChoiceType,
context: Optional[_ContextType] = None,
) -> Generator[
Tuple[
diff --git a/rdflib/store.py b/rdflib/store.py
index 2ca03529a..9cada631d 100644
--- a/rdflib/store.py
+++ b/rdflib/store.py
@@ -36,7 +36,6 @@
Generator,
Iterable,
Iterator,
- List,
Mapping,
Optional,
Tuple,
@@ -49,10 +48,8 @@
from rdflib.graph import (
Graph,
_ContextType,
- _ObjectType,
- _PredicateType,
_QuadType,
- _SubjectType,
+ _TripleChoiceType,
_TriplePatternType,
_TripleType,
)
@@ -281,11 +278,7 @@ def remove(
def triples_choices(
self,
- triple: Union[
- Tuple[List[_SubjectType], _PredicateType, _ObjectType],
- Tuple[_SubjectType, List[_PredicateType], _ObjectType],
- Tuple[_SubjectType, _PredicateType, List[_ObjectType]],
- ],
+ triple: _TripleChoiceType,
context: Optional[_ContextType] = None,
) -> Generator[
Tuple[
diff --git a/test/data/longturtle/longturtle-target.ttl b/test/data/longturtle/longturtle-target.ttl
index 54cf23e9f..b9df06e75 100644
--- a/test/data/longturtle/longturtle-target.ttl
+++ b/test/data/longturtle/longturtle-target.ttl
@@ -1,72 +1,74 @@
+PREFIX cn:
+PREFIX ex:
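PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
-PREFIX schema: <https://schema.org/>
+PREFIX sdo: <https://schema.org/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>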
-
- a schema:Person ;
- schema:age 41 ;
- schema:alternateName
+ex:nicholas
+ a sdo:Person ;
+ sdo:age 41 ;
+ sdo:alternateName
[
- schema:name "Dr N.J. Car" ;
+ sdo:name "Dr N.J. Car" ;
] ,
"N.J. Car" ,
"Nick Car" ;
- schema:name
+ sdo:name
[
- a ;
- schema:hasPart
+ a cn:CompoundName ;
+ sdo:hasPart
[
- a ;
- schema:hasPart
+ a cn:CompoundName ;
+ sdo:hasPart
[
- a ;
+ a cn:CompoundName ;
rdf:value "Car" ;
] ,
[
- a ;
+ a cn:CompoundName ;
rdf:value "Maxov" ;
] ;
] ,
[
- a ;
+ a cn:CompoundName ;
rdf:value "Nicholas" ;
] ,
[
- a ;
+ a cn:CompoundName ;
rdf:value "John" ;
] ;
] ;
- schema:worksFor ;
+ sdo:worksFor ;
.
- a schema:Organization ;
- schema:location ;
+ a sdo:Organization ;
+ sdo:location ;
.
- a schema:Place ;
- schema:address
+ a sdo:Place ;
+ sdo:address
[
- a schema:PostalAddress ;
- schema:addressCountry
+ a sdo:PostalAddress ;
+ sdo:addressCountry
[
- schema:identifier "au" ;
- schema:name "Australia" ;
+ sdo:identifier "au" ;
+ sdo:name "Australia" ;
] ;
- schema:addressLocality "Shorncliffe" ;
- schema:addressRegion "QLD" ;
- schema:postalCode 4017 ;
- schema:streetAddress (
+ sdo:addressLocality "Shorncliffe" ;
+ sdo:addressRegion "QLD" ;
+ sdo:postalCode 4017 ;
+ sdo:streetAddress (
72
"Yundah"
"Street"
) ;
] ;
- schema:geo
+ sdo:geo
[
- schema:polygon "POLYGON((153.082403 -27.325801, 153.08241 -27.32582, 153.082943 -27.325612, 153.083010 -27.325742, 153.083543 -27.325521, 153.083456 -27.325365, 153.082403 -27.325801))"^^geo:wktLiteral ;
+ sdo:polygon "POLYGON((153.082403 -27.325801, 153.08241 -27.32582, 153.082943 -27.325612, 153.083010 -27.325742, 153.083543 -27.325521, 153.083456 -27.325365, 153.082403 -27.325801))"^^geo:wktLiteral ;
] ;
- schema:name "KurrawongAI HQ" ;
+ sdo:name "KurrawongAI HQ" ;
.
diff --git a/test/test_graph/test_graph.py b/test/test_graph/test_graph.py
index 639aa710c..0e8227042 100644
--- a/test/test_graph/test_graph.py
+++ b/test/test_graph/test_graph.py
@@ -399,7 +399,7 @@ def test_guess_format_for_parse_http_text_plain():
assert len(graph) > 0
# A url that returns content-type text/html.
- url = "https://github.com/RDFLib/rdflib/issues/2734"
+ url = "https://www.w3.org/TR/REC-rdf-syntax/"
with pytest.raises(PluginException):
graph = Graph().parse(url)
diff --git a/test/test_serializers/test_serializer_longturtle.py b/test/test_serializers/test_serializer_longturtle.py
index c1761b6da..65821784e 100644
--- a/test/test_serializers/test_serializer_longturtle.py
+++ b/test/test_serializers/test_serializer_longturtle.py
@@ -167,7 +167,7 @@ def test_longturtle():
g.bind("sdo", SDO)
# run the long turtle serializer
- output = g.serialize(format="longturtle")
+ output = g.serialize(format="longturtle", canon=True)
# fix the target
current_dir = Path.cwd() # Get the current directory
diff --git a/test/test_serializers/test_serializer_longturtle_sort.py b/test/test_serializers/test_serializer_longturtle_sort.py
index 0e397afaf..044660e3e 100644
--- a/test/test_serializers/test_serializer_longturtle_sort.py
+++ b/test/test_serializers/test_serializer_longturtle_sort.py
@@ -62,55 +62,55 @@ def test_sort_semiblank_graph() -> None:
graph.add((outer_node, EX.has, inner_node))
graph.add((inner_node, RDFS.seeAlso, nested))
- graph_text = graph.serialize(format="longturtle", sort=True)
+ graph_text = graph.serialize(format="longturtle", canon=True)
if first_graph_text == "":
first_graph_text = graph_text
serialization_counter[graph_text] += 1
expected_serialization = """\
-PREFIX ns1:
+PREFIX ex:
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
-ns1:A
+ex:A
rdfs:comment "Thing A" ;
.
-ns1:C
+ex:C
rdfs:comment "Thing C" ;
.
-ns1:B
+ex:B
rdfs:comment "Thing B" ;
.
-[] ns1:has
+[] ex:has
[
- rdfs:seeAlso ns1:A ;
+ rdfs:seeAlso ex:A ;
] ;
.
-[] rdfs:seeAlso ns1:B ;
+[] rdfs:seeAlso ex:B ;
.
-[] ns1:has
+[] ex:has
[
- rdfs:seeAlso ns1:C ;
+ rdfs:seeAlso ex:C ;
] ;
.
-[] rdfs:seeAlso ns1:A ;
+[] rdfs:seeAlso ex:A ;
.
-[] rdfs:seeAlso ns1:C ;
+[] rdfs:seeAlso ex:C ;
.
-[] rdfs:seeAlso ns1:B ;
+[] rdfs:seeAlso ex:B ;
.
-[] ns1:has
+[] ex:has
[
- rdfs:seeAlso ns1:B ;
+ rdfs:seeAlso ex:B ;
] ;
.