diff --git a/rdflib/graph.py b/rdflib/graph.py index 857491a2e..f9e00f2f9 100644 --- a/rdflib/graph.py +++ b/rdflib/graph.py @@ -355,6 +355,11 @@ _TripleOrQuadSelectorType = Union["_TripleSelectorType", "_QuadSelectorType"] _TriplePathType = Tuple["_SubjectType", Path, "_ObjectType"] _TripleOrTriplePathType = Union["_TripleType", "_TriplePathType"] +_TripleChoiceType = Union[ + Tuple[List[_SubjectType], Optional[_PredicateType], Optional[_ObjectType]], + Tuple[Optional[_SubjectType], List[_PredicateType], Optional[_ObjectType]], + Tuple[Optional[_SubjectType], Optional[_PredicateType], List[_ObjectType]], +] _GraphT = TypeVar("_GraphT", bound="Graph") _ConjunctiveGraphT = TypeVar("_ConjunctiveGraphT", bound="ConjunctiveGraph") @@ -994,11 +999,7 @@ def predicate_objects( def triples_choices( self, - triple: Union[ - Tuple[List[_SubjectType], _PredicateType, _ObjectType], - Tuple[_SubjectType, List[_PredicateType], _ObjectType], - Tuple[_SubjectType, _PredicateType, List[_ObjectType]], - ], + triple: _TripleChoiceType, context: Optional[_ContextType] = None, ) -> Generator[_TripleType, None, None]: subject, predicate, object_ = triple @@ -2196,11 +2197,7 @@ def quads( def triples_choices( self, - triple: Union[ - Tuple[List[_SubjectType], _PredicateType, _ObjectType], - Tuple[_SubjectType, List[_PredicateType], _ObjectType], - Tuple[_SubjectType, _PredicateType, List[_ObjectType]], - ], + triple: _TripleChoiceType, context: Optional[_ContextType] = None, ) -> Generator[_TripleType, None, None]: """Iterate over all the triples in the entire conjunctive graph""" @@ -2946,11 +2943,7 @@ def __isub__(self: _GraphT, other: Iterable[_TripleType]) -> NoReturn: def triples_choices( self, - triple: Union[ - Tuple[List[_SubjectType], _PredicateType, _ObjectType], - Tuple[_SubjectType, List[_PredicateType], _ObjectType], - Tuple[_SubjectType, _PredicateType, List[_ObjectType]], - ], + triple: _TripleChoiceType, context: Optional[_ContextType] = None, ) -> Generator[_TripleType, None, None]: subject, predicate, object_ = triple diff --git a/rdflib/plugins/parsers/rdfxml.py b/rdflib/plugins/parsers/rdfxml.py index e0f6e05fa..54fc69567 100644 --- a/rdflib/plugins/parsers/rdfxml.py +++ b/rdflib/plugins/parsers/rdfxml.py @@ -298,7 +298,8 @@ def document_element_start( self, name: Tuple[str, str], qname, attrs: AttributesImpl ) -> None: if name[0] and URIRef("".join(name)) == RDFVOC.RDF: - next = self.next + # Cheap hack so 2to3 doesn't turn it into __next__ + next = getattr(self, "next") next.start = self.node_element_start next.end = self.node_element_end else: @@ -315,7 +316,8 @@ def node_element_start( current = self.current absolutize = self.absolutize - next = self.next + # Cheap hack so 2to3 doesn't turn it into __next__ + next = getattr(self, "next") next.start = self.property_element_start next.end = self.property_element_end @@ -408,7 +410,8 @@ def property_element_start( current = self.current absolutize = self.absolutize - next = self.next + # Cheap hack so 2to3 doesn't turn it into __next__ + next = getattr(self, "next") object: Optional[_ObjectType] = None current.data = None current.list = None diff --git a/rdflib/plugins/serializers/longturtle.py b/rdflib/plugins/serializers/longturtle.py index 8de1e52a2..2aaed36e6 100644 --- a/rdflib/plugins/serializers/longturtle.py +++ b/rdflib/plugins/serializers/longturtle.py @@ -39,21 +39,20 @@ class LongTurtleSerializer(RecursiveSerializer): + """LongTurtle, a Turtle serialization format. + + When the optional parameter ``canon`` is set to :py:obj:`True`, the graph is canonicalized + before serialization. This normalizes blank node identifiers and allows for + deterministic serialization of the graph. Useful when consistent outputs are required. + """ + short_name = "longturtle" indentString = " " def __init__(self, store): self._ns_rewrite = {} - store = to_canonical_graph(store) - content = store.serialize(format="application/n-triples") - lines = content.split("\n") - lines.sort() - graph = Graph() - graph.parse( - data="\n".join(lines), format="application/n-triples", skolemize=True - ) - graph = graph.de_skolemize() - super(LongTurtleSerializer, self).__init__(graph) + self._canon = False + super(LongTurtleSerializer, self).__init__(store) self.keywords = {RDF.type: "a"} self.reset() self.stream = None @@ -83,11 +82,34 @@ def addNamespace(self, prefix, namespace): super(LongTurtleSerializer, self).addNamespace(prefix, namespace) return prefix + def canonize(self): + """Apply canonicalization to the store. + + This normalizes blank node identifiers and allows for deterministic + serialization of the graph. + """ + if not self._canon: + return + + namespace_manager = self.store.namespace_manager + store = to_canonical_graph(self.store) + content = store.serialize(format="application/n-triples") + lines = content.split("\n") + lines.sort() + graph = Graph() + graph.parse( + data="\n".join(lines), format="application/n-triples", skolemize=True + ) + graph = graph.de_skolemize() + graph.namespace_manager = namespace_manager + self.store = graph + def reset(self): super(LongTurtleSerializer, self).reset() self._shortNames = {} self._started = False self._ns_rewrite = {} + self.canonize() def serialize( self, @@ -97,6 +119,7 @@ def serialize( spacious: Optional[bool] = None, **kwargs: Any, ) -> None: + self._canon = kwargs.get("canon", False) self.reset() self.stream = stream # if base is given here, use, if not and a base is set for the graph use that diff --git a/rdflib/plugins/stores/berkeleydb.py b/rdflib/plugins/stores/berkeleydb.py index 872dc368e..12009787c 100644 --- a/rdflib/plugins/stores/berkeleydb.py +++ b/rdflib/plugins/stores/berkeleydb.py @@ -428,7 +428,8 @@ def remove( # type: ignore[override] cursor = index.cursor(txn=txn) try: cursor.set_range(key) - current = cursor.next + # Hack to stop 2to3 converting this to next(cursor) + current = getattr(cursor, "next")() except db.DBNotFoundError: current = None cursor.close() @@ -505,7 +506,8 @@ def triples( cursor = index.cursor(txn=txn) try: cursor.set_range(key) - current = cursor.next + # Cheap hack so 2to3 doesn't convert to next(cursor) + current = getattr(cursor, "next")() except db.DBNotFoundError: current = None cursor.close() @@ -537,7 +539,8 @@ def __len__(self, context: Optional[_ContextType] = None) -> int: key, value = current if key.startswith(prefix): count += 1 - current = cursor.next + # Hack to stop 2to3 converting this to next(cursor) + current = getattr(cursor, "next")() else: break cursor.close() @@ -590,7 +593,8 @@ def namespaces(self) -> Generator[Tuple[str, URIRef], None, None]: while current: prefix, namespace = current results.append((prefix.decode("utf-8"), namespace.decode("utf-8"))) - current = cursor.next + # Hack to stop 2to3 converting this to next(cursor) + current = getattr(cursor, "next")() cursor.close() for prefix, namespace in results: yield prefix, URIRef(namespace) @@ -633,7 +637,8 @@ def contexts( cursor = index.cursor() try: cursor.set_range(key) - current = cursor.next + # Hack to stop 2to3 converting this to next(cursor) + current = getattr(cursor, "next")() except db.DBNotFoundError: current = None cursor.close() diff --git a/rdflib/plugins/stores/sparqlstore.py b/rdflib/plugins/stores/sparqlstore.py index f9827cf94..e7a9723e8 100644 --- a/rdflib/plugins/stores/sparqlstore.py +++ b/rdflib/plugins/stores/sparqlstore.py @@ -35,6 +35,7 @@ _TripleType, _ContextType, _QuadType, + _TripleChoiceType, _TriplePatternType, _SubjectType, _PredicateType, @@ -367,11 +368,7 @@ def triples( # type: ignore[override] def triples_choices( self, - _: Tuple[ - Union[_SubjectType, List[_SubjectType]], - Union[_PredicateType, List[_PredicateType]], - Union[_ObjectType, List[_ObjectType]], - ], + _: _TripleChoiceType, context: Optional[_ContextType] = None, ) -> Generator[ Tuple[ diff --git a/rdflib/store.py b/rdflib/store.py index 2ca03529a..9cada631d 100644 --- a/rdflib/store.py +++ b/rdflib/store.py @@ -36,7 +36,6 @@ Generator, Iterable, Iterator, - List, Mapping, Optional, Tuple, @@ -49,10 +48,8 @@ from rdflib.graph import ( Graph, _ContextType, - _ObjectType, - _PredicateType, _QuadType, - _SubjectType, + _TripleChoiceType, _TriplePatternType, _TripleType, ) @@ -281,11 +278,7 @@ def remove( def triples_choices( self, - triple: Union[ - Tuple[List[_SubjectType], _PredicateType, _ObjectType], - Tuple[_SubjectType, List[_PredicateType], _ObjectType], - Tuple[_SubjectType, _PredicateType, List[_ObjectType]], - ], + triple: _TripleChoiceType, context: Optional[_ContextType] = None, ) -> Generator[ Tuple[ diff --git a/test/data/longturtle/longturtle-target.ttl b/test/data/longturtle/longturtle-target.ttl index 54cf23e9f..b9df06e75 100644 --- a/test/data/longturtle/longturtle-target.ttl +++ b/test/data/longturtle/longturtle-target.ttl @@ -1,72 +1,74 @@ +PREFIX cn: +PREFIX ex: PREFIX geo: PREFIX rdf: -PREFIX schema: +PREFIX sdo: PREFIX xsd: - - a schema:Person ; - schema:age 41 ; - schema:alternateName +ex:nicholas + a sdo:Person ; + sdo:age 41 ; + sdo:alternateName [ - schema:name "Dr N.J. Car" ; + sdo:name "Dr N.J. Car" ; ] , "N.J. Car" , "Nick Car" ; - schema:name + sdo:name [ - a ; - schema:hasPart + a cn:CompoundName ; + sdo:hasPart [ - a ; - schema:hasPart + a cn:CompoundName ; + sdo:hasPart [ - a ; + a cn:CompoundName ; rdf:value "Car" ; ] , [ - a ; + a cn:CompoundName ; rdf:value "Maxov" ; ] ; ] , [ - a ; + a cn:CompoundName ; rdf:value "Nicholas" ; ] , [ - a ; + a cn:CompoundName ; rdf:value "John" ; ] ; ] ; - schema:worksFor ; + sdo:worksFor ; . - a schema:Organization ; - schema:location ; + a sdo:Organization ; + sdo:location ; . - a schema:Place ; - schema:address + a sdo:Place ; + sdo:address [ - a schema:PostalAddress ; - schema:addressCountry + a sdo:PostalAddress ; + sdo:addressCountry [ - schema:identifier "au" ; - schema:name "Australia" ; + sdo:identifier "au" ; + sdo:name "Australia" ; ] ; - schema:addressLocality "Shorncliffe" ; - schema:addressRegion "QLD" ; - schema:postalCode 4017 ; - schema:streetAddress ( + sdo:addressLocality "Shorncliffe" ; + sdo:addressRegion "QLD" ; + sdo:postalCode 4017 ; + sdo:streetAddress ( 72 "Yundah" "Street" ) ; ] ; - schema:geo + sdo:geo [ - schema:polygon "POLYGON((153.082403 -27.325801, 153.08241 -27.32582, 153.082943 -27.325612, 153.083010 -27.325742, 153.083543 -27.325521, 153.083456 -27.325365, 153.082403 -27.325801))"^^geo:wktLiteral ; + sdo:polygon "POLYGON((153.082403 -27.325801, 153.08241 -27.32582, 153.082943 -27.325612, 153.083010 -27.325742, 153.083543 -27.325521, 153.083456 -27.325365, 153.082403 -27.325801))"^^geo:wktLiteral ; ] ; - schema:name "KurrawongAI HQ" ; + sdo:name "KurrawongAI HQ" ; . diff --git a/test/test_graph/test_graph.py b/test/test_graph/test_graph.py index 639aa710c..0e8227042 100644 --- a/test/test_graph/test_graph.py +++ b/test/test_graph/test_graph.py @@ -399,7 +399,7 @@ def test_guess_format_for_parse_http_text_plain(): assert len(graph) > 0 # A url that returns content-type text/html. - url = "https://github.com/RDFLib/rdflib/issues/2734" + url = "https://www.w3.org/TR/REC-rdf-syntax/" with pytest.raises(PluginException): graph = Graph().parse(url) diff --git a/test/test_serializers/test_serializer_longturtle.py b/test/test_serializers/test_serializer_longturtle.py index c1761b6da..65821784e 100644 --- a/test/test_serializers/test_serializer_longturtle.py +++ b/test/test_serializers/test_serializer_longturtle.py @@ -167,7 +167,7 @@ def test_longturtle(): g.bind("sdo", SDO) # run the long turtle serializer - output = g.serialize(format="longturtle") + output = g.serialize(format="longturtle", canon=True) # fix the target current_dir = Path.cwd() # Get the current directory diff --git a/test/test_serializers/test_serializer_longturtle_sort.py b/test/test_serializers/test_serializer_longturtle_sort.py index 0e397afaf..044660e3e 100644 --- a/test/test_serializers/test_serializer_longturtle_sort.py +++ b/test/test_serializers/test_serializer_longturtle_sort.py @@ -62,55 +62,55 @@ def test_sort_semiblank_graph() -> None: graph.add((outer_node, EX.has, inner_node)) graph.add((inner_node, RDFS.seeAlso, nested)) - graph_text = graph.serialize(format="longturtle", sort=True) + graph_text = graph.serialize(format="longturtle", canon=True) if first_graph_text == "": first_graph_text = graph_text serialization_counter[graph_text] += 1 expected_serialization = """\ -PREFIX ns1: +PREFIX ex: PREFIX rdfs: -ns1:A +ex:A rdfs:comment "Thing A" ; . -ns1:C +ex:C rdfs:comment "Thing C" ; . -ns1:B +ex:B rdfs:comment "Thing B" ; . -[] ns1:has +[] ex:has [ - rdfs:seeAlso ns1:A ; + rdfs:seeAlso ex:A ; ] ; . -[] rdfs:seeAlso ns1:B ; +[] rdfs:seeAlso ex:B ; . -[] ns1:has +[] ex:has [ - rdfs:seeAlso ns1:C ; + rdfs:seeAlso ex:C ; ] ; . -[] rdfs:seeAlso ns1:A ; +[] rdfs:seeAlso ex:A ; . -[] rdfs:seeAlso ns1:C ; +[] rdfs:seeAlso ex:C ; . -[] rdfs:seeAlso ns1:B ; +[] rdfs:seeAlso ex:B ; . -[] ns1:has +[] ex:has [ - rdfs:seeAlso ns1:B ; + rdfs:seeAlso ex:B ; ] ; .