Skip to content

Commit c580c00

Browse files
edmondchuc, nicholascar, ddeschepper, slahn
authored
Merge 7-maintenance branch into 7.x (#3222)
* 7.1.4 pre-release
* fix namespace prefixes in longturtle serialization (#3134)
Co-authored-by: Daan de Schepper <[email protected]>
* Fix failing webtest (#3194)
* test: fix failing webtest
Fixes #3192
* Revert "remove old hacks against 2to3 (#3076)" (#3195)
This reverts commit b74c657.
* Specify `Optional` parameters in `Graph.triples_choices` (#3075)
* Specify `Optional` parameters in `Graph.triples_choices`
The two non-list parameters can be `None`, but this is not reflected in the type hint. Also introduces a type alias to simplify method signatures.
* style: remove unused imports
---------
Co-authored-by: Nicholas Car <[email protected]>
Co-authored-by: Edmond Chuc <[email protected]>
Co-authored-by: Edmond Chuc <[email protected]>
* feat: canonicalization with longturtle serializer now optional (#3197)
* feat: canonicalization with longturtle serializer now optional
Fixes #3196
* docs: fix docs build error by removing py obj reference to canon
---------
Co-authored-by: Nicholas Car <[email protected]>
Co-authored-by: Daan de Schepper <[email protected]>
Co-authored-by: Sigmund Lahn <[email protected]>
Co-authored-by: Nicholas Car <[email protected]>
1 parent 721164c commit c580c00

File tree

10 files changed

+111
-95
lines changed

10 files changed

+111
-95
lines changed

rdflib/graph.py

Lines changed: 8 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,11 @@
355355
_TripleOrQuadSelectorType = Union["_TripleSelectorType", "_QuadSelectorType"]
356356
_TriplePathType = Tuple["_SubjectType", Path, "_ObjectType"]
357357
_TripleOrTriplePathType = Union["_TripleType", "_TriplePathType"]
358+
_TripleChoiceType = Union[
359+
Tuple[List[_SubjectType], Optional[_PredicateType], Optional[_ObjectType]],
360+
Tuple[Optional[_SubjectType], List[_PredicateType], Optional[_ObjectType]],
361+
Tuple[Optional[_SubjectType], Optional[_PredicateType], List[_ObjectType]],
362+
]
358363

359364
_GraphT = TypeVar("_GraphT", bound="Graph")
360365
_ConjunctiveGraphT = TypeVar("_ConjunctiveGraphT", bound="ConjunctiveGraph")
@@ -994,11 +999,7 @@ def predicate_objects(
994999

9951000
def triples_choices(
9961001
self,
997-
triple: Union[
998-
Tuple[List[_SubjectType], _PredicateType, _ObjectType],
999-
Tuple[_SubjectType, List[_PredicateType], _ObjectType],
1000-
Tuple[_SubjectType, _PredicateType, List[_ObjectType]],
1001-
],
1002+
triple: _TripleChoiceType,
10021003
context: Optional[_ContextType] = None,
10031004
) -> Generator[_TripleType, None, None]:
10041005
subject, predicate, object_ = triple
@@ -2196,11 +2197,7 @@ def quads(
21962197

21972198
def triples_choices(
21982199
self,
2199-
triple: Union[
2200-
Tuple[List[_SubjectType], _PredicateType, _ObjectType],
2201-
Tuple[_SubjectType, List[_PredicateType], _ObjectType],
2202-
Tuple[_SubjectType, _PredicateType, List[_ObjectType]],
2203-
],
2200+
triple: _TripleChoiceType,
22042201
context: Optional[_ContextType] = None,
22052202
) -> Generator[_TripleType, None, None]:
22062203
"""Iterate over all the triples in the entire conjunctive graph"""
@@ -2946,11 +2943,7 @@ def __isub__(self: _GraphT, other: Iterable[_TripleType]) -> NoReturn:
29462943

29472944
def triples_choices(
29482945
self,
2949-
triple: Union[
2950-
Tuple[List[_SubjectType], _PredicateType, _ObjectType],
2951-
Tuple[_SubjectType, List[_PredicateType], _ObjectType],
2952-
Tuple[_SubjectType, _PredicateType, List[_ObjectType]],
2953-
],
2946+
triple: _TripleChoiceType,
29542947
context: Optional[_ContextType] = None,
29552948
) -> Generator[_TripleType, None, None]:
29562949
subject, predicate, object_ = triple

rdflib/plugins/parsers/rdfxml.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,8 @@ def document_element_start(
298298
self, name: Tuple[str, str], qname, attrs: AttributesImpl
299299
) -> None:
300300
if name[0] and URIRef("".join(name)) == RDFVOC.RDF:
301-
next = self.next
301+
# Cheap hack so 2to3 doesn't turn it into __next__
302+
next = getattr(self, "next")
302303
next.start = self.node_element_start
303304
next.end = self.node_element_end
304305
else:
@@ -315,7 +316,8 @@ def node_element_start(
315316
current = self.current
316317
absolutize = self.absolutize
317318

318-
next = self.next
319+
# Cheap hack so 2to3 doesn't turn it into __next__
320+
next = getattr(self, "next")
319321
next.start = self.property_element_start
320322
next.end = self.property_element_end
321323

@@ -408,7 +410,8 @@ def property_element_start(
408410
current = self.current
409411
absolutize = self.absolutize
410412

411-
next = self.next
413+
# Cheap hack so 2to3 doesn't turn it into __next__
414+
next = getattr(self, "next")
412415
object: Optional[_ObjectType] = None
413416
current.data = None
414417
current.list = None

rdflib/plugins/serializers/longturtle.py

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -39,21 +39,20 @@
3939

4040

4141
class LongTurtleSerializer(RecursiveSerializer):
42+
"""LongTurtle, a Turtle serialization format.
43+
44+
When the optional parameter ``canon`` is set to :py:obj:`True`, the graph is canonicalized
45+
before serialization. This normalizes blank node identifiers and allows for
46+
deterministic serialization of the graph. Useful when consistent outputs are required.
47+
"""
48+
4249
short_name = "longturtle"
4350
indentString = " "
4451

4552
def __init__(self, store):
4653
self._ns_rewrite = {}
47-
store = to_canonical_graph(store)
48-
content = store.serialize(format="application/n-triples")
49-
lines = content.split("\n")
50-
lines.sort()
51-
graph = Graph()
52-
graph.parse(
53-
data="\n".join(lines), format="application/n-triples", skolemize=True
54-
)
55-
graph = graph.de_skolemize()
56-
super(LongTurtleSerializer, self).__init__(graph)
54+
self._canon = False
55+
super(LongTurtleSerializer, self).__init__(store)
5756
self.keywords = {RDF.type: "a"}
5857
self.reset()
5958
self.stream = None
@@ -83,11 +82,34 @@ def addNamespace(self, prefix, namespace):
8382
super(LongTurtleSerializer, self).addNamespace(prefix, namespace)
8483
return prefix
8584

85+
def canonize(self):
86+
"""Apply canonicalization to the store.
87+
88+
This normalizes blank node identifiers and allows for deterministic
89+
serialization of the graph.
90+
"""
91+
if not self._canon:
92+
return
93+
94+
namespace_manager = self.store.namespace_manager
95+
store = to_canonical_graph(self.store)
96+
content = store.serialize(format="application/n-triples")
97+
lines = content.split("\n")
98+
lines.sort()
99+
graph = Graph()
100+
graph.parse(
101+
data="\n".join(lines), format="application/n-triples", skolemize=True
102+
)
103+
graph = graph.de_skolemize()
104+
graph.namespace_manager = namespace_manager
105+
self.store = graph
106+
86107
def reset(self):
87108
super(LongTurtleSerializer, self).reset()
88109
self._shortNames = {}
89110
self._started = False
90111
self._ns_rewrite = {}
112+
self.canonize()
91113

92114
def serialize(
93115
self,
@@ -97,6 +119,7 @@ def serialize(
97119
spacious: Optional[bool] = None,
98120
**kwargs: Any,
99121
) -> None:
122+
self._canon = kwargs.get("canon", False)
100123
self.reset()
101124
self.stream = stream
102125
# if base is given here, use, if not and a base is set for the graph use that

rdflib/plugins/stores/berkeleydb.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -428,7 +428,8 @@ def remove( # type: ignore[override]
428428
cursor = index.cursor(txn=txn)
429429
try:
430430
cursor.set_range(key)
431-
current = cursor.next
431+
# Hack to stop 2to3 converting this to next(cursor)
432+
current = getattr(cursor, "next")()
432433
except db.DBNotFoundError:
433434
current = None
434435
cursor.close()
@@ -505,7 +506,8 @@ def triples(
505506
cursor = index.cursor(txn=txn)
506507
try:
507508
cursor.set_range(key)
508-
current = cursor.next
509+
# Cheap hack so 2to3 doesn't convert to next(cursor)
510+
current = getattr(cursor, "next")()
509511
except db.DBNotFoundError:
510512
current = None
511513
cursor.close()
@@ -537,7 +539,8 @@ def __len__(self, context: Optional[_ContextType] = None) -> int:
537539
key, value = current
538540
if key.startswith(prefix):
539541
count += 1
540-
current = cursor.next
542+
# Hack to stop 2to3 converting this to next(cursor)
543+
current = getattr(cursor, "next")()
541544
else:
542545
break
543546
cursor.close()
@@ -590,7 +593,8 @@ def namespaces(self) -> Generator[Tuple[str, URIRef], None, None]:
590593
while current:
591594
prefix, namespace = current
592595
results.append((prefix.decode("utf-8"), namespace.decode("utf-8")))
593-
current = cursor.next
596+
# Hack to stop 2to3 converting this to next(cursor)
597+
current = getattr(cursor, "next")()
594598
cursor.close()
595599
for prefix, namespace in results:
596600
yield prefix, URIRef(namespace)
@@ -633,7 +637,8 @@ def contexts(
633637
cursor = index.cursor()
634638
try:
635639
cursor.set_range(key)
636-
current = cursor.next
640+
# Hack to stop 2to3 converting this to next(cursor)
641+
current = getattr(cursor, "next")()
637642
except db.DBNotFoundError:
638643
current = None
639644
cursor.close()

rdflib/plugins/stores/sparqlstore.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
_TripleType,
3636
_ContextType,
3737
_QuadType,
38+
_TripleChoiceType,
3839
_TriplePatternType,
3940
_SubjectType,
4041
_PredicateType,
@@ -367,11 +368,7 @@ def triples( # type: ignore[override]
367368

368369
def triples_choices(
369370
self,
370-
_: Tuple[
371-
Union[_SubjectType, List[_SubjectType]],
372-
Union[_PredicateType, List[_PredicateType]],
373-
Union[_ObjectType, List[_ObjectType]],
374-
],
371+
_: _TripleChoiceType,
375372
context: Optional[_ContextType] = None,
376373
) -> Generator[
377374
Tuple[

rdflib/store.py

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@
3636
Generator,
3737
Iterable,
3838
Iterator,
39-
List,
4039
Mapping,
4140
Optional,
4241
Tuple,
@@ -49,10 +48,8 @@
4948
from rdflib.graph import (
5049
Graph,
5150
_ContextType,
52-
_ObjectType,
53-
_PredicateType,
5451
_QuadType,
55-
_SubjectType,
52+
_TripleChoiceType,
5653
_TriplePatternType,
5754
_TripleType,
5855
)
@@ -281,11 +278,7 @@ def remove(
281278

282279
def triples_choices(
283280
self,
284-
triple: Union[
285-
Tuple[List[_SubjectType], _PredicateType, _ObjectType],
286-
Tuple[_SubjectType, List[_PredicateType], _ObjectType],
287-
Tuple[_SubjectType, _PredicateType, List[_ObjectType]],
288-
],
281+
triple: _TripleChoiceType,
289282
context: Optional[_ContextType] = None,
290283
) -> Generator[
291284
Tuple[
Lines changed: 33 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,72 +1,74 @@
1+
PREFIX cn: <https://linked.data.gov.au/def/cn/>
2+
PREFIX ex: <http://example.com/>
13
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
24
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
3-
PREFIX schema: <https://schema.org/>
5+
PREFIX sdo: <https://schema.org/>
46
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
57

6-
<http://example.com/nicholas>
7-
a schema:Person ;
8-
schema:age 41 ;
9-
schema:alternateName
8+
ex:nicholas
9+
a sdo:Person ;
10+
sdo:age 41 ;
11+
sdo:alternateName
1012
[
11-
schema:name "Dr N.J. Car" ;
13+
sdo:name "Dr N.J. Car" ;
1214
] ,
1315
"N.J. Car" ,
1416
"Nick Car" ;
15-
schema:name
17+
sdo:name
1618
[
17-
a <https://linked.data.gov.au/def/cn/CompoundName> ;
18-
schema:hasPart
19+
a cn:CompoundName ;
20+
sdo:hasPart
1921
[
20-
a <https://linked.data.gov.au/def/cn/CompoundName> ;
21-
schema:hasPart
22+
a cn:CompoundName ;
23+
sdo:hasPart
2224
[
23-
a <https://linked.data.gov.au/def/cn/CompoundName> ;
25+
a cn:CompoundName ;
2426
rdf:value "Car" ;
2527
] ,
2628
[
27-
a <https://linked.data.gov.au/def/cn/CompoundName> ;
29+
a cn:CompoundName ;
2830
rdf:value "Maxov" ;
2931
] ;
3032
] ,
3133
[
32-
a <https://linked.data.gov.au/def/cn/CompoundName> ;
34+
a cn:CompoundName ;
3335
rdf:value "Nicholas" ;
3436
] ,
3537
[
36-
a <https://linked.data.gov.au/def/cn/CompoundName> ;
38+
a cn:CompoundName ;
3739
rdf:value "John" ;
3840
] ;
3941
] ;
40-
schema:worksFor <https://kurrawong.ai> ;
42+
sdo:worksFor <https://kurrawong.ai> ;
4143
.
4244

4345
<https://kurrawong.ai>
44-
a schema:Organization ;
45-
schema:location <https://kurrawong.ai/hq> ;
46+
a sdo:Organization ;
47+
sdo:location <https://kurrawong.ai/hq> ;
4648
.
4749

4850
<https://kurrawong.ai/hq>
49-
a schema:Place ;
50-
schema:address
51+
a sdo:Place ;
52+
sdo:address
5153
[
52-
a schema:PostalAddress ;
53-
schema:addressCountry
54+
a sdo:PostalAddress ;
55+
sdo:addressCountry
5456
[
55-
schema:identifier "au" ;
56-
schema:name "Australia" ;
57+
sdo:identifier "au" ;
58+
sdo:name "Australia" ;
5759
] ;
58-
schema:addressLocality "Shorncliffe" ;
59-
schema:addressRegion "QLD" ;
60-
schema:postalCode 4017 ;
61-
schema:streetAddress (
60+
sdo:addressLocality "Shorncliffe" ;
61+
sdo:addressRegion "QLD" ;
62+
sdo:postalCode 4017 ;
63+
sdo:streetAddress (
6264
72
6365
"Yundah"
6466
"Street"
6567
) ;
6668
] ;
67-
schema:geo
69+
sdo:geo
6870
[
69-
schema:polygon "POLYGON((153.082403 -27.325801, 153.08241 -27.32582, 153.082943 -27.325612, 153.083010 -27.325742, 153.083543 -27.325521, 153.083456 -27.325365, 153.082403 -27.325801))"^^geo:wktLiteral ;
71+
sdo:polygon "POLYGON((153.082403 -27.325801, 153.08241 -27.32582, 153.082943 -27.325612, 153.083010 -27.325742, 153.083543 -27.325521, 153.083456 -27.325365, 153.082403 -27.325801))"^^geo:wktLiteral ;
7072
] ;
71-
schema:name "KurrawongAI HQ" ;
73+
sdo:name "KurrawongAI HQ" ;
7274
.

test/test_graph/test_graph.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -399,7 +399,7 @@ def test_guess_format_for_parse_http_text_plain():
399399
assert len(graph) > 0
400400

401401
# A url that returns content-type text/html.
402-
url = "https://github.com/RDFLib/rdflib/issues/2734"
402+
url = "https://www.w3.org/TR/REC-rdf-syntax/"
403403
with pytest.raises(PluginException):
404404
graph = Graph().parse(url)
405405

test/test_serializers/test_serializer_longturtle.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ def test_longturtle():
167167
g.bind("sdo", SDO)
168168

169169
# run the long turtle serializer
170-
output = g.serialize(format="longturtle")
170+
output = g.serialize(format="longturtle", canon=True)
171171

172172
# fix the target
173173
current_dir = Path.cwd() # Get the current directory

0 commit comments

Comments
 (0)