Skip to content

Commit 1a15897

Browse files
mgberg authored and nicholascar committed
Add RDFLib Path to SHACL path utility and corresponding tests (#2990)
* shacl path parser: Add additional test case
* shacl utilities: Add new SHACL path building utility with corresponding tests

---------

Co-authored-by: Nicholas Car <[email protected]>

# Conflicts:
#	rdflib/extras/shacl.py
1 parent aca8be0 commit 1a15897

File tree

2 files changed

+203
-6
lines changed

2 files changed

+203
-6
lines changed

rdflib/extras/shacl.py

Lines changed: 121 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,28 @@
66

77
from typing import TYPE_CHECKING, Optional, Union
88

9-
from rdflib import Graph, Literal, URIRef, paths
9+
from rdflib import BNode, Graph, Literal, URIRef, paths
10+
from rdflib.collection import Collection
1011
from rdflib.namespace import RDF, SH
1112
from rdflib.paths import Path
12-
from rdflib.term import Node
13+
14+
if TYPE_CHECKING:
15+
from rdflib.graph import _ObjectType
16+
from rdflib.term import IdentifiedNode
1317

1418

1519
class SHACLPathError(Exception):
    """Raised when a SHACL path cannot be parsed from, or built into, a graph."""
1721

1822

23+
# Map the variable length path operators to the corresponding SHACL path predicates.
# Keys are the modifier constants exposed by rdflib.paths; values are the SH
# predicates that _build_path_component attaches to the blank node it creates
# for a paths.MulPath. A modifier missing from this table is reported as a
# SHACLPathError by that function.
_PATH_MOD_TO_PRED = {
    paths.ZeroOrMore: SH.zeroOrMorePath,
    paths.OneOrMore: SH.oneOrMorePath,
    paths.ZeroOrOne: SH.zeroOrOnePath,
}
29+
30+
1931
# This implementation is roughly based on
2032
# pyshacl.helper.sparql_query_helper::SPARQLQueryHelper._shacl_path_to_sparql_path
2133
def parse_shacl_path(
@@ -91,3 +103,110 @@ def parse_shacl_path(
91103
raise SHACLPathError(f"Cannot parse {repr(path_identifier)} as a SHACL Path.")
92104

93105
return path
106+
107+
108+
def _build_path_component(
    graph: Graph, path_component: URIRef | Path
) -> IdentifiedNode:
    """
    Helper method that implements the recursive component of SHACL path
    triple construction.

    :param graph: A :class:`~rdflib.graph.Graph` into which to insert triples
    :param path_component: A :class:`~rdflib.term.URIRef` or
        :class:`~rdflib.paths.Path` that is part of a path expression
    :return: The :class:`~rdflib.term.IdentifiedNode` of the resource in the
        graph that corresponds to the provided path_component
    :raises TypeError: If path_component is neither a
        :class:`~rdflib.term.URIRef` nor a :class:`~rdflib.paths.Path`
    :raises SHACLPathError: If a sequence or alternative path has fewer than
        two items, if a variable length path uses an unknown modifier, or if
        the path is of a kind that has no SHACL representation
    """
    # Literals or other types are not allowed
    if not isinstance(path_component, (URIRef, Path)):
        raise TypeError(
            f"Objects of type {type(path_component)} are not valid "
            + "components of a SHACL path."
        )

    # If the path component is a URI, return it
    elif isinstance(path_component, URIRef):
        return path_component
    # Otherwise, the path component is represented as a blank node
    bnode = BNode()

    # Handle Sequence Paths
    if isinstance(path_component, paths.SequencePath):
        # Sequence paths are a Collection directly with at least two items
        if len(path_component.args) < 2:
            raise SHACLPathError(
                "A list of SHACL Sequence Paths must contain at least two path items."
            )
        Collection(
            graph,
            bnode,
            [_build_path_component(graph, arg) for arg in path_component.args],
        )

    # Handle Inverse Paths
    elif isinstance(path_component, paths.InvPath):
        graph.add(
            (bnode, SH.inversePath, _build_path_component(graph, path_component.arg))
        )

    # Handle Alternative Paths
    elif isinstance(path_component, paths.AlternativePath):
        # Alternative paths are a Collection but referenced by sh:alternativePath
        # with at least two items
        if len(path_component.args) < 2:
            raise SHACLPathError(
                "List of SHACL alternate paths must have at least two path items."
            )
        coll = Collection(
            graph,
            BNode(),
            [_build_path_component(graph, arg) for arg in path_component.args],
        )
        graph.add((bnode, SH.alternativePath, coll.uri))

    # Handle Variable Length Paths
    elif isinstance(path_component, paths.MulPath):
        # Get the predicate corresponding to the path modifier
        pred = _PATH_MOD_TO_PRED.get(path_component.mod)
        if pred is None:
            raise SHACLPathError(f"Unknown path modifier {path_component.mod}")
        graph.add((bnode, pred, _build_path_component(graph, path_component.path)))

    # Any other kind of Path (for example a negated property set) has no SHACL
    # equivalent. Fail loudly rather than hand back a blank node that has no
    # triples describing it, which would be an ill-formed sh:path value.
    else:
        raise SHACLPathError(
            f"Paths of type {type(path_component).__name__} cannot be "
            "represented as a SHACL path."
        )

    # Return the blank node created for the provided path_component
    return bnode
178+
179+
180+
def build_shacl_path(
    path: URIRef | Path, target_graph: Graph | None = None
) -> tuple[IdentifiedNode, Graph | None]:
    """
    Serialise an RDFLib path expression as SHACL path triples.

    A plain :class:`~rdflib.term.URIRef` is already a complete SHACL path, so
    it is handed back untouched and no graph is created or modified. A
    :class:`~rdflib.paths.Path` is expanded recursively into triples, and the
    node at the root of that structure is returned; that node is what should
    be used as the object of an ``sh:path`` triple.

    :param path: A :class:`~rdflib.term.URIRef` for a simple path or a
        :class:`~rdflib.paths.Path` for a complex path
    :param target_graph: Optionally, the :class:`~rdflib.graph.Graph` that
        should receive the constructed triples. When omitted, a fresh graph
        is created on demand
    :return: A (path_identifier, graph) tuple where:
        - path_identifier: ``path`` itself when it is a
          :class:`~rdflib.term.URIRef`; otherwise the node at the root of the
          SHACL path expression that was written to the graph.
        - graph: ``None`` when ``path`` is a :class:`~rdflib.term.URIRef`
          (nothing was written, even if ``target_graph`` was supplied);
          otherwise ``target_graph`` if given, or the newly created graph
          holding the path triples.
    """
    # Simple predicate path: nothing to build.
    if isinstance(path, URIRef):
        return path, None

    # Write into the caller's graph when one was given, else into a new one.
    destination = Graph() if target_graph is None else target_graph

    # Expand the path expression; the helper returns the root node.
    root = _build_path_component(destination, path)
    return root, destination

test/test_extras/test_shacl_extras.py

Lines changed: 82 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@
44

55
import pytest
66

7-
from rdflib import Graph, URIRef
8-
from rdflib.extras.shacl import SHACLPathError, parse_shacl_path
7+
from rdflib import Graph, Literal, URIRef, paths
8+
from rdflib.compare import graph_diff
9+
from rdflib.extras.shacl import SHACLPathError, build_shacl_path, parse_shacl_path
910
from rdflib.namespace import SH, Namespace
1011
from rdflib.paths import Path
1112

@@ -109,7 +110,32 @@ def path_source_data():
109110
) ;
110111
] ;
111112
.
112-
ex:TestPropShape10
113+
ex:TestPropShape10a
114+
sh:path (
115+
[
116+
sh:zeroOrMorePath [
117+
sh:inversePath ex:pred1 ;
118+
] ;
119+
]
120+
[
121+
sh:alternativePath (
122+
[
123+
sh:zeroOrMorePath [
124+
sh:inversePath ex:pred1 ;
125+
] ;
126+
]
127+
ex:pred1
128+
[
129+
sh:oneOrMorePath ex:pred2 ;
130+
]
131+
[
132+
sh:zeroOrMorePath ex:pred3 ;
133+
]
134+
) ;
135+
]
136+
) ;
137+
.
138+
ex:TestPropShape10b
113139
sh:path (
114140
[
115141
sh:zeroOrMorePath [
@@ -192,7 +218,13 @@ def path_source_data():
192218
~EX.pred1 | EX.pred1 / EX.pred2 | EX.pred1 | EX.pred2 | EX.pred3,
193219
),
194220
(
195-
EX.TestPropShape10,
221+
EX.TestPropShape10a,
222+
~EX.pred1
223+
* "*"
224+
/ (~EX.pred1 * "*" | EX.pred1 | EX.pred2 * "+" | EX.pred3 * "*"), # type: ignore[operator]
225+
),
226+
(
227+
EX.TestPropShape10b,
196228
~EX.pred1
197229
* "*"
198230
/ (~EX.pred1 * "*" | EX.pred1 | EX.pred2 * "+" | EX.pred3 * "*"), # type: ignore[operator]
@@ -216,3 +248,49 @@ def test_parse_shacl_path(
216248
parse_shacl_path(path_source_data, path_root) # type: ignore[arg-type]
217249
else:
218250
assert parse_shacl_path(path_source_data, path_root) == expected # type: ignore[arg-type]
251+
252+
253+
@pytest.mark.parametrize(
    ("resource", "path"),
    [
        # Single SHACL Path
        (EX.TestPropShape1, EX.pred1),
        (EX.TestPropShape2a, EX.pred1 / EX.pred2 / EX.pred3),
        (EX.TestPropShape3, ~EX.pred1),
        (EX.TestPropShape4a, EX.pred1 | EX.pred2 | EX.pred3),
        (EX.TestPropShape5, EX.pred1 * "*"),  # type: ignore[operator]
        (EX.TestPropShape6, EX.pred1 * "+"),  # type: ignore[operator]
        (EX.TestPropShape7, EX.pred1 * "?"),  # type: ignore[operator]
        # SHACL Path Combinations
        (EX.TestPropShape8, ~EX.pred1 * "*"),
        (
            EX.TestPropShape10a,
            ~EX.pred1
            * "*"
            / (~EX.pred1 * "*" | EX.pred1 | EX.pred2 * "+" | EX.pred3 * "*"),  # type: ignore[operator]
        ),
        # Invalid inputs: the first element is the exception type expected
        (TypeError, Literal("Not a valid path")),
        (SHACLPathError, paths.SequencePath(SH.targetClass)),
        (SHACLPathError, paths.AlternativePath(SH.targetClass)),
    ],
)
def test_build_shacl_path(
    path_source_data: Graph, resource: URIRef | type, path: Union[URIRef, Path]
):
    """
    Check build_shacl_path against the shapes in the ``path_source_data`` fixture.

    When ``resource`` is a class it is treated as the exception that building
    ``path`` must raise. Otherwise ``resource`` names a shape in the fixture
    graph whose ``sh:path`` value is the expected result of building ``path``.
    """
    # Error cases carry an exception class in place of a shape IRI.
    if isinstance(resource, type):
        with pytest.raises(resource):
            build_shacl_path(path)
        return

    wanted_root = path_source_data.value(resource, SH.path)
    built_root, built_graph = build_shacl_path(path)

    # Simple predicate paths come straight back with no graph attached.
    if isinstance(wanted_root, URIRef):
        assert built_root == wanted_root
        assert built_graph is None
        return

    # Complex paths: compare the generated triples with the description of the
    # expected root node in the fixture graph. Neither side of the diff may
    # contain triples the other lacks.
    assert isinstance(built_graph, Graph)
    wanted_graph = path_source_data.cbd(wanted_root)  # type: ignore[arg-type]
    _, only_wanted, only_built = graph_diff(wanted_graph, built_graph)
    assert len(only_wanted) == 0
    assert len(only_built) == 0

0 commit comments

Comments
 (0)