Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 31 additions & 25 deletions cwltool/command_line_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,9 +200,7 @@ def revmap_file(
outside the container. Recognizes files in the pathmapper or remaps
internal output directories to the external directory.
"""
split = urllib.parse.urlsplit(outdir)
if not split.scheme:
outdir = file_uri(str(outdir))
outdir_uri, outdir_path = file_uri(str(outdir)), outdir

# builder.outdir is the inner (container/compute node) output directory
# outdir is the outer (host/storage system) output directory
Expand Down Expand Up @@ -236,21 +234,20 @@ def revmap_file(
):
f["location"] = revmap_f[1]
elif (
uripath == outdir
or uripath.startswith(outdir + os.sep)
or uripath.startswith(outdir + "/")
uripath == outdir_uri
or uripath.startswith(outdir_uri + os.sep)
or uripath.startswith(outdir_uri + "/")
):
f["location"] = file_uri(path)
f["location"] = uripath
elif (
path == builder.outdir
or path.startswith(builder.outdir + os.sep)
or path.startswith(builder.outdir + "/")
):
f["location"] = builder.fs_access.join(
outdir, path[len(builder.outdir) + 1 :]
joined_path = builder.fs_access.join(
outdir_path, path[len(builder.outdir) + 1 :]
)
elif not os.path.isabs(path):
f["location"] = builder.fs_access.join(outdir, path)
f["location"] = file_uri(joined_path)
else:
raise WorkflowException(
"Output file path %s must be within designated output directory (%s) or an input "
Expand Down Expand Up @@ -1337,6 +1334,15 @@ def collect_output(
)
try:
prefix = fs_access.glob(outdir)
sorted_glob_result = sorted(
fs_access.glob(fs_access.join(outdir, gb)),
key=cmp_to_key(
cast(
Callable[[str, str], int],
locale.strcoll,
)
),
)
r.extend(
[
{
Expand All @@ -1347,24 +1353,24 @@ def collect_output(
g[len(prefix[0]) + 1 :]
),
),
"basename": os.path.basename(g),
"nameroot": os.path.splitext(
os.path.basename(g)
)[0],
"nameext": os.path.splitext(
os.path.basename(g)
)[1],
"basename": decoded_basename,
"nameroot": os.path.splitext(decoded_basename)[
0
],
"nameext": os.path.splitext(decoded_basename)[
1
],
"class": "File"
if fs_access.isfile(g)
else "Directory",
}
for g in sorted(
fs_access.glob(fs_access.join(outdir, gb)),
key=cmp_to_key(
cast(
Callable[[str, str], int],
locale.strcoll,
)
for g, decoded_basename in zip(
sorted_glob_result,
map(
lambda x: os.path.basename(
urllib.parse.unquote(x)
),
sorted_glob_result,
),
)
]
Expand Down
78 changes: 75 additions & 3 deletions tests/test_path_checks.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,19 @@
from pathlib import Path

import pytest
import urllib.parse

from cwltool.main import main
from pathlib import Path
from typing import cast
from ruamel.yaml.comments import CommentedMap
from schema_salad.sourceline import cmap

from cwltool.main import main
from cwltool.command_line_tool import CommandLineTool
from cwltool.context import LoadingContext, RuntimeContext
from cwltool.update import INTERNAL_VERSION
from cwltool.utils import CWLObjectType
from .util import needs_docker


script = """
#!/usr/bin/env cwl-runner
cwlVersion: v1.0
Expand Down Expand Up @@ -95,3 +103,67 @@ def test_unicode_in_output_files(tmp_path: Path, filename: str) -> None:
filename,
]
assert main(params) == 0


def test_clt_returns_specialchar_names(tmp_path: Path) -> None:
"""Confirm that special characters in filenames do not cause problems."""
loading_context = LoadingContext(
{
"metadata": {
"cwlVersion": INTERNAL_VERSION,
"http://commonwl.org/cwltool#original_cwlVersion": INTERNAL_VERSION,
}
}
)
clt = CommandLineTool(
cast(
CommentedMap,
cmap(
{
"cwlVersion": INTERNAL_VERSION,
"class": "CommandLineTool",
"inputs": [],
"outputs": [],
"requirements": [],
}
),
),
loading_context,
)

# Reserved characters will be URL encoded during the creation of a file URI
# Internal references to files are in URI form, and are therefore URL encoded
# Final output files should not retain their URL encoded filenames
rfc_3986_gen_delims = [":", "/", "?", "#", "[", "]", "@"]
rfc_3986_sub_delims = ["!", "$", "&", "'", "(", ")", "*", "+", ",", ";", "="]
unix_reserved = ["/", "\0"]
reserved = [
special_char
for special_char in (rfc_3986_gen_delims + rfc_3986_sub_delims)
if special_char not in unix_reserved
]

# Mock an "output" file with the above special characters in its name
special = "".join(reserved)
output_schema = cast(
CWLObjectType, {"type": "File", "outputBinding": {"glob": special}}
)
mock_output = tmp_path / special
mock_output.touch()

# Prepare minimal arguments for CommandLineTool.collect_output()
builder = clt._init_job({}, RuntimeContext())
builder.pathmapper = clt.make_path_mapper(
builder.files, builder.stagedir, RuntimeContext(), True
)
fs_access = builder.make_fs_access(str(tmp_path))

result = cast(
CWLObjectType,
clt.collect_output(output_schema, builder, str(tmp_path), fs_access),
)

assert result["class"] == "File"
assert result["basename"] == special
assert result["nameroot"] == special
assert str(result["location"]).endswith(urllib.parse.quote(special))