diff --git a/cwltool/command_line_tool.py b/cwltool/command_line_tool.py index 4554f5014..08b103855 100644 --- a/cwltool/command_line_tool.py +++ b/cwltool/command_line_tool.py @@ -200,9 +200,7 @@ def revmap_file( outside the container. Recognizes files in the pathmapper or remaps internal output directories to the external directory. """ - split = urllib.parse.urlsplit(outdir) - if not split.scheme: - outdir = file_uri(str(outdir)) + outdir_uri, outdir_path = file_uri(str(outdir)), outdir # builder.outdir is the inner (container/compute node) output directory # outdir is the outer (host/storage system) output directory @@ -236,21 +234,20 @@ def revmap_file( ): f["location"] = revmap_f[1] elif ( - uripath == outdir - or uripath.startswith(outdir + os.sep) - or uripath.startswith(outdir + "/") + uripath == outdir_uri + or uripath.startswith(outdir_uri + os.sep) + or uripath.startswith(outdir_uri + "/") ): - f["location"] = file_uri(path) + f["location"] = uripath elif ( path == builder.outdir or path.startswith(builder.outdir + os.sep) or path.startswith(builder.outdir + "/") ): - f["location"] = builder.fs_access.join( - outdir, path[len(builder.outdir) + 1 :] + joined_path = builder.fs_access.join( + outdir_path, path[len(builder.outdir) + 1 :] ) - elif not os.path.isabs(path): - f["location"] = builder.fs_access.join(outdir, path) + f["location"] = file_uri(joined_path) else: raise WorkflowException( "Output file path %s must be within designated output directory (%s) or an input " @@ -1337,6 +1334,15 @@ def collect_output( ) try: prefix = fs_access.glob(outdir) + sorted_glob_result = sorted( + fs_access.glob(fs_access.join(outdir, gb)), + key=cmp_to_key( + cast( + Callable[[str, str], int], + locale.strcoll, + ) + ), + ) r.extend( [ { @@ -1347,24 +1353,24 @@ def collect_output( g[len(prefix[0]) + 1 :] ), ), - "basename": os.path.basename(g), - "nameroot": os.path.splitext( - os.path.basename(g) - )[0], - "nameext": os.path.splitext( - os.path.basename(g) - )[1], + "basename": decoded_basename, + "nameroot": os.path.splitext(decoded_basename)[ + 0 + ], + "nameext": os.path.splitext(decoded_basename)[ + 1 + ], "class": "File" if fs_access.isfile(g) else "Directory", } - for g in sorted( - fs_access.glob(fs_access.join(outdir, gb)), - key=cmp_to_key( - cast( - Callable[[str, str], int], - locale.strcoll, - ) + for g, decoded_basename in zip( + sorted_glob_result, + map( + lambda x: os.path.basename( + urllib.parse.unquote(x) + ), + sorted_glob_result, ), ) ] diff --git a/tests/test_path_checks.py b/tests/test_path_checks.py index e8a300fed..df9d7f6a0 100644 --- a/tests/test_path_checks.py +++ b/tests/test_path_checks.py @@ -1,11 +1,19 @@ -from pathlib import Path - import pytest +import urllib.parse -from cwltool.main import main +from pathlib import Path +from typing import cast +from ruamel.yaml.comments import CommentedMap +from schema_salad.sourceline import cmap +from cwltool.main import main +from cwltool.command_line_tool import CommandLineTool +from cwltool.context import LoadingContext, RuntimeContext +from cwltool.update import INTERNAL_VERSION +from cwltool.utils import CWLObjectType from .util import needs_docker + script = """ #!/usr/bin/env cwl-runner cwlVersion: v1.0 @@ -95,3 +103,67 @@ def test_unicode_in_output_files(tmp_path: Path, filename: str) -> None: filename, ] assert main(params) == 0 + + +def test_clt_returns_specialchar_names(tmp_path: Path) -> None: + """Confirm that special characters in filenames do not cause problems.""" + loading_context = LoadingContext( + { + "metadata": { + "cwlVersion": INTERNAL_VERSION, + "http://commonwl.org/cwltool#original_cwlVersion": INTERNAL_VERSION, + } + } + ) + clt = CommandLineTool( + cast( + CommentedMap, + cmap( + { + "cwlVersion": INTERNAL_VERSION, + "class": "CommandLineTool", + "inputs": [], + "outputs": [], + "requirements": [], + } + ), + ), + loading_context, + ) + + # Reserved characters will be URL encoded during the creation of a file URI + # Internal references to files are in URI form, and are therefore URL encoded + # Final output files should not retain their URL encoded filenames + rfc_3986_gen_delims = [":", "/", "?", "#", "[", "]", "@"] + rfc_3986_sub_delims = ["!", "$", "&", "'", "(", ")", "*", "+", ",", ";", "="] + unix_reserved = ["/", "\0"] + reserved = [ + special_char + for special_char in (rfc_3986_gen_delims + rfc_3986_sub_delims) + if special_char not in unix_reserved + ] + + # Mock an "output" file with the above special characters in its name + special = "".join(reserved) + output_schema = cast( + CWLObjectType, {"type": "File", "outputBinding": {"glob": special}} + ) + mock_output = tmp_path / special + mock_output.touch() + + # Prepare minimal arguments for CommandLineTool.collect_output() + builder = clt._init_job({}, RuntimeContext()) + builder.pathmapper = clt.make_path_mapper( + builder.files, builder.stagedir, RuntimeContext(), True + ) + fs_access = builder.make_fs_access(str(tmp_path)) + + result = cast( + CWLObjectType, + clt.collect_output(output_schema, builder, str(tmp_path), fs_access), + ) + + assert result["class"] == "File" + assert result["basename"] == special + assert result["nameroot"] == special + assert str(result["location"]).endswith(urllib.parse.quote(special))