From 067946db94924989f38f5b5d567bbb265eb51381 Mon Sep 17 00:00:00 2001 From: Ralf Grubenmann Date: Mon, 15 Mar 2021 16:48:03 +0100 Subject: [PATCH 1/5] add unicode regex and fix path encoding --- cwltool/command_line_tool.py | 5 +++-- requirements.txt | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/cwltool/command_line_tool.py b/cwltool/command_line_tool.py index 55478e602..fa4d45e20 100644 --- a/cwltool/command_line_tool.py +++ b/cwltool/command_line_tool.py @@ -11,6 +11,7 @@ import tempfile import threading import urllib +import urllib.parse from functools import cmp_to_key, partial from typing import ( IO, @@ -83,7 +84,7 @@ ACCEPTLIST_EN_STRICT_RE = re.compile(r"^[a-zA-Z0-9._+-]+$") ACCEPTLIST_EN_RELAXED_RE = re.compile(r".*") # Accept anything -ACCEPTLIST_RE = ACCEPTLIST_EN_STRICT_RE +ACCEPTLIST_RE = re.compile("^[\w0-9._+\- \u2600-\u26FF]+$") # accept unicode word characters and emojis DEFAULT_CONTAINER_MSG = """ We are on Microsoft Windows and not all components of this CWL description have a container specified. This means that these steps will be executed in the default container, @@ -913,7 +914,7 @@ def collect_output( { "location": g, "path": fs_access.join( - builder.outdir, g[len(prefix[0]) + 1 :] + builder.outdir, urllib.parse.unquote(g[len(prefix[0]) + 1 :]) ), "basename": os.path.basename(g), "nameroot": os.path.splitext( diff --git a/requirements.txt b/requirements.txt index 37a6ff9d6..004020f32 100755 --- a/requirements.txt +++ b/requirements.txt @@ -6,8 +6,8 @@ schema-salad>=5,<6 typing>=3.5.3; python_version<"3.6" pathlib2 != 2.3.1 prov==1.5.1 -bagit==1.6.4 +bagit==1.7.0 mypy-extensions -psutil +psutil<5.8.0 typing-extensions coloredlogs From 5f99a0a45d4b359b3f151f20a7c2fc46ea88a99e Mon Sep 17 00:00:00 2001 From: Ralf Grubenmann Date: Mon, 15 Mar 2021 17:16:45 +0100 Subject: [PATCH 2/5] address PR comments --- cwltool/command_line_tool.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cwltool/command_line_tool.py b/cwltool/command_line_tool.py index fa4d45e20..8c3a0613e 100644 --- a/cwltool/command_line_tool.py +++ b/cwltool/command_line_tool.py @@ -82,9 +82,9 @@ if TYPE_CHECKING: from .provenance import ProvenanceProfile # pylint: disable=unused-import -ACCEPTLIST_EN_STRICT_RE = re.compile(r"^[a-zA-Z0-9._+-]+$") +ACCEPTLIST_EN_STRICT_RE = re.compile(r"^[\w0-9._+\- \u2600-\u26FF]+$") # accept unicode word characters and emojis ACCEPTLIST_EN_RELAXED_RE = re.compile(r".*") # Accept anything -ACCEPTLIST_RE = re.compile("^[\w0-9._+\- \u2600-\u26FF]+$") # accept unicode word characters and emojis +ACCEPTLIST_RE = ACCEPTLIST_EN_STRICT_RE DEFAULT_CONTAINER_MSG = """ We are on Microsoft Windows and not all components of this CWL description have a container specified. This means that these steps will be executed in the default container, From 8ab13e2b816790543f7bd22421867f8dcc79dbb4 Mon Sep 17 00:00:00 2001 From: Ralf Grubenmann Date: Mon, 15 Mar 2021 17:27:40 +0100 Subject: [PATCH 3/5] fix formatting --- cwltool/command_line_tool.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/cwltool/command_line_tool.py b/cwltool/command_line_tool.py index 8c3a0613e..2cd4f4b26 100644 --- a/cwltool/command_line_tool.py +++ b/cwltool/command_line_tool.py @@ -82,7 +82,9 @@ if TYPE_CHECKING: from .provenance import ProvenanceProfile # pylint: disable=unused-import -ACCEPTLIST_EN_STRICT_RE = re.compile(r"^[\w0-9._+\- \u2600-\u26FF]+$") # accept unicode word characters and emojis +ACCEPTLIST_EN_STRICT_RE = re.compile( + r"^[\w0-9._+\- \u2600-\u26FF]+$" +) # accept unicode word characters and emojis ACCEPTLIST_EN_RELAXED_RE = re.compile(r".*") # Accept anything ACCEPTLIST_RE = ACCEPTLIST_EN_STRICT_RE DEFAULT_CONTAINER_MSG = """ @@ -914,7 +916,10 @@ def collect_output( { "location": g, "path": fs_access.join( - builder.outdir, urllib.parse.unquote(g[len(prefix[0]) + 1 :]) + builder.outdir, + urllib.parse.unquote( + g[len(prefix[0]) + 1 :] + ), ), "basename": os.path.basename(g), "nameroot": os.path.splitext( From 9752193fd23ddbf993abb620948eb5ba4b77efe3 Mon Sep 17 00:00:00 2001 From: Ralf Grubenmann Date: Tue, 16 Mar 2021 10:05:11 +0100 Subject: [PATCH 4/5] add tests --- cwltool/command_line_tool.py | 2 +- tests/test_path_checks.py | 111 ++++++++++++++++++++++++++++++++ tests/test_relax_path_checks.py | 50 -------------- 3 files changed, 112 insertions(+), 51 deletions(-) create mode 100644 tests/test_path_checks.py delete mode 100644 tests/test_relax_path_checks.py diff --git a/cwltool/command_line_tool.py b/cwltool/command_line_tool.py index 2cd4f4b26..ea4e2f98e 100644 --- a/cwltool/command_line_tool.py +++ b/cwltool/command_line_tool.py @@ -83,7 +83,7 @@ from .provenance import ProvenanceProfile # pylint: disable=unused-import ACCEPTLIST_EN_STRICT_RE = re.compile( - r"^[\w0-9._+\- \u2600-\u26FF]+$" + r"^[\w.+\-\u2600-\u26FF\U0001f600-\U0001f64f]+$" ) # accept unicode word characters and emojis ACCEPTLIST_EN_RELAXED_RE = re.compile(r".*") # Accept anything ACCEPTLIST_RE = ACCEPTLIST_EN_STRICT_RE diff --git a/tests/test_path_checks.py b/tests/test_path_checks.py new file mode 100644 index 000000000..e15e060b9 --- /dev/null +++ b/tests/test_path_checks.py @@ -0,0 +1,111 @@ +import os +import pytest +from tempfile import NamedTemporaryFile + +from cwltool.main import main + +from .util import needs_docker + +script = """ +#!/usr/bin/env cwl-runner +cwlVersion: v1.0 +class: CommandLineTool +inputs: + - id: input + type: File + inputBinding: + position: 0 + - id: output + type: string +outputs: + - id: output + type: File + outputBinding: + glob: "$(inputs.output)" +stdout: "$(inputs.output)" +baseCommand: [cat] +""" + + +@needs_docker +def test_spaces_in_input_files(tmpdir): + try: + script_file = NamedTemporaryFile(mode="w", delete=False) + script_file.write(script) + script_file.flush() + script_file.close() + + spaces = NamedTemporaryFile(prefix="test with spaces", delete=False) + spaces.close() + + params = [ + "--debug", + "--outdir", + str(tmpdir), + script_file.name, + "--input", + spaces.name, + "--output", + "test.txt", + ] + assert main(params) == 1 + assert main(["--relax-path-checks"] + params) == 0 + finally: + os.remove(script_file.name) + os.remove(spaces.name) + + +@needs_docker +@pytest.mark.parametrize("filename", ["測試", "그래프", "график", "𒁃", "☕😍", "امتحان"]) +def test_unicode_in_input_files(tmpdir, filename): + try: + script_file = NamedTemporaryFile(mode="w", delete=False) + script_file.write(script) + script_file.flush() + script_file.close() + + inputfile = NamedTemporaryFile(prefix=filename, delete=False) + inputfile.close() + + params = [ + "--debug", + "--outdir", + str(tmpdir), + script_file.name, + "--input", + inputfile.name, + "--output", + "test.txt", + ] + assert main(params) == 0 + finally: + os.remove(script_file.name) + os.remove(inputfile.name) + + +@needs_docker +@pytest.mark.parametrize("filename", ["測試", "그래프", "график", "𒁃", "☕😍", "امتحان"]) +def test_unicode_in_output_files(tmpdir, filename): + try: + script_file = NamedTemporaryFile(mode="w", delete=False) + script_file.write(script) + script_file.flush() + script_file.close() + + inputfile = NamedTemporaryFile(prefix="test", delete=False) + inputfile.close() + + params = [ + "--debug", + "--outdir", + str(tmpdir), + script_file.name, + "--input", + inputfile.name, + "--output", + filename, + ] + assert main(params) == 0 + finally: + os.remove(script_file.name) + os.remove(inputfile.name) diff --git a/tests/test_relax_path_checks.py b/tests/test_relax_path_checks.py deleted file mode 100644 index 1136934a6..000000000 --- a/tests/test_relax_path_checks.py +++ /dev/null @@ -1,50 +0,0 @@ -import os -from tempfile import NamedTemporaryFile - -from cwltool.main import main - -from .util import needs_docker - -script = """ -#!/usr/bin/env cwl-runner -cwlVersion: v1.0 -class: CommandLineTool -inputs: - - id: input - type: File - inputBinding: - position: 0 -outputs: - - id: output - type: File - outputBinding: - glob: test.txt -stdout: test.txt -baseCommand: [cat] -""" - - -@needs_docker -def test_spaces_in_input_files(tmpdir): - try: - script_file = NamedTemporaryFile(mode="w", delete=False) - script_file.write(script) - script_file.flush() - script_file.close() - - spaces = NamedTemporaryFile(prefix="test with spaces", delete=False) - spaces.close() - - params = [ - "--debug", - "--outdir", - str(tmpdir), - script_file.name, - "--input", - spaces.name, - ] - assert main(params) == 1 - assert main(["--relax-path-checks"] + params) == 0 - finally: - os.remove(script_file.name) - os.remove(spaces.name) From f7275b3f7ef5fc592750676b1ceda7271aedf2f7 Mon Sep 17 00:00:00 2001 From: Ralf Grubenmann Date: Tue, 16 Mar 2021 13:34:44 +0100 Subject: [PATCH 5/5] fix formatting --- tests/test_path_checks.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/test_path_checks.py b/tests/test_path_checks.py index 7b88acdd0..1783a10ee 100644 --- a/tests/test_path_checks.py +++ b/tests/test_path_checks.py @@ -50,7 +50,9 @@ def test_spaces_in_input_files(tmp_path: Path) -> None: @needs_docker -@pytest.mark.parametrize("filename", ["測試", "그래프", "график", "𒁃", "☕😍", "امتحان", "abc+DEFGZ.z_12345-"]) +@pytest.mark.parametrize( + "filename", ["測試", "그래프", "график", "𒁃", "☕😍", "امتحان", "abc+DEFGZ.z_12345-"] +) def test_unicode_in_input_files(tmp_path: Path, filename: str) -> None: script_name = tmp_path / "script" inputfile = tmp_path / filename @@ -72,7 +74,9 @@ def test_unicode_in_input_files(tmp_path: Path, filename: str) -> None: @needs_docker -@pytest.mark.parametrize("filename", ["測試", "그래프", "график", "𒁃", "☕😍", "امتحان", "abc+DEFGZ.z_12345-"]) +@pytest.mark.parametrize( + "filename", ["測試", "그래프", "график", "𒁃", "☕😍", "امتحان", "abc+DEFGZ.z_12345-"] +) def test_unicode_in_output_files(tmp_path: Path, filename: str) -> None: script_name = tmp_path / "script" inputfile = tmp_path / "test"