Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 61 additions & 1 deletion test/test_client_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,12 @@
import os
import logging
import subprocess
import sys

from wes_client.util import expand_globs
pkg_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) # noqa
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please put this (lines 9 & 10) into the other test file as well.

sys.path.insert(0, pkg_root) # noqa

from wes_client.util import expand_globs, wf_info

logging.basicConfig(level=logging.INFO)

Expand All @@ -14,6 +18,22 @@ class IntegrationTest(unittest.TestCase):
def setUp(self):
    """Build the fixture locations and expected results shared by the tests.

    Sets the following attributes:

    * ``testdata_dir`` - this file's directory name with 'data' appended
      directly (no path separator is inserted).
    * ``local`` - 'file://' locations under the current working directory,
      keyed by file format, plus one unsupported file name.
    * ``remote`` - remote locations keyed by file format, plus one location
      on an unsupported scheme and one unreachable URL.
    * ``expected`` - the (version, type) tuple wf_info() should return for
      each supported format.
    """
    dirname = os.path.dirname(os.path.abspath(__file__))
    self.testdata_dir = dirname + 'data'

    # Join the path components individually; handing os.path.join() a single
    # pre-concatenated string makes the call a no-op.
    cwd = os.getcwd()
    self.local = {'cwl': 'file://' + os.path.join(cwd, 'testdata', 'md5sum.cwl'),
                  'wdl': 'file://' + os.path.join(cwd, 'testdata', 'md5sum.wdl'),
                  'py': 'file://' + os.path.join(cwd, 'test', 'test_integration.py'),
                  'unsupported': 'fake.txt'}

    self.remote = {
        'cwl': 'https://raw.githubusercontent.com/common-workflow-language/workflow-service/master/testdata/md5sum.cwl',
        'wdl': 'https://raw.githubusercontent.com/common-workflow-language/workflow-service/master/testdata/md5sum.wdl',
        'py': 'https://raw.githubusercontent.com/common-workflow-language/workflow-service/master/test/test_integration.py',
        'unsupported': 'gs://topmed_workflow_testing/topmed_aligner/small_test_files_sbg/example_human_known_snp.py',
        'unreachable': 'https://fake.py'}

    self.expected = {'cwl': ('v1.0', 'CWL'),
                     'wdl': ('draft-2', 'WDL'),
                     'py': ('2.7', 'PY'),
                     'pyWithPrefix': ('2.7', 'PY')}

def tearDown(self):
unittest.TestCase.tearDown(self)
Expand All @@ -34,6 +54,46 @@ def test_expand_globs(self):
glob_files = expand_globs('*')
assert set(files) == glob_files, '\n' + str(set(files)) + '\n' + str(glob_files)

def testSupportedFormatChecking(self):
    """
    Check that non-wdl, -python, -cwl files are rejected.

    This test is run only on local files to avoid downloading and removing a new file.
    """
    prefix = 'file://'

    for file_format, location in self.local.items():
        if file_format == 'unsupported':
            # Tests behavior after receiving a non supported file type.
            with self.assertRaises(TypeError):
                wf_info(location)
            continue

        # Tests the behavior after receiving supported file types with and without the 'file://' prefix.
        # assertEqual is used because the assertEquals alias is deprecated.
        self.assertEqual(wf_info(location), self.expected[file_format])
        self.assertEqual(wf_info(location[len(prefix):]), self.expected[file_format])

def testFileLocationChecking(self):
    """
    Check that the function rejects unsupported file locations.

    This test needs to be run on remote files to test the location checking functionality of wf_info().
    """
    for file_format, location in self.remote.items():
        if file_format == 'unsupported':
            # Tests behavior after receiving a file hosted at an unsupported location.
            with self.assertRaises(NotImplementedError):
                wf_info(location)

        elif file_format == 'unreachable':
            # Tests behavior after receiving a non-existent file.
            with self.assertRaises(IOError):
                wf_info(location)

        else:
            # assertEqual is used because the assertEquals alias is deprecated.
            self.assertEqual(wf_info(location), self.expected[file_format])
            # The local copy made for version checking must not be left behind.
            leftover = os.path.join(os.getcwd(), 'fetchedFromRemote.' + file_format)
            self.assertFalse(os.path.isfile(leftover))


if __name__ == '__main__':
    # Executed directly as a script: run every test defined in this module.
    unittest.main()
4 changes: 4 additions & 0 deletions test/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
import signal
import shutil
import logging
import sys

pkg_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) # noqa
sys.path.insert(0, pkg_root) # noqa

from wes_client.util import WESClient

Expand Down
75 changes: 55 additions & 20 deletions wes_client/util.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,69 @@
import os
import json
import schema_salad.ref_resolver
from subprocess32 import check_call, DEVNULL, CalledProcessError
import yaml
import glob
import requests
import urllib
import logging
import schema_salad.ref_resolver

from wes_service.util import visit
from urllib import urlopen


def wf_type(workflow_file):
    """Return the workflow type ('WDL', 'CWL' or 'PY') implied by the file name.

    The comparison is case-insensitive and looks only at how the name ends.
    Raises ValueError when the name ends with none of the known suffixes.
    """
    lowered = workflow_file.lower()
    for suffix in ('wdl', 'cwl', 'py'):
        if lowered.endswith(suffix):
            return suffix.upper()
    raise ValueError('Unrecognized/unsupported workflow file extension: %s' % lowered.split('.')[-1])
def two_seven_compatible(filePath):
    """Check that a python file byte-compiles under the ``python2`` interpreter.

    Runs ``python2 -m py_compile`` on ``filePath`` in a subprocess with stderr
    discarded. Returns True when the command succeeds and raises RuntimeError
    when it exits with a non-zero status.
    """
    compile_cmd = ['python2', '-m', 'py_compile', filePath]
    try:
        check_call(compile_cmd, stderr=DEVNULL)
    except CalledProcessError:
        raise RuntimeError('Python files must be 2.7 compatible')
    else:
        return True


def wf_version(workflow_file):
# TODO: Check inside of the file, handling local/http/etc.
if wf_type(workflow_file) == 'PY':
def get_version(extension, workflow_file):
    """Determine the version of a local .py, .wdl, or .cwl file.

    :param extension: file extension without the dot ('py', 'cwl' or 'wdl').
        Anything other than 'py' or 'cwl' is handled as a wdl file.
    :param workflow_file: path of the workflow file on the local filesystem.
    :return: '2.7' for python files, the value of the 'cwlVersion' key for cwl
        files, and for wdl files the token following a leading ``version``
        keyword, or 'draft-2' when the file has no such line.
    :raises RuntimeError: from two_seven_compatible() when a python file does
        not compile under python2.
    """
    if extension == 'py' and two_seven_compatible(workflow_file):
        return '2.7'
    elif extension == 'cwl':
        # NOTE: safe_load is used because wf_info() also passes in files it has
        # just downloaded; plain yaml.load can construct arbitrary objects.
        with open(workflow_file) as cwl_handle:
            return yaml.safe_load(cwl_handle)['cwlVersion']
    else:  # Must be a wdl file.
        # Scan the file's contents (not its path) for a 'version <x>' statement.
        with open(workflow_file) as wdl_handle:
            for line in wdl_handle:
                tokens = line.split()
                if len(tokens) > 1 and tokens[0] == 'version':
                    return tokens[1]
        # No version statement found.
        return 'draft-2'


def wf_info(workflow_path):
    """
    Returns the version of the file and the file extension.

    Assumes that the file path is to the file directly, i.e. ends with a valid file extension. Supports checking
    local files as well as files at http:// and https:// locations. Files at these remote locations are recreated
    locally to enable our approach to version checking, then removed after version is extracted.

    :param workflow_path: local path (with or without a 'file://' prefix) or an http(s) URL.
    :return: tuple of (version, upper-cased file extension).
    :raises TypeError: when the extension is not py, wdl or cwl.
    :raises NotImplementedError: when the location is neither local nor HTTP(S).
    """
    supported_formats = ['py', 'wdl', 'cwl']
    local_prefix = 'file://'

    file_type = workflow_path.lower().split('.')[-1]  # Grab the file extension
    if file_type not in supported_formats:
        raise TypeError('Unsupported workflow type: .{}. Must be {}.'.format(file_type, '.py, .cwl, or .wdl'))

    # A path without any scheme is treated as a local file.
    if ':' not in workflow_path:
        workflow_path = local_prefix + workflow_path

    if workflow_path.startswith(local_prefix):
        version = get_version(file_type, workflow_path[len(local_prefix):])
    elif workflow_path.startswith(('https://', 'http://')):
        # If file not local go fetch it.
        contents = urlopen(workflow_path).read()
        local_loc = os.path.join(os.getcwd(), 'fetchedFromRemote.' + file_type)
        try:
            # Binary mode: the downloaded payload is written back unchanged.
            with open(local_loc, 'wb') as f:
                f.write(contents)
            version = get_version(file_type, local_loc)
        finally:
            # Remove the local copy even when version detection fails.
            # TODO: Find a way to avoid recreating file before version determination.
            if os.path.exists(local_loc):
                os.remove(local_loc)
    else:
        raise NotImplementedError('Unsupported workflow file location: {}. Must be local or HTTP(S).'.format(workflow_path))

    return version, file_type.upper()


def build_wes_request(workflow_file, json_path, attachments=None):
Expand All @@ -42,10 +76,11 @@ def build_wes_request(workflow_file, json_path, attachments=None):
"""
workflow_file = "file://" + workflow_file if ":" not in workflow_file else workflow_file
json_path = json_path[7:] if json_path.startswith("file://") else json_path
wf_version, wf_type = wf_info(workflow_file)

parts = [("workflow_params", json.dumps(json.load(open(json_path)))),
("workflow_type", wf_type(workflow_file)),
("workflow_type_version", wf_version(workflow_file))]
("workflow_type", wf_type),
("workflow_type_version", wf_version)]

if workflow_file.startswith("file://"):
parts.append(("workflow_attachment", (os.path.basename(workflow_file[7:]), open(workflow_file[7:], "rb"))))
Expand Down