diff --git a/README.md b/README.md
new file mode 100644
index 0000000..a9bef17
--- /dev/null
+++ b/README.md
@@ -0,0 +1,105 @@
+# Common Workflow Language (CWL) Workflows
+
+CWL feature extraction workflows for imaging datasets
+
+## Workflow Steps:
+
+Create a [Conda](https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#activating-an-environment) environment with Python `>=3.9,<3.12`.
+
+#### 1. Install polus-plugins.
+
+- clone the image-tools repository:
+`git clone https://github.com/camilovelezr/image-tools.git ../`
+- `cd image-tools`
+- check out the `hd2` branch:
+`git checkout -b hd2 remotes/origin/hd2`
+- `pip install .`
+
+#### 2. Install workflow-inference-compiler.
+- clone the workflow-inference-compiler repository:
+`git clone https://github.com/camilovelezr/workflow-inference-compiler.git ../`
+- `cd workflow-inference-compiler`
+- check out the `hd2` branch:
+`git checkout -b hd2 remotes/origin/hd2`
+- `pip install -e ".[all]"`
+
+#### 3. Install image-workflows.
+- `cd image-workflows`
+- `poetry install`
+
+#### Note:
+Ensure that [Docker Desktop](https://www.docker.com/products/docker-desktop/) is running in the background. To verify that it is operational, you can use the following command:
+`docker run -d -p 80:80 docker/getting-started`
+This command launches the `docker/getting-started` container in detached mode (`-d` flag), exposing port 80 on your local machine (`-p 80:80`). It is a simple way to confirm that Docker Desktop is functioning correctly.
+
+## Details
+This workflow integrates eight distinct plugins, starting with data retrieval from the [Broad Bioimage Benchmark Collection](https://bbbc.broadinstitute.org/), followed by file renaming, correction of uneven illumination, and segmentation of nuclear objects, and culminating in the extraction of features from the identified objects.
+
+Below are the specifics of the plugins employed in the workflow:
+1. [bbbc-download-plugin](https://github.com/saketprem/polus-plugins/tree/bbbc_download/utils/bbbc-download-plugin)
+2. [file-renaming-tool](https://github.com/PolusAI/image-tools/tree/master/formats/file-renaming-tool)
+3. [ome-converter-tool](https://github.com/PolusAI/image-tools/tree/master/formats/ome-converter-tool)
+4. [basic-flatfield-estimation-tool](https://github.com/PolusAI/image-tools/tree/master/regression/basic-flatfield-estimation-tool)
+5. [apply-flatfield-tool](https://github.com/PolusAI/image-tools/tree/master/transforms/images/apply-flatfield-tool)
+6. [kaggle-nuclei-segmentation](https://github.com/hamshkhawar/image-tools/tree/kaggle-nuclei_seg/segmentation/kaggle-nuclei-segmentation)
+7. [polus-ftl-label-plugin](https://github.com/hamshkhawar/image-tools/tree/kaggle-nuclei_seg/transforms/images/polus-ftl-label-plugin)
+8. [nyxus-plugin](https://github.com/PolusAI/image-tools/tree/kaggle-nuclei_seg/features/nyxus-plugin)
+
+## Execute CWL workflows
+Two different CWL workflows can be executed for specific datasets:
+1. segmentation
+2. analysis
+
+The segmentation workflow uses plugins `1 to 7`, whereas the analysis workflow uses plugins `1 to 8`.
+If a user wishes to execute a workflow for a new dataset, they can start from a sample YAML file and fill in the parameter values, as sketched below.
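+The following sketch is one way to generate such a configuration file; it is not part of this repository. The keys mirror `ANALYSIS_KEYS` in `src/polus/image/workflows/utils.py`, while the dataset name and pattern values are hypothetical placeholders that must be adapted to the dataset at hand.
+
+```python
+"""Sketch: write a dataset configuration for the analysis workflow."""
+from pathlib import Path
+
+import yaml
+
+params = {
+    "name": "MyDataset",  # hypothetical dataset name
+    "file_pattern": ".*_{row:c}{col:dd}_s{s:d}_w{channel:d}.tif",  # placeholder pattern
+    "out_file_pattern": "x{row:dd}_y{col:dd}_p{s:dd}_c{channel:d}.tif",
+    "image_pattern": "x{x:dd}_y{y:dd}_p{p:dd}_c{c:d}.ome.tif",
+    "seg_pattern": "x{x:dd}_y{y:dd}_p{p:dd}_c1.ome.tif",
+    "ff_pattern": "",  # only used when background_correction is true
+    "df_pattern": "",
+    "group_by": "c",
+    "map_directory": False,
+    "features": "ALL_INTENSITY",
+    "file_extension": "pandas",
+    "background_correction": False,
+}
+
+# Save next to the other analysis configurations.
+config = Path("configuration/analysis/MyDataset.yml")
+config.parent.mkdir(parents=True, exist_ok=True)
+with config.open("w") as fh:
+    yaml.safe_dump(params, fh, sort_keys=False)
+```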
+The completed YAML file should be saved in the subdirectory of the `configuration` folder that matches the chosen workflow and named after the dataset (for example, `configuration/analysis/BBBC001.yml`), since the command-line interface looks up `configuration/{workflow}/{name}.yml`.
+
+If a user opts to run a workflow without background correction, they can set `background_correction` to false. In this case, the workflow will skip steps `4 and 5`.
+
+`python -m polus.image.workflows --name="BBBC001" --workflow=analysis`
+
+A directory named `outputs` is generated. It contains the CLTs for each plugin and the compiled CWL and YAML files for the experiment, while all step outputs are stored within the `outdir` directory.
+```
+outputs
+├── experiment
+│   ├── cwl_adapters
+│   ├── experiment.cwl
+│   └── experiment.yml
+└── outdir
+    └── experiment
+        ├── step 1 BbbcDownload
+        │   └── outDir
+        │       └── bbbc.outDir
+        │           └── BBBC
+        │               └── BBBC039
+        │                   └── raw
+        │                       ├── Ground_Truth
+        │                       │   ├── masks
+        │                       │   └── metadata
+        │                       └── Images
+        │                           └── images
+        ├── step 2 FileRenaming
+        │   └── outDir
+        │       └── rename.outDir
+        ├── step 3 OmeConverter
+        │   └── outDir
+        │       └── ome_converter.outDir
+        ├── step 4 BasicFlatfieldEstimation
+        │   └── outDir
+        │       └── estimate_flatfield.outDir
+        ├── step 5 ApplyFlatfield
+        │   └── outDir
+        │       └── apply_flatfield.outDir
+        ├── step 6 KaggleNucleiSegmentation
+        │   └── outDir
+        │       └── kaggle_nuclei_segmentation.outDir
+        ├── step 7 FtlLabel
+        │   └── outDir
+        │       └── ftl_plugin.outDir
+        └── step 8 NyxusPlugin
+            └── outDir
+                └── nyxus_plugin.outDir
+
+```
+#### Note:
+Step 8 is executed only in the case of the `analysis` workflow; the `segmentation` workflow ends at step 7.
\ No newline at end of file
diff --git a/configuration/__init__.py b/configuration/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/configuration/analysis/BBBC001.yml b/configuration/analysis/BBBC001.yml
new file mode 100644
index 0000000..7efe214
--- /dev/null
+++ b/configuration/analysis/BBBC001.yml
@@ -0,0 +1,14 @@
+---
+name : BBBC001
+file_pattern : /.*/.*/.*/Images/.*/.*_{row:c}{col:dd}f{f:dd}d{channel:d}.tif
+out_file_pattern : x{row:dd}_y{col:dd}_p{f:dd}_c{channel:d}.tif
+image_pattern: x{x:dd}_y{y:dd}_p{p:dd}_c{c:d}.ome.tif
+seg_pattern: x{x:dd}_y{y:dd}_p{p:dd}_c0.ome.tif
+ff_pattern: "x00_y03_p0\\(0-5\\)_c{c:d}_flatfield.ome.tif"
+df_pattern: "x00_y03_p0\\(0-5\\)_c{c:d}_darkfield.ome.tif"
+group_by: c
+map_directory: false
+features: ALL
+file_extension: pandas
+background_correction: false
+
diff --git a/configuration/analysis/BBBC039.yml b/configuration/analysis/BBBC039.yml
new file mode 100644
index 0000000..308a274
--- /dev/null
+++ b/configuration/analysis/BBBC039.yml
@@ -0,0 +1,13 @@
+---
+name : BBBC039
+file_pattern : /.*/.*/.*/Images/.*/.*_{row:c}{col:dd}_s{s:d}_w{channel:d}.*.tif
+out_file_pattern : x{row:dd}_y{col:dd}_p{s:dd}_c{channel:d}.tif
+image_pattern: x{x:dd}_y{y:dd}_p{p:dd}_c{c:d}.ome.tif
+seg_pattern: x{x:dd}_y{y:dd}_p{p:dd}_c1.ome.tif
+ff_pattern: "x\\(00-15\\)_y\\(01-24\\)_p0\\(1-9\\)_c{c:d}_flatfield.ome.tif"
+df_pattern: "x\\(00-15\\)_y\\(01-24\\)_p0\\(1-9\\)_c{c:d}_darkfield.ome.tif"
+group_by: c
+map_directory: false
+features: "ALL_INTENSITY"
+file_extension: pandas
+background_correction: false
\ No newline at end of file
diff --git a/configuration/analysis/__init__.py b/configuration/analysis/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/configuration/analysis/sample.yml b/configuration/analysis/sample.yml
new file mode 100644
index 0000000..47ffb02
--- /dev/null
+++ b/configuration/analysis/sample.yml
@@ -0,0 +1,13 @@
+---
+name :
+file_pattern :
+out_file_pattern :
+image_pattern:
+seg_pattern:
+ff_pattern:
+df_pattern:
+group_by:
+map_directory:
+features:
+file_extension:
+background_correction:
\ No newline
at end of file diff --git a/configuration/segmentation/BBBC001.yml b/configuration/segmentation/BBBC001.yml new file mode 100644 index 0000000..4ed7653 --- /dev/null +++ b/configuration/segmentation/BBBC001.yml @@ -0,0 +1,11 @@ +--- +name : BBBC001 +file_pattern : /.*/.*/.*/Images/.*/.*_{row:c}{col:dd}f{f:dd}d{channel:d}.tif +out_file_pattern : x{row:dd}_y{col:dd}_p{f:dd}_c{channel:d}.tif +image_pattern: x{x:dd}_y{y:dd}_p{p:dd}_c{c:d}.ome.tif +seg_pattern: x{x:dd}_y{y:dd}_p{p:dd}_c0.ome.tif +ff_pattern: "x00_y03_p0\\(0-5\\)_c{c:d}_flatfield.ome.tif" +df_pattern: "x00_y03_p0\\(0-5\\)_c{c:d}_darkfield.ome.tif" +group_by: c +map_directory: false +background_correction: false \ No newline at end of file diff --git a/configuration/segmentation/BBBC039.yml b/configuration/segmentation/BBBC039.yml new file mode 100644 index 0000000..1884878 --- /dev/null +++ b/configuration/segmentation/BBBC039.yml @@ -0,0 +1,11 @@ +--- +name : BBBC039 +file_pattern : /.*/.*/.*/Images/.*/.*_{row:c}{col:dd}_s{s:d}_w{channel:d}.*.tif +out_file_pattern : x{row:dd}_y{col:dd}_p{s:dd}_c{channel:d}.tif +image_pattern: x{x:dd}_y{y:dd}_p{p:dd}_c{c:d}.ome.tif +seg_pattern: x{x:dd}_y{y:dd}_p{p:dd}_c1.ome.tif +ff_pattern: "x\\(00-15\\)_y\\(01-24\\)_p0\\(1-9\\)_c{c:d}_flatfield.ome.tif" +df_pattern: "x\\(00-15\\)_y\\(01-24\\)_p0\\(1-9\\)_c{c:d}_darkfield.ome.tif" +group_by: c +map_directory: false +background_correction: false \ No newline at end of file diff --git a/configuration/segmentation/__init__.py b/configuration/segmentation/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/configuration/segmentation/sample.yml b/configuration/segmentation/sample.yml new file mode 100644 index 0000000..ecc82e1 --- /dev/null +++ b/configuration/segmentation/sample.yml @@ -0,0 +1,12 @@ +--- +name : +file_pattern : +out_file_pattern : +image_pattern: +seg_pattern: +ff_pattern: +df_pattern: +group_by: +map_directory: +features: +file_extension: \ No newline at end of file diff --git a/cwl_adapters/basic-flatfield-estimation.cwl b/cwl-adapters/basic-flatfield-estimation.cwl similarity index 100% rename from cwl_adapters/basic-flatfield-estimation.cwl rename to cwl-adapters/basic-flatfield-estimation.cwl diff --git a/cwl_adapters/bbbcdownload.cwl b/cwl-adapters/bbbcdownload.cwl similarity index 100% rename from cwl_adapters/bbbcdownload.cwl rename to cwl-adapters/bbbcdownload.cwl diff --git a/cwl_adapters/file-renaming.cwl b/cwl-adapters/file-renaming.cwl similarity index 100% rename from cwl_adapters/file-renaming.cwl rename to cwl-adapters/file-renaming.cwl diff --git a/cwl_adapters/image_assembler.cwl b/cwl-adapters/image_assembler.cwl similarity index 100% rename from cwl_adapters/image_assembler.cwl rename to cwl-adapters/image_assembler.cwl diff --git a/cwl_adapters/montage.cwl b/cwl-adapters/montage.cwl similarity index 100% rename from cwl_adapters/montage.cwl rename to cwl-adapters/montage.cwl diff --git a/cwl_adapters/ome-converter.cwl b/cwl-adapters/ome-converter.cwl similarity index 100% rename from cwl_adapters/ome-converter.cwl rename to cwl-adapters/ome-converter.cwl diff --git a/cwl_adapters/precompute_slide.cwl b/cwl-adapters/precompute_slide.cwl similarity index 100% rename from cwl_adapters/precompute_slide.cwl rename to cwl-adapters/precompute_slide.cwl diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..85287fa --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,38 @@ +[tool.poetry] +name = "polus-image-workflows" +version = "0.1.1-dev1" +description = "Build and execute 
pipelines of polus plugins on Compute."
+authors = ["Hamdah Shafqat Abbasi "]
+readme = "README.md"
+packages = [{include = "polus", from = "src"}]
+
+[tool.poetry.dependencies]
+python = ">=3.9,<3.12"
+typer = "^0.9.0"
+pyyaml = "^6.0.1"
+pydantic = "^2.6.1"
+cwl-utils = "0.31"
+toil = "^5.12"
+polus-plugins = {path = "../image-tools", develop = true}
+workflow-inference-compiler = {path = "../workflow-inference-compiler", develop = true}
+
+[tool.poetry.group.dev.dependencies]
+jupyter = "^1.0.0"
+nbconvert = "^7.11.0"
+pytest = "^7.4.4"
+bump2version = "^1.0.1"
+pre-commit = "^3.3.3"
+black = "^23.3.0"
+ruff = "^0.0.274"
+mypy = "^1.4.0"
+pytest-xdist = "^3.3.1"
+pytest-sugar = "^0.9.7"
+
+[build-system]
+requires = ["poetry-core>=1.0.0"]
+build-backend = "poetry.core.masonry.api"
+
+[tool.pytest.ini_options]
+addopts = [
+    "--import-mode=importlib",
+]
\ No newline at end of file
diff --git a/src/polus/image/workflows/__init__.py b/src/polus/image/workflows/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/polus/image/workflows/__main__.py b/src/polus/image/workflows/__main__.py
new file mode 100644
index 0000000..863f1ef
--- /dev/null
+++ b/src/polus/image/workflows/__main__.py
@@ -0,0 +1,65 @@
+"""Command-line entry point for executing CWL workflows."""
+import logging
+from pathlib import Path
+
+import typer
+
+from polus.image.workflows.utils import LoadYaml
+from workflows.cwl_analysis import CWLAnalysisWorkflow
+from workflows.cwl_nuclear_segmentation import CWLSegmentationWorkflow
+
+
+app = typer.Typer()
+
+# Initialize the logger
+logging.basicConfig(
+    format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s",
+    datefmt="%d-%b-%y %H:%M:%S",
+)
+logger = logging.getLogger("WIC Python API")
+logger.setLevel(logging.INFO)
+
+
+@app.command()
+def main(
+    name: str = typer.Option(
+        ...,
+        "--name",
+        "-n",
+        help="Name of an imaging dataset from the Broad Bioimage Benchmark Collection (https://bbbc.broadinstitute.org/image_sets)"
+    ),
+    workflow: str = typer.Option(
+        ...,
+        "--workflow",
+        "-w",
+        help="Name of the CWL workflow (analysis or segmentation)"
+    )
+) -> None:
+    """Execute a CWL workflow."""
+    logger.info(f"name = {name}")
+    logger.info(f"workflow = {workflow}")
+
+    config_path = Path(__file__).parent.parent.parent.parent.parent.joinpath(
+        f"configuration/{workflow}/{name}.yml"
+    )
+    logger.info(f"config_path = {config_path}")
+
+    model = LoadYaml(workflow=workflow, config_path=config_path)
+    params = model.parse_yaml()
+
+    if workflow == "analysis":
+        logger.info(f"Executing {workflow}!!!")
+        model = CWLAnalysisWorkflow(**params)
+        model.workflow()
+
+    if workflow == "segmentation":
+        logger.info(f"Executing {workflow}!!!")
+        model = CWLSegmentationWorkflow(**params)
+        model.workflow()
+
+    logger.info("Completed CWL workflow!!!")
+
+
+if __name__ == "__main__":
+    app()
\ No newline at end of file
diff --git a/src/polus/image/workflows/utils.py b/src/polus/image/workflows/utils.py
new file mode 100644
index 0000000..7daa9b7
--- /dev/null
+++ b/src/polus/image/workflows/utils.py
@@ -0,0 +1,68 @@
+import pydantic
+from pathlib import Path
+from typing import Dict
+from typing import Union
+import yaml
+
+
+GITHUB_TAG = "https://raw.githubusercontent.com"
+
+
+ANALYSIS_KEYS = [
+    "name",
+    "file_pattern",
+    "out_file_pattern",
+    "image_pattern",
+    "seg_pattern",
+    "ff_pattern",
+    "df_pattern",
+    "group_by",
+    "map_directory",
+    "features",
+    "file_extension",
+    "background_correction",
+]
+SEG_KEYS = [
+    "name",
+    "file_pattern",
+    "out_file_pattern",
+    "image_pattern",
+    "seg_pattern",
+    "ff_pattern",
+    "df_pattern",
+    "group_by",
+    "map_directory",
+    "background_correction",
+]
+
+
+class DataModel(pydantic.BaseModel):
+    data: Dict[str, Dict[str, Union[str, bool]]]
+
+
+class LoadYaml(pydantic.BaseModel):
+    """Validation of a dataset YAML configuration."""
+
+    workflow: str
+    config_path: Union[str, Path]
+
+    @pydantic.validator("config_path", pre=True)
+    @classmethod
+    def validate_path(cls, value: Union[str, Path]) -> Union[str, Path]:
+        """Validate that the configuration path exists."""
+        if not Path(value).exists():
+            msg = f"{value} does not exist! Please check the path again"
+            raise ValueError(msg)
+        if isinstance(value, str):
+            return Path(value)
+        return value
+
+    @pydantic.validator("workflow", pre=True)
+    @classmethod
+    def validate_workflow_name(cls, value: str) -> str:
+        """Validate the workflow name."""
+        if value not in ["analysis", "segmentation", "visualization"]:
+            msg = "Please choose a valid workflow name, i.e. analysis, segmentation or visualization"
+            raise ValueError(msg)
+        return value
+
+    def parse_yaml(self) -> Dict[str, Union[str, bool]]:
+        """Parse the YAML configuration file for a dataset."""
+        with open(self.config_path, "r") as f:
+            data = yaml.safe_load(f)
+
+        check_values = any(v is None for v in data.values())
+
+        if check_values:
+            msg = "Some parameters are not defined! Please check the configuration file again"
+            raise ValueError(msg)
+
+        if self.workflow == "analysis":
+            if data["background_correction"] and list(data.keys()) != ANALYSIS_KEYS:
+                msg = "Please check the parameters again for the analysis workflow!!"
+                raise ValueError(msg)
+
+        if self.workflow == "segmentation":
+            if data["background_correction"] and list(data.keys()) != SEG_KEYS:
+                msg = "Please check the parameters again for the segmentation workflow!!"
+                raise ValueError(msg)
+
+        return data
diff --git a/workflows/__init__.py b/workflows/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/workflows/cwl_analysis.py b/workflows/cwl_analysis.py
new file mode 100644
index 0000000..345c0af
--- /dev/null
+++ b/workflows/cwl_analysis.py
@@ -0,0 +1,289 @@
+import wic.api.pythonapi as api
+import polus.plugins as pp
+from pathlib import Path
+import yaml
+import logging
+import re
+import shutil
+import typing
+import sys
+sys.path.append('../')
+from polus.image.workflows.utils import GITHUB_TAG
+
+# Initialize the logger
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+
+class CWLAnalysisWorkflow:
+    """
+    A CWL feature extraction (analysis) pipeline.
+
+    Attributes:
+        name : Name of an imaging dataset from the Broad Bioimage Benchmark Collection (https://bbbc.broadinstitute.org/image_sets).
+        file_pattern : Pattern for parsing raw filenames.
+        out_file_pattern : Preferred format for renamed filenames.
+        image_pattern : Pattern for parsing intensity image filenames after renaming (when using map_directory).
+        seg_pattern : Pattern used to parse segmentation image filenames.
+        map_directory : Mapping of folder name.
+        ff_pattern: The filename pattern employed to select flatfield components from the ffDir.
+ df_pattern:The filename pattern employed to select darkfield components from the ffDir + group_by: Grouping variables for filePattern + features:Features from Nyxus (https://github.com/PolusAI/nyxus/) that need extraction + file_extension: Output file format + background_correction: Execute background correction + """ + def __init__( + self, + name: str, + file_pattern: str, + out_file_pattern: str, + image_pattern: str, + seg_pattern: str, + ff_pattern: typing.Optional[str] = '', + df_pattern: typing.Optional[str] = '', + group_by: typing.Optional[str] = '', + map_directory: typing.Optional[bool] = False, + features: typing.Optional[str]="ALL", + file_extension: typing.Optional[str]="arrowipc", + background_correction: typing.Optional[bool] = False + + ): + self.name = name + self.file_pattern = file_pattern + self.out_file_pattern = out_file_pattern + self.ff_pattern = ff_pattern + self.df_pattern = df_pattern + self.group_by = group_by + self.wic_path = api._WIC_PATH + self.PATH = Path(self.wic_path.parent).joinpath("image-workflows") + self.cwl_path, self.workflow_path = self._create_directories() + self.image_pattern = image_pattern + self.seg_pattern = seg_pattern + self.features = features + self.file_extension = file_extension + self.map_directory = map_directory + self.background_correction = background_correction + + def _create_directories(self) -> None: + """Create directories for CWL outputs""" + cwl_path = self.PATH.joinpath("cwl_adapters") + cwl_path.mkdir(parents=True, exist_ok=True) + workflow_path = self.PATH.joinpath("outputs").resolve() + workflow_path.mkdir(exist_ok=True) + return cwl_path, workflow_path + + def _clean(self) -> None: + """Cleaning of redundant directories generating on running CWL""" + logger.info("Cleaning directories!!!") + destination_path = self.workflow_path.joinpath("experiment") + dir_names = ("autogenerated", "cachedir", "RUNS", "provenance") + for i, d in zip(self.wic_path.iterdir(), self.PATH.iterdir()): + if i.name.endswith(dir_names): + shutil.rmtree(d) + if d.name.endswith(dir_names): + shutil.rmtree(d) + + for d in destination_path.iterdir(): + if d.name.endswith("cwl_adapters"): + shutil.rmtree(d) + for d in self.PATH.iterdir(): + if d.name.endswith("cwl_adapters"): + shutil.move(d, destination_path) + + return + + def _move_outputs(self) -> None: + """Transfer outputs from the WIC directory to the workflow path""" + logger.info("Move outputs to workflow path!!!") + for d in self.wic_path.iterdir(): + if d.name.endswith("outdir"): + shutil.move(d, self.workflow_path) + return + + def _camel(self, name: str) -> str: + """Convert plugin name to camel case.""" + name = re.sub(r"(_|-)+", " ", name).title().replace(" ", "") + return "".join([name[0].upper(), name[1:]]) + + def _string_after_period(self, x): + """Get a string after period.""" + match = re.search(r"\.(.*)", x) + if match: + # Get the part after the period + return f".*.{match.group(1)}" + else: + return "" + + def _add_backslash_before_parentheses(self, x): + """Add backslash to generate ff_pattern and df_pattern""" + # Define the regular expression pattern to match parenthesis + pattern_1 = r"(\()|(\))" + # Use re.sub() to add a backslash before starting and finishing parenthesis + result = re.sub(pattern_1, r"\\\1\2", x) + pattern_2 = r"\d" + result = ( + result.split("_c")[0] + + "_c{c:d}" + + re.sub(pattern_2, "", result.split("_c")[1]) + ) + return result + + def create_step(self, url: str) -> api.Step: + """Generate the plugin class name from the plugin name 
specified in the manifest""" + manifest = pp.submit_plugin(url) + plugin_version = str(manifest.version) + cwl_tool = pp.get_plugin(self._camel(manifest.name), plugin_version).save_cwl( + self.cwl_path.joinpath(f"{self._camel(manifest.name)}.cwl") + ) + step = api.Step(cwl_tool) + return step + + def manifest_urls(self, x: str) -> str: + """URLs on GitHub for plugin manifests""" + + urls = { + "bbbc_download": f"{GITHUB_TAG}/saketprem/polus-plugins/bbbc_download/utils/bbbc-download-plugin/plugin.json", + "file_renaming": f"{GITHUB_TAG}/hamshkhawar/image-tools/filepattern_filerenaming/formats/file-renaming-tool/plugin.json", + "ome_converter": f"{GITHUB_TAG}/hamshkhawar/image-tools/basecontainer_omecontainer/formats/ome-converter-plugin/plugin.json", + "estimate_flatfield": f"{GITHUB_TAG}/nishaq503/image-tools/fix/basic/regression/basic-flatfield-estimation-tool/plugin.json", + "apply_flatfield": f"{GITHUB_TAG}/hamshkhawar/image-tools/cast_images/transforms/images/apply-flatfield-tool/plugin.json", + "kaggle_nuclei_segmentation": f"{GITHUB_TAG}/hamshkhawar/image-tools/kaggle-nucleiseg/segmentation/kaggle-nuclei-segmentation-tool/plugin.json", + "ftl_plugin": f"{GITHUB_TAG}/nishaq503/image-tools/fix/ftl-label/transforms/images/polus-ftl-label-plugin/plugin.json", + "nyxus_plugin": f"{GITHUB_TAG}/hamshkhawar/image-tools/nyxus_manifest/features/nyxus-plugin/plugin.json", + } + return urls[x] + + def modify_cwl(self) -> None: + """Modify CWL to incorporate environmental variables and permission access""" + for f in list(self.cwl_path.rglob("*.cwl")): + if "cwl" in f.name: + try: + with Path.open(f, "r") as file: + config = yaml.safe_load(file) + config["requirements"]["NetworkAccess"] = { + "networkAccess": True + } + config["requirements"]["EnvVarRequirement"] = { + "envDef": {"HOME": "/home/polusai"} + } + with open(f, "w") as out_file: + yaml.dump(config, out_file) + except FileNotFoundError: + logger.info("Error: There was an unexpected error while processing the file.") + return + + def workflow(self) -> None: + """ + A CWL feature extraction pipeline. 
+ """ + # BBBCDownload + bbbc = self.create_step(self.manifest_urls("bbbc_download")) + bbbc.name = self.name + bbbc.outDir = Path("bbbc.outDir") + + # Renaming plugin + rename = self.create_step(self.manifest_urls("file_renaming")) + rename.filePattern = self.file_pattern + rename.outFilePattern = self.out_file_pattern + rename.mapDirectory = self.map_directory + rename.inpDir = bbbc.outDir + rename.outDir = Path("rename.outDir") + + # OMEConverter + ome_converter = self.create_step(self.manifest_urls("ome_converter")) + ome_converter.filePattern = self._string_after_period(self.out_file_pattern) + ome_converter.fileExtension = ".ome.tif" + ome_converter.inpDir = rename.outDir + ome_converter.outDir = Path("ome_converter.outDir") + + if self.background_correction: + # Estimate Flatfield + estimate_flatfield = self.create_step(self.manifest_urls("estimate_flatfield")) + estimate_flatfield.inpDir = ome_converter.outDir + estimate_flatfield.filePattern = self.image_pattern + estimate_flatfield.groupBy = self.group_by + estimate_flatfield.getDarkfield = True + estimate_flatfield.outDir = Path("estimate_flatfield.outDir") + + # # Apply Flatfield + apply_flatfield = self.create_step(self.manifest_urls("apply_flatfield")) + apply_flatfield.imgDir = ome_converter.outDir + apply_flatfield.imgPattern = self.image_pattern + apply_flatfield.ffDir = estimate_flatfield.outDir + apply_flatfield.ffPattern = self.ff_pattern + apply_flatfield.dfPattern = self.df_pattern + apply_flatfield.outDir = Path("apply_flatfield.outDir") + apply_flatfield.dataType = True + + ## Kaggle Nuclei Segmentation + kaggle_nuclei_segmentation = self.create_step( + self.manifest_urls("kaggle_nuclei_segmentation") + ) + if self.background_correction: + kaggle_nuclei_segmentation.inpDir = apply_flatfield.outDir + else: + kaggle_nuclei_segmentation.inpDir = ome_converter.outDir + kaggle_nuclei_segmentation.filePattern = self.image_pattern + kaggle_nuclei_segmentation.outDir = Path("kaggle_nuclei_segmentation.outDir") + + ## FTL Label Plugin + ftl_plugin = self.create_step(self.manifest_urls("ftl_plugin")) + ftl_plugin.inpDir = kaggle_nuclei_segmentation.outDir + ftl_plugin.connectivity = 1 + ftl_plugin.binarizationThreshold = 0.5 + ftl_plugin.outDir = Path("ftl_plugin.outDir") + + # # ## Nyxus Plugin + nyxus_plugin = self.create_step(self.manifest_urls("nyxus_plugin")) + if self.background_correction: + nyxus_plugin.inpDir = apply_flatfield.outDir + else: + nyxus_plugin.inpDir = ome_converter.outDir + nyxus_plugin.segDir = ftl_plugin.outDir + nyxus_plugin.intPattern = self.image_pattern + nyxus_plugin.segPattern = self.seg_pattern + nyxus_plugin.features = self.features + nyxus_plugin.fileExtension = self.file_extension + nyxus_plugin.neighborDist = 5 + nyxus_plugin.pixelPerMicron = 1.0 + nyxus_plugin.outDir = Path("nyxus_plugin.outDir") + + logger.info("Initiating CWL Feature Extraction Workflow!!!") + if self.background_correction: + steps = [ + bbbc, + rename, + ome_converter, + estimate_flatfield, + apply_flatfield, + kaggle_nuclei_segmentation, + ftl_plugin, + nyxus_plugin + ] + else: + steps = [ + bbbc, + rename, + ome_converter, + kaggle_nuclei_segmentation, + ftl_plugin, + nyxus_plugin + ] + + workflow = api.Workflow(steps, "experiment", self.workflow_path) + # # Saving CLT for plugins + workflow._save_all_cwl(overwrite=True) + # # Adding environmental variables for bbbc_download and ome_converter plugin + self.modify_cwl() + # # # Save yaml to run CWL tool + workflow._save_yaml() + # Compile and run using WIC python API + 
workflow.compile(run_local=True, overwrite=False) + # # print(workflow.yml_path) + # # clean autognerated directories + self._clean() + self._move_outputs() + logger.info("Completed CWL Feature Extraction /Analysis Workflow.") + return + \ No newline at end of file diff --git a/workflows/cwl_nuclear_segmentation.py b/workflows/cwl_nuclear_segmentation.py new file mode 100644 index 0000000..d7d264b --- /dev/null +++ b/workflows/cwl_nuclear_segmentation.py @@ -0,0 +1,262 @@ +import wic.api.pythonapi as api +import polus.plugins as pp +from pathlib import Path +import yaml +import logging +import typing +import re +import shutil +import sys +sys.path.append('../') +from polus.image.workflows.utils import GITHUB_TAG + +# Initialize the logger +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +class CWLSegmentationWorkflow: + """ + A CWL Nuclear Segmentation pipeline. + + Attributes: + name : Name of imaging dataset of Broad Bioimage Benchmark Collection (https://bbbc.broadinstitute.org/image_sets). + file_pattern : Pattern for parsing raw filenames. + out_file_pattern : Preferred format for filenames + image_pattern : Pattern for parsing intensity image filenames after renaming when using map_directory + seg_pattern : Pattern use to parse segmentation image filenames + map_directory : Mapping of folder name + ff_pattern: The filename pattern employed to select flatfield components from the ffDir. + df_pattern:The filename pattern employed to select darkfield components from the ffDir + group_by: Grouping variables for filePattern + background_correction: Execute background correction + """ + def __init__( + self, + name: str, + file_pattern: str, + out_file_pattern: str, + image_pattern: str, + seg_pattern: str, + ff_pattern: typing.Optional[str] = '', + df_pattern: typing.Optional[str] = '', + group_by: typing.Optional[str] = '', + map_directory: typing.Optional[bool] = False, + background_correction: typing.Optional[bool] = False, + ): + self.name = name + self.file_pattern = file_pattern + self.out_file_pattern = out_file_pattern + self.map_directory = map_directory + self.ff_pattern = ff_pattern + self.df_pattern = df_pattern + self.group_by = group_by + self.wic_path = api._WIC_PATH + self.PATH = Path(self.wic_path.parent).joinpath("image-workflows") + self.cwl_path, self.workflow_path = self._create_directories() + self.image_pattern = image_pattern + self.seg_pattern = seg_pattern + self.background_correction = background_correction + + def _create_directories(self) -> None: + """Create directories for CWL outputs""" + cwl_path = self.PATH.joinpath("cwl_adapters") + cwl_path.mkdir(parents=True, exist_ok=True) + workflow_path = self.PATH.joinpath("outputs").resolve() + workflow_path.mkdir(exist_ok=True) + return cwl_path, workflow_path + + def _clean(self) -> None: + """Cleaning of redundant directories generating on running CWL""" + logger.info("Cleaning directories!!!") + destination_path = self.workflow_path.joinpath("experiment") + dir_names = ("autogenerated", "cachedir", "RUNS", "provenance", "cwl_adapters") + dir_list = [w for w in self.wic_path.iterdir() if w.is_dir() if w.name in dir_names] + for d in dir_list: + shutil.rmtree(d) + for d in destination_path.iterdir(): + if d.name.endswith("cwl_adapters"): + shutil.rmtree(d) + for d in self.PATH.iterdir(): + if d.name.endswith("cwl_adapters"): + shutil.move(d, destination_path) + + return + + def _move_outputs(self) -> None: + """Transfer outputs from the WIC directory to the workflow path""" + 
logger.info("Move outputs to workflow path!!!") + for d in self.wic_path.iterdir(): + if d.name.endswith("outdir"): + shutil.move(d, self.workflow_path) + return + + def _camel(self, name: str) -> str: + """Convert plugin name to camel case.""" + name = re.sub(r"(_|-)+", " ", name).title().replace(" ", "") + return "".join([name[0].upper(), name[1:]]) + + def _string_after_period(self, x): + """Get a string after period.""" + match = re.search(r"\.(.*)", x) + if match: + # Get the part after the period + return f".*.{match.group(1)}" + else: + return "" + + def _add_backslash_before_parentheses(self, x): + """Add backslash to generate ff_pattern and df_pattern""" + # Define the regular expression pattern to match parenthesis + pattern_1 = r"(\()|(\))" + # Use re.sub() to add a backslash before starting and finishing parenthesis + result = re.sub(pattern_1, r"\\\1\2", x) + pattern_2 = r"\d" + result = ( + result.split("_c")[0] + + "_c{c:d}" + + re.sub(pattern_2, "", result.split("_c")[1]) + ) + return result + + def create_step(self, url: str) -> api.Step: + """Generate the plugin class name from the plugin name specified in the manifest""" + manifest = pp.submit_plugin(url) + plugin_version = str(manifest.version) + cwl_tool = pp.get_plugin(self._camel(manifest.name), plugin_version).save_cwl( + self.cwl_path.joinpath(f"{self._camel(manifest.name)}.cwl") + ) + step = api.Step(cwl_tool) + return step + + def manifest_urls(self, x: str) -> str: + """URLs on GitHub for plugin manifests""" + urls = { + "bbbc_download": f"{GITHUB_TAG}/saketprem/polus-plugins/bbbc_download/utils/bbbc-download-plugin/plugin.json", + "file_renaming": f"{GITHUB_TAG}/hamshkhawar/image-tools/filepattern_filerenaming/formats/file-renaming-tool/plugin.json", + "ome_converter": f"{GITHUB_TAG}/hamshkhawar/image-tools/basecontainer_omecontainer/formats/ome-converter-plugin/plugin.json", + "estimate_flatfield": f"{GITHUB_TAG}/nishaq503/image-tools/fix/basic/regression/basic-flatfield-estimation-tool/plugin.json", + "apply_flatfield": f"{GITHUB_TAG}/hamshkhawar/image-tools/cast_images/transforms/images/apply-flatfield-tool/plugin.json", + "kaggle_nuclei_segmentation": f"{GITHUB_TAG}/hamshkhawar/image-tools/kaggle-nucleiseg/segmentation/kaggle-nuclei-segmentation-tool/plugin.json", + "ftl_plugin": f"{GITHUB_TAG}/nishaq503/image-tools/fix/ftl-label/transforms/images/polus-ftl-label-plugin/plugin.json" + } + return urls[x] + + def modify_cwl(self) -> None: + """Modify CWL to incorporate environmental variables and permission access""" + for f in list(self.cwl_path.rglob("*.cwl")): + if "cwl" in f.name: + try: + with Path.open(f, "r") as file: + config = yaml.safe_load(file) + config["requirements"]["NetworkAccess"] = { + "networkAccess": True + } + config["requirements"]["EnvVarRequirement"] = { + "envDef": {"HOME": "/home/polusai"} + } + with open(f, "w") as out_file: + yaml.dump(config, out_file) + except FileNotFoundError: + logger.info("Error: There was an unexpected error while processing the file.") + return + + def workflow(self) -> None: + """ + A CWL nuclear segmentation pipeline. 
+ """ + # BBBCDownload + bbbc = self.create_step(self.manifest_urls("bbbc_download")) + bbbc.name = self.name + bbbc.outDir = Path("bbbc.outDir") + + # Renaming plugin + rename = self.create_step(self.manifest_urls("file_renaming")) + rename.filePattern = self.file_pattern + rename.outFilePattern = self.out_file_pattern + rename.mapDirectory = self.map_directory + rename.inpDir = bbbc.outDir + rename.outDir = Path("rename.outDir") + + + # OMEConverter + ome_converter = self.create_step(self.manifest_urls("ome_converter")) + ome_converter.filePattern = self._string_after_period(self.out_file_pattern) + ome_converter.fileExtension = ".ome.tif" + ome_converter.inpDir = rename.outDir + ome_converter.outDir = Path("ome_converter.outDir") + + if self.background_correction: + # Estimate Flatfield + estimate_flatfield = self.create_step(self.manifest_urls("estimate_flatfield")) + estimate_flatfield.inpDir = ome_converter.outDir + estimate_flatfield.filePattern = self.image_pattern + estimate_flatfield.groupBy = self.group_by + estimate_flatfield.getDarkfield = True + estimate_flatfield.outDir = Path("estimate_flatfield.outDir") + + # # Apply Flatfield + apply_flatfield = self.create_step(self.manifest_urls("apply_flatfield")) + apply_flatfield.imgDir = ome_converter.outDir + apply_flatfield.imgPattern = self.image_pattern + apply_flatfield.ffDir = estimate_flatfield.outDir + apply_flatfield.ffPattern = self.ff_pattern + apply_flatfield.dfPattern = self.df_pattern + apply_flatfield.outDir = Path("apply_flatfield.outDir") + apply_flatfield.dataType = True + + ## Kaggle Nuclei Segmentation + kaggle_nuclei_segmentation = self.create_step( + self.manifest_urls("kaggle_nuclei_segmentation") + ) + if self.background_correction: + kaggle_nuclei_segmentation.inpDir = apply_flatfield.outDir + else: + kaggle_nuclei_segmentation.inpDir = ome_converter.outDir + + kaggle_nuclei_segmentation.filePattern = self.image_pattern + kaggle_nuclei_segmentation.outDir = Path("kaggle_nuclei_segmentation.outDir") + + ## FTL Label Plugin + ftl_plugin = self.create_step(self.manifest_urls("ftl_plugin")) + ftl_plugin.inpDir = kaggle_nuclei_segmentation.outDir + ftl_plugin.connectivity = 1 + ftl_plugin.binarizationThreshold = 0.5 + ftl_plugin.outDir = Path("ftl_plugin.outDir") + + logger.info("Initiating CWL Nuclear Segmentation Workflow!!!") + if self.background_correction: + steps = [ + bbbc, + rename, + ome_converter, + estimate_flatfield, + apply_flatfield, + kaggle_nuclei_segmentation, + ftl_plugin + ] + else: + steps = [ + bbbc, + rename, + ome_converter, + kaggle_nuclei_segmentation, + ftl_plugin] + + + + workflow = api.Workflow(steps, "experiment", self.workflow_path) + # # Saving CLT for plugins + workflow._save_all_cwl(overwrite=True) + # # Adding environmental variables for bbbc_download and ome_converter plugin + self.modify_cwl() + # # # Save yaml to run CWL tool + workflow._save_yaml() + # Compile and run using WIC python API + workflow.compile(run_local=True, overwrite=False) + # # print(workflow.yml_path) + # # clean autognerated directories + self._clean() + self._move_outputs() + logger.info("Completed CWL nuclear segmentation workflow.") + return \ No newline at end of file