diff --git a/Makefile b/Makefile index 7723ba4..fe93195 100644 --- a/Makefile +++ b/Makefile @@ -145,11 +145,11 @@ diff-cover.html: coverage.xml ## test : run the cwl-utils test suite test: $(PYSOURCES) - python -m pytest -rsx ${PYTEST_EXTRA} + python -m pytest ${PYTEST_EXTRA} ## testcov : run the cwl-utils test suite and collect coverage testcov: $(PYSOURCES) - pytest --cov ${PYTEST_EXTRA} + python -m pytest --cov ${PYTEST_EXTRA} sloccount.sc: $(PYSOURCES) Makefile sloccount --duplicates --wide --details $^ > $@ diff --git a/cwl_utils/graph_split.py b/cwl_utils/graph_split.py index c3bfbc1..3a1b346 100755 --- a/cwl_utils/graph_split.py +++ b/cwl_utils/graph_split.py @@ -10,19 +10,32 @@ import argparse import json +import logging import os +import re import sys from collections.abc import MutableMapping -from typing import IO, TYPE_CHECKING, Any, Union, cast +from io import TextIOWrapper +from pathlib import Path +from typing import ( + IO, + Any, + Union, + cast, +) from cwlformat.formatter import stringify_dict -from ruamel.yaml.dumper import RoundTripDumper -from ruamel.yaml.main import YAML, dump +from ruamel.yaml.main import YAML from ruamel.yaml.representer import RoundTripRepresenter from schema_salad.sourceline import SourceLine, add_lc_filename -if TYPE_CHECKING: - from _typeshed import StrPath +from cwl_utils.loghandler import _logger as _cwlutilslogger + +_logger = logging.getLogger("cwl-graph-split") # pylint: disable=invalid-name +defaultStreamHandler = logging.StreamHandler() # pylint: disable=invalid-name +_logger.addHandler(defaultStreamHandler) +_logger.setLevel(logging.INFO) +_cwlutilslogger.setLevel(100) def arg_parser() -> argparse.ArgumentParser: @@ -73,7 +86,7 @@ def run(args: list[str]) -> int: with open(options.cwlfile) as source_handle: graph_split( source_handle, - options.outdir, + Path(options.outdir), options.output_format, options.mainfile, options.pretty, @@ -83,7 +96,7 @@ def run(args: list[str]) -> int: def graph_split( sourceIO: IO[str], - output_dir: "StrPath", + output_dir: Path, output_format: str, mainfile: str, pretty: bool, @@ -100,6 +113,13 @@ def graph_split( version = source.pop("cwlVersion") + # Check outdir parent exists + if not output_dir.parent.is_dir(): + raise NotADirectoryError(f"Parent directory of {output_dir} does not exist") + # If output_dir is not a directory, create it + if not output_dir.is_dir(): + output_dir.mkdir() + def my_represent_none( self: Any, data: Any ) -> Any: # pylint: disable=unused-argument @@ -111,7 +131,7 @@ def my_represent_none( for entry in source["$graph"]: entry_id = entry.pop("id").lstrip("#") entry["cwlVersion"] = version - imports = rewrite(entry, entry_id) + imports = rewrite(entry, entry_id, output_dir) if imports: for import_name in imports: rewrite_types(entry, f"#{import_name}", False) @@ -121,25 +141,28 @@ def my_represent_none( else: entry_id = mainfile - output_file = os.path.join(output_dir, entry_id + ".cwl") + output_file = output_dir / (re.sub(".cwl$", "", entry_id) + ".cwl") if output_format == "json": json_dump(entry, output_file) elif output_format == "yaml": - yaml_dump(entry, output_file, pretty) + with output_file.open("w", encoding="utf-8") as output_handle: + yaml_dump(entry, output_handle, pretty) -def rewrite(document: Any, doc_id: str) -> set[str]: +def rewrite( + document: Any, doc_id: str, output_dir: Path, pretty: bool = False +) -> set[str]: """Rewrite the given element from the CWL $graph.""" imports = set() if isinstance(document, list) and not isinstance(document, str): for entry in document: - imports.update(rewrite(entry, doc_id)) + imports.update(rewrite(entry, doc_id, output_dir, pretty)) elif isinstance(document, dict): this_id = document["id"] if "id" in document else None for key, value in document.items(): with SourceLine(document, key, Exception): if key == "run" and isinstance(value, str) and value[0] == "#": - document[key] = f"{value[1:]}.cwl" + document[key] = f"{re.sub('.cwl$', '', value[1:])}.cwl" elif key in ("id", "outputSource") and value.startswith("#" + doc_id): document[key] = value[len(doc_id) + 2 :] elif key == "out" and isinstance(value, list): @@ -179,15 +202,15 @@ def rewrite_id(entry: Any) -> Union[MutableMapping[Any, Any], str]: elif key == "$import": rewrite_import(document) elif key == "class" and value == "SchemaDefRequirement": - return rewrite_schemadef(document) + return rewrite_schemadef(document, output_dir, pretty) else: - imports.update(rewrite(value, doc_id)) + imports.update(rewrite(value, doc_id, output_dir, pretty)) return imports def rewrite_import(document: MutableMapping[str, Any]) -> None: """Adjust the $import directive.""" - external_file = document["$import"].split("/")[0][1:] + external_file = document["$import"].split("/")[0].lstrip("#") document["$import"] = external_file @@ -215,22 +238,25 @@ def rewrite_types(field: Any, entry_file: str, sameself: bool) -> None: rewrite_types(entry, entry_file, sameself) -def rewrite_schemadef(document: MutableMapping[str, Any]) -> set[str]: +def rewrite_schemadef( + document: MutableMapping[str, Any], output_dir: Path, pretty: bool = False +) -> set[str]: """Dump the schemadefs to their own file.""" for entry in document["types"]: if "$import" in entry: rewrite_import(entry) elif "name" in entry and "/" in entry["name"]: - entry_file, entry["name"] = entry["name"].split("/") - for field in entry["fields"]: + entry_file, entry["name"] = entry["name"].lstrip("#").split("/") + for field in entry.get("fields", []): field["name"] = field["name"].split("/")[2] rewrite_types(field, entry_file, True) - with open(entry_file[1:], "a", encoding="utf-8") as entry_handle: - dump([entry], entry_handle, Dumper=RoundTripDumper) - entry["$import"] = entry_file[1:] + with (output_dir / entry_file).open("a", encoding="utf-8") as entry_handle: + yaml_dump(entry, entry_handle, pretty) + entry["$import"] = entry_file del entry["name"] del entry["type"] - del entry["fields"] + if "fields" in entry: + del entry["fields"] seen_imports = set() def seen_import(entry: MutableMapping[str, Any]) -> bool: @@ -247,26 +273,26 @@ def seen_import(entry: MutableMapping[str, Any]) -> bool: return seen_imports -def json_dump(entry: Any, output_file: str) -> None: +def json_dump(entry: Any, output_file: Path) -> None: """Output object as JSON.""" - with open(output_file, "w", encoding="utf-8") as result_handle: + with output_file.open("w", encoding="utf-8") as result_handle: json.dump(entry, result_handle, indent=4) -def yaml_dump(entry: Any, output_file: str, pretty: bool) -> None: +def yaml_dump( + entry: Any, + output_handle: TextIOWrapper, + pretty: bool, +) -> None: """Output object as YAML.""" - yaml = YAML(typ="rt") + if pretty: + output_handle.write(stringify_dict(entry)) + return + yaml = YAML(typ="rt", pure=True) yaml.default_flow_style = False - yaml.map_indent = 4 - yaml.sequence_indent = 2 - with open(output_file, "w", encoding="utf-8") as result_handle: - if pretty: - result_handle.write(stringify_dict(entry)) - else: - yaml.dump( - entry, - result_handle, - ) + yaml.indent = 4 + yaml.block_seq_indent = 2 + yaml.dump(entry, output_handle) if __name__ == "__main__": diff --git a/pyproject.toml b/pyproject.toml index bba79a6..4137e3c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,6 +60,7 @@ test = "pytest" [tool.pytest.ini_options] testpaths = ["tests"] +addopts = "-rsx -n auto" [tool.setuptools] packages = [ diff --git a/requirements.txt b/requirements.txt index 604f77e..7f943b1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,3 +5,4 @@ requests schema-salad >= 8.8.20250205075315,<9 ruamel.yaml >= 0.17.6, < 0.19 typing_extensions;python_version<'3.10' +cwlformat >= 2022.2.18 diff --git a/test-requirements.txt b/test-requirements.txt index 508e795..6ca7d9a 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1,6 +1,8 @@ pytest < 9 pytest-cov -pytest-xdist +pytest-xdist[psutil] cwlformat pytest-mock >= 1.10.0 jsonschema >= 4.21.1 +udocker +cwltool diff --git a/testdata/lib.js b/testdata/lib.js index 0cae276..364774f 100644 --- a/testdata/lib.js +++ b/testdata/lib.js @@ -3,5 +3,5 @@ var foo = function(x) { } var bar = function(n, x) { - return `{n} engineers walk into a {x}` -} \ No newline at end of file + return n + " engineers walk into a " + x +} diff --git a/testdata/remote-cwl/wf1-packed.cwl b/testdata/remote-cwl/wf1-packed.cwl new file mode 100644 index 0000000..80ad002 --- /dev/null +++ b/testdata/remote-cwl/wf1-packed.cwl @@ -0,0 +1,158 @@ +{ + "$graph": [ + { + "class": "CommandLineTool", + "inputs": [ + { + "type": "string", + "inputBinding": { + "position": 1, + "valueFrom": "A_$(inputs.in1)_B_${return inputs.in1}_C_$(inputs.in1)" + }, + "id": "#tool1.cwl/in1" + } + ], + "baseCommand": "echo", + "arguments": [ + { + "valueFrom": "$(runtime)" + } + ], + "stdout": "out.txt", + "requirements": [ + { + "expressionLib": [ + "var foo = function(x) {\n return 2 * x\n}\n\nvar bar = function(n, x) {\n return `{n} engineers walk into a {x}`\n}" + ], + "class": "InlineJavascriptRequirement" + } + ], + "id": "#tool1.cwl", + "outputs": [ + { + "type": "string", + "outputBinding": { + "glob": "out.txt", + "loadContents": true, + "outputEval": "$(self)_D_$(runtime)" + }, + "id": "#tool1.cwl/out1" + } + ] + }, + { + "class": "CommandLineTool", + "inputs": [ + { + "type": "#testtypes.yml/my_boolean_array", + "inputBinding": { + "position": 1, + "valueFrom": "A_$(inputs.in1)_B_${return inputs.in1}_C_$(inputs.in1)" + }, + "id": "#tool2.cwl/in1" + } + ], + "baseCommand": "echo", + "arguments": [ + { + "valueFrom": "$(runtime)" + } + ], + "outputs": [ + { + "type": "string", + "outputBinding": { + "glob": "out.txt", + "loadContents": true, + "outputEval": "$(self)_D_$(runtime)" + }, + "id": "#tool2.cwl/out1" + } + ], + "stdout": "out.txt", + "requirements": [ + { + "types": [ + { + "name": "#testtypes.yml/my_boolean_array", + "type": "array", + "items": "boolean", + "label": "A boolean array" + }, + { + "name": "#testtypes.yml/my_enum", + "type": "enum", + "symbols": [ + "#testtypes.yml/my_enum/a", + "#testtypes.yml/my_enum/b", + "#testtypes.yml/my_enum/c" + ], + "label": "A required enum" + } + ], + "class": "SchemaDefRequirement" + } + ], + "id": "#tool2.cwl" + }, + { + "class": "Workflow", + "inputs": [ + { + "id": "#main/in1", + "type": "#testtypes.yml/my_boolean_array" + } + ], + "steps": [ + { + "run": "#tool2.cwl", + "in": [ + { + "source": "#main/in1", + "id": "#main/s1/in1" + } + ], + "out": [ + "#main/s1/out1" + ], + "id": "#main/s1" + }, + { + "run": "#tool1.cwl", + "in": [ + { + "source": "#main/s1/out1", + "id": "#main/s2/in1" + } + ], + "out": [ + "#main/s2/out1" + ], + "id": "#main/s2" + } + ], + "outputs": [ + { + "id": "#main/out1", + "type": "string", + "outputSource": "#main/s2/out1" + } + ], + "requirements": [ + { + "types": [ + { + "$import": "#testtypes.yml/my_boolean_array" + }, + { + "$import": "#testtypes.yml/my_enum" + } + ], + "class": "SchemaDefRequirement" + } + ], + "id": "#main" + } + ], + "cwlVersion": "v1.2" +} diff --git a/testdata/workflows/clt1.cwl b/testdata/workflows/clt1.cwl new file mode 100644 index 0000000..ae92b68 --- /dev/null +++ b/testdata/workflows/clt1.cwl @@ -0,0 +1,45 @@ +#!/usr/bin/env cwl-runner + +class: CommandLineTool +cwlVersion: v1.0 + +requirements: + InitialWorkDirRequirement: + listing: + - entryname: inputs.txt + entry: | + $(inputs.in1.file.path) + $(inputs.in1.meta.species) + SchemaDefRequirement: + types: + - $import: ../types/recursive.yml + - $import: ../types/array.yml + - $import: ../types/singletype.yml + # - $import: ../types/singletype2.yml + +inputs: + in1: ../types/recursive.yml#file_with_sample_meta + in2: + type: ../types/array.yml#study_meta_too + in3: + type: ../types/singletype.yml#simple_record +# in4: +# type: ../types/singletype2.yml#simple_record2 + in4: + type: [string, ../types/recursive.yml#sample_meta] + in5: + type: Any? + +outputs: + out1: + type: File + outputBinding: + glob: '*.txt' + out2: + type: ../types/array.yml#study_meta_too + outputBinding: + outputEval: $(inputs.in2) + out3: stdout + +baseCommand: [echo] +arguments: [hello world] diff --git a/testdata/workflows/link-to-clt1.cwl b/testdata/workflows/link-to-clt1.cwl new file mode 120000 index 0000000..e2cd4b4 --- /dev/null +++ b/testdata/workflows/link-to-clt1.cwl @@ -0,0 +1 @@ +clt1.cwl \ No newline at end of file diff --git a/testdata/workflows/wf5-packed.cwl b/testdata/workflows/wf5-packed.cwl new file mode 100644 index 0000000..81b0253 --- /dev/null +++ b/testdata/workflows/wf5-packed.cwl @@ -0,0 +1,274 @@ +{ + "$graph": [ + { + "class": "CommandLineTool", + "requirements": [ + { + "listing": [ + { + "entryname": "inputs.txt", + "entry": "$(inputs.in1.file.path)\n$(inputs.in1.meta.species)\n" + } + ], + "class": "InitialWorkDirRequirement" + }, + { + "types": [ + { + "name": "#recursive.yml/sample_meta", + "type": "record", + "fields": [ + { + "name": "#recursive.yml/sample_meta/sample", + "type": [ + "null", + "string" + ] + }, + { + "name": "#recursive.yml/sample_meta/species", + "type": "string" + } + ] + }, + { + "name": "#recursive.yml/file_with_sample_meta", + "type": "record", + "fields": [ + { + "name": "#recursive.yml/file_with_sample_meta/file", + "type": "File" + }, + { + "name": "#recursive.yml/file_with_sample_meta/meta", + "type": "#recursive.yml/sample_meta" + } + ] + }, + { + "name": "#recursive.yml/info_with_sample_meta", + "type": "record", + "fields": [ + { + "type": "string", + "name": "#recursive.yml/info_with_sample_meta/comment" + }, + { + "type": "#recursive.yml/sample_meta", + "name": "#recursive.yml/info_with_sample_meta/meta" + } + ] + }, + { + "name": "#array.yml/sample_meta2", + "type": "record", + "fields": [ + { + "name": "#array.yml/sample_meta2/prop", + "type": "string" + } + ] + }, + { + "name": "#array.yml/study_meta", + "type": "array", + "items": "#array.yml/sample_meta2" + }, + { + "name": "#array.yml/study_meta_too", + "type": "record", + "fields": [ + { + "type": "#array.yml/sample_meta2", + "name": "#array.yml/study_meta_too/meta1" + }, + { + "type": "#array.yml/study_meta", + "name": "#array.yml/study_meta_too/meta2" + } + ] + }, + { + "name": "#singletype.yml/simple_record", + "type": "record", + "fields": [ + { + "type": "string", + "name": "#singletype.yml/simple_record/prop" + } + ] + } + ], + "class": "SchemaDefRequirement" + } + ], + "inputs": [ + { + "type": "#recursive.yml/file_with_sample_meta", + "id": "#link-to-clt1.cwl/in1" + }, + { + "type": "#array.yml/study_meta_too", + "id": "#link-to-clt1.cwl/in2" + }, + { + "type": "#singletype.yml/simple_record", + "id": "#link-to-clt1.cwl/in3" + }, + { + "type": [ + "string", + "#recursive.yml/sample_meta" + ], + "id": "#link-to-clt1.cwl/in4" + }, + { + "type": [ + "null", + "Any" + ], + "id": "#link-to-clt1.cwl/in5" + } + ], + "baseCommand": [ + "echo" + ], + "arguments": [ + "hello world" + ], + "id": "#link-to-clt1.cwl", + "stdout": "2cd5f434d33dce1a50ec686c741fba97b41d2544", + "hints": [ + { + "class": "LoadListingRequirement", + "loadListing": "deep_listing" + }, + { + "class": "NetworkAccess", + "networkAccess": true + } + ], + "outputs": [ + { + "type": "File", + "outputBinding": { + "glob": "*.txt" + }, + "id": "#link-to-clt1.cwl/out1" + }, + { + "type": "#array.yml/study_meta_too", + "outputBinding": { + "outputEval": "$(inputs.in2)" + }, + "id": "#link-to-clt1.cwl/out2" + }, + { + "type": "File", + "id": "#link-to-clt1.cwl/out3", + "outputBinding": { + "glob": "2cd5f434d33dce1a50ec686c741fba97b41d2544" + } + } + ] + }, + { + "class": "Workflow", + "inputs": [ + { + "type": "#recursive.yml/file_with_sample_meta", + "id": "#main/in1" + }, + { + "type": "#array.yml/study_meta_too", + "id": "#main/in2" + }, + { + "type": "#singletype.yml/simple_record", + "id": "#main/in3" + }, + { + "type": [ + "string", + "#recursive.yml/sample_meta" + ], + "id": "#main/in4" + } + ], + "steps": [ + { + "run": "#link-to-clt1.cwl", + "in": [ + { + "source": "#main/in1", + "id": "#main/s1/in1" + }, + { + "source": "#main/in2", + "id": "#main/s1/in2" + }, + { + "source": "#main/in3", + "id": "#main/s1/in3" + }, + { + "source": "#main/in4", + "id": "#main/s1/in4" + } + ], + "out": [ + "#main/s1/out2" + ], + "id": "#main/s1" + } + ], + "outputs": [ + { + "id": "#main/out1", + "type": "#array.yml/study_meta_too", + "outputSource": "#main/s1/out2" + } + ], + "requirements": [ + { + "types": [ + { + "$import": "#recursive.yml/sample_meta" + }, + { + "$import": "#recursive.yml/file_with_sample_meta" + }, + { + "$import": "#recursive.yml/info_with_sample_meta" + }, + { + "$import": "#array.yml/sample_meta2" + }, + { + "$import": "#array.yml/study_meta" + }, + { + "$import": "#array.yml/study_meta_too" + }, + { + "$import": "#singletype.yml/simple_record" + }, + { + "name": "#main/user_type1", + "type": "record", + "fields": [ + { + "name": "#main/user_type1/prop", + "type": "string" + } + ] + } + ], + "class": "SchemaDefRequirement" + } + ], + "id": "#main" + } + ], + "cwlVersion": "v1.2" +} diff --git a/testdata/workflows/wf5.cwl b/testdata/workflows/wf5.cwl index c7ff3cb..77999ef 100755 --- a/testdata/workflows/wf5.cwl +++ b/testdata/workflows/wf5.cwl @@ -15,7 +15,7 @@ inputs: steps: s1: - run: ../tools/link-to-clt1.cwl + run: link-to-clt1.cwl in: in1: "#in1" # This should be normalized out in2: in2 diff --git a/tests/test_graph_split.py b/tests/test_graph_split.py index eb4f9fa..37051cd 100644 --- a/tests/test_graph_split.py +++ b/tests/test_graph_split.py @@ -1,14 +1,18 @@ # SPDX-License-Identifier: Apache-2.0 """Test the CWL $graph document splitter tool.""" from io import StringIO +import json from pathlib import Path +import pytest import requests from cwl_utils.graph_split import graph_split from .util import get_path +from cwltool.tests.util import get_main_output + URI = ( "https://gist.githubusercontent.com/altairwei/" "6a0097db95cad23de36f825ed3b9f4b0/raw/" @@ -27,3 +31,46 @@ def test_graph_split_offline(tmp_path: Path) -> None: """Confirm that a local provided example produces no exception.""" with get_path("testdata/js-expr-req-wf.cwl").open() as handle: graph_split(handle, tmp_path, "yaml", "main.cwl", True) + target = tmp_path / "wf.cwl" + assert target.exists() + code, stdout, stderr = get_main_output(["--debug", str(target)]) + assert code == 0, stderr + assert ( + json.loads(stdout)["out"]["checksum"] + == "sha1$7448d8798a4380162d4b56f9b452e2f6f9e24e7a" + ) + + +def test_graph_split_json_offline(tmp_path: Path) -> None: + """Confirm that a local provided example produces no exception in JSON mode.""" + target = tmp_path / "subdir" / "wf.cwl" + with get_path("testdata/js-expr-req-wf.cwl").open() as handle: + graph_split(handle, target.parent, "json", "main.cwl", True) + assert target.exists() + code, stdout, stderr = get_main_output(["--debug", str(target)]) + assert code == 0, stderr + assert ( + json.loads(stdout)["out"]["checksum"] + == "sha1$7448d8798a4380162d4b56f9b452e2f6f9e24e7a" + ) + + +def test_graph_split_bad_path() -> None: + """Expect an exception when the target directory parent does not exist.""" + with get_path("testdata/js-expr-req-wf.cwl").open() as handle: + with pytest.raises(NotADirectoryError): + graph_split( + handle, Path("/__non_existent/tmp_path"), "json", "main.cwl", True + ) + + +def test_graph_split_complex1(tmp_path: Path) -> None: + """Split a more complex graph with SchemaDefRequirement and $import.""" + with get_path("testdata/remote-cwl/wf1-packed.cwl").open() as handle: + graph_split(handle, tmp_path, "yaml", "main.cwl", False) + + +def test_graph_split_complex2(tmp_path: Path) -> None: + """Split another complex graph with SchemaDefRequirement and $import.""" + with get_path("testdata/workflows/wf5-packed.cwl").open() as handle: + graph_split(handle, tmp_path, "yaml", "main.cwl", False) diff --git a/tox.ini b/tox.ini index 9127dcb..71e0ad1 100644 --- a/tox.ini +++ b/tox.ini @@ -9,9 +9,6 @@ envlist = isolated_build = True skip_missing_interpreters = True -[pytest] -addopts=-n auto - [gh-actions] python = 3.9: py39