Skip to content

Use yaml.dump over old dump command, stripped double .cwl, and fixed import issues #287

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -145,11 +145,11 @@ diff-cover.html: coverage.xml

## test : run the cwl-utils test suite
test: $(PYSOURCES)
python -m pytest -rsx ${PYTEST_EXTRA}
python -m pytest ${PYTEST_EXTRA}

## testcov : run the cwl-utils test suite and collect coverage
testcov: $(PYSOURCES)
pytest --cov ${PYTEST_EXTRA}
python -m pytest --cov ${PYTEST_EXTRA}

sloccount.sc: $(PYSOURCES) Makefile
sloccount --duplicates --wide --details $^ > $@
Expand Down
100 changes: 63 additions & 37 deletions cwl_utils/graph_split.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,32 @@

import argparse
import json
import logging
import os
import re
import sys
from collections.abc import MutableMapping
from typing import IO, TYPE_CHECKING, Any, Union, cast
from io import TextIOWrapper
from pathlib import Path
from typing import (
IO,
Any,
Union,
cast,
)

from cwlformat.formatter import stringify_dict
from ruamel.yaml.dumper import RoundTripDumper
from ruamel.yaml.main import YAML, dump
from ruamel.yaml.main import YAML
from ruamel.yaml.representer import RoundTripRepresenter
from schema_salad.sourceline import SourceLine, add_lc_filename

if TYPE_CHECKING:
from _typeshed import StrPath
from cwl_utils.loghandler import _logger as _cwlutilslogger

_logger = logging.getLogger("cwl-graph-split") # pylint: disable=invalid-name
defaultStreamHandler = logging.StreamHandler() # pylint: disable=invalid-name
_logger.addHandler(defaultStreamHandler)
_logger.setLevel(logging.INFO)
_cwlutilslogger.setLevel(100)


def arg_parser() -> argparse.ArgumentParser:
Expand Down Expand Up @@ -73,7 +86,7 @@ def run(args: list[str]) -> int:
with open(options.cwlfile) as source_handle:
graph_split(
source_handle,
options.outdir,
Path(options.outdir),
options.output_format,
options.mainfile,
options.pretty,
Expand All @@ -83,7 +96,7 @@ def run(args: list[str]) -> int:

def graph_split(
sourceIO: IO[str],
output_dir: "StrPath",
output_dir: Path,
output_format: str,
mainfile: str,
pretty: bool,
Expand All @@ -100,6 +113,13 @@ def graph_split(

version = source.pop("cwlVersion")

# Check outdir parent exists
if not output_dir.parent.is_dir():
raise NotADirectoryError(f"Parent directory of {output_dir} does not exist")
# If output_dir is not a directory, create it
if not output_dir.is_dir():
output_dir.mkdir()

def my_represent_none(
self: Any, data: Any
) -> Any: # pylint: disable=unused-argument
Expand All @@ -111,7 +131,7 @@ def my_represent_none(
for entry in source["$graph"]:
entry_id = entry.pop("id").lstrip("#")
entry["cwlVersion"] = version
imports = rewrite(entry, entry_id)
imports = rewrite(entry, entry_id, output_dir)
if imports:
for import_name in imports:
rewrite_types(entry, f"#{import_name}", False)
Expand All @@ -121,25 +141,28 @@ def my_represent_none(
else:
entry_id = mainfile

output_file = os.path.join(output_dir, entry_id + ".cwl")
output_file = output_dir / (re.sub(".cwl$", "", entry_id) + ".cwl")
if output_format == "json":
json_dump(entry, output_file)
elif output_format == "yaml":
yaml_dump(entry, output_file, pretty)
with output_file.open("w", encoding="utf-8") as output_handle:
yaml_dump(entry, output_handle, pretty)


def rewrite(document: Any, doc_id: str) -> set[str]:
def rewrite(
document: Any, doc_id: str, output_dir: Path, pretty: bool = False
) -> set[str]:
"""Rewrite the given element from the CWL $graph."""
imports = set()
if isinstance(document, list) and not isinstance(document, str):
for entry in document:
imports.update(rewrite(entry, doc_id))
imports.update(rewrite(entry, doc_id, output_dir, pretty))
elif isinstance(document, dict):
this_id = document["id"] if "id" in document else None
for key, value in document.items():
with SourceLine(document, key, Exception):
if key == "run" and isinstance(value, str) and value[0] == "#":
document[key] = f"{value[1:]}.cwl"
document[key] = f"{re.sub('.cwl$', '', value[1:])}.cwl"
elif key in ("id", "outputSource") and value.startswith("#" + doc_id):
document[key] = value[len(doc_id) + 2 :]
elif key == "out" and isinstance(value, list):
Expand Down Expand Up @@ -179,15 +202,15 @@ def rewrite_id(entry: Any) -> Union[MutableMapping[Any, Any], str]:
elif key == "$import":
rewrite_import(document)
elif key == "class" and value == "SchemaDefRequirement":
return rewrite_schemadef(document)
return rewrite_schemadef(document, output_dir, pretty)
else:
imports.update(rewrite(value, doc_id))
imports.update(rewrite(value, doc_id, output_dir, pretty))
return imports


def rewrite_import(document: MutableMapping[str, Any]) -> None:
"""Adjust the $import directive."""
external_file = document["$import"].split("/")[0][1:]
external_file = document["$import"].split("/")[0].lstrip("#")
document["$import"] = external_file


Expand Down Expand Up @@ -215,22 +238,25 @@ def rewrite_types(field: Any, entry_file: str, sameself: bool) -> None:
rewrite_types(entry, entry_file, sameself)


def rewrite_schemadef(document: MutableMapping[str, Any]) -> set[str]:
def rewrite_schemadef(
document: MutableMapping[str, Any], output_dir: Path, pretty: bool = False
) -> set[str]:
"""Dump the schemadefs to their own file."""
for entry in document["types"]:
if "$import" in entry:
rewrite_import(entry)
elif "name" in entry and "/" in entry["name"]:
entry_file, entry["name"] = entry["name"].split("/")
for field in entry["fields"]:
entry_file, entry["name"] = entry["name"].lstrip("#").split("/")
for field in entry.get("fields", []):
field["name"] = field["name"].split("/")[2]
rewrite_types(field, entry_file, True)
with open(entry_file[1:], "a", encoding="utf-8") as entry_handle:
dump([entry], entry_handle, Dumper=RoundTripDumper)
entry["$import"] = entry_file[1:]
with (output_dir / entry_file).open("a", encoding="utf-8") as entry_handle:
yaml_dump(entry, entry_handle, pretty)
entry["$import"] = entry_file
del entry["name"]
del entry["type"]
del entry["fields"]
if "fields" in entry:
del entry["fields"]
seen_imports = set()

def seen_import(entry: MutableMapping[str, Any]) -> bool:
Expand All @@ -247,26 +273,26 @@ def seen_import(entry: MutableMapping[str, Any]) -> bool:
return seen_imports


def json_dump(entry: Any, output_file: str) -> None:
def json_dump(entry: Any, output_file: Path) -> None:
"""Output object as JSON."""
with open(output_file, "w", encoding="utf-8") as result_handle:
with output_file.open("w", encoding="utf-8") as result_handle:
json.dump(entry, result_handle, indent=4)


def yaml_dump(entry: Any, output_file: str, pretty: bool) -> None:
def yaml_dump(
entry: Any,
output_handle: TextIOWrapper,
pretty: bool,
) -> None:
"""Output object as YAML."""
yaml = YAML(typ="rt")
if pretty:
output_handle.write(stringify_dict(entry))
return
yaml = YAML(typ="rt", pure=True)
yaml.default_flow_style = False
yaml.map_indent = 4
yaml.sequence_indent = 2
with open(output_file, "w", encoding="utf-8") as result_handle:
if pretty:
result_handle.write(stringify_dict(entry))
else:
yaml.dump(
entry,
result_handle,
)
yaml.indent = 4
yaml.block_seq_indent = 2
yaml.dump(entry, output_handle)


if __name__ == "__main__":
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ test = "pytest"

[tool.pytest.ini_options]
testpaths = ["tests"]
addopts = "-rsx -n auto"

[tool.setuptools]
packages = [
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ requests
schema-salad >= 8.8.20250205075315,<9
ruamel.yaml >= 0.17.6, < 0.19
typing_extensions;python_version<'3.10'
cwlformat >= 2022.2.18
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, this is brought in by the `pretty` extra; we don't want to require all users of cwl-utils to install cwlformat.

Suggested change
cwlformat >= 2022.2.18

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you elaborate on this please? It's been a while since I actually put in the PR.

Why don't we want cwlformat as a requirement?
What do you mean by the extra 'pretty'?

4 changes: 3 additions & 1 deletion test-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
pytest < 9
pytest-cov
pytest-xdist
pytest-xdist[psutil]
cwlformat
pytest-mock >= 1.10.0
jsonschema >= 4.21.1
udocker
cwltool
4 changes: 2 additions & 2 deletions testdata/lib.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ var foo = function(x) {
}

var bar = function(n, x) {
return `{n} engineers walk into a {x}`
}
return n + " engineers walk into a " + x
}
Loading