Skip to content

Commit f6e2293

Browse files
suecharoalexiswl
authored andcommitted
Add inputs_schema_gen impl.
1 parent d2af76f commit f6e2293

File tree

4 files changed

+242
-0
lines changed

4 files changed

+242
-0
lines changed

README.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,14 @@ Expressions into separate steps in the manner of cwl-expression-refactor.
118118
119119
cwl-normalizer directory/path/to/save/outputs path_to_my_workflow.cwl [more_workflows.cwl]
120120
121+
Generate a JSON Schema for Workflow Parameters from a CWL document
122+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
123+
124+
``cwl-inputs_schema_gen`` generates a JSON Schema for workflow input parameters from a CWL document.
125+
126+
.. code:: bash
127+
128+
cwl-inputs_schema_gen path_to_my_workflow.cwl
121129
122130
Using the CWL Parsers
123131
~~~~~~~~~~~~~~~~~~~~~

cwl_utils/inputs_schema_gen.py

Lines changed: 231 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,231 @@
1+
#!/usr/bin/env python3
2+
# SPDX-License-Identifier: Apache-2.0
3+
"""Generate JSON Schema from CWL inputs object."""
4+
import argparse
5+
import logging
6+
import sys
7+
from json import dumps
8+
from typing import Any, List
9+
10+
from cwl_utils.loghandler import _logger as _cwlutilslogger
11+
from cwl_utils.parser import load_document_by_uri, save
12+
13+
# Handler used by this tool's logger; StreamHandler() without arguments is used as-is.
defaultStreamHandler = logging.StreamHandler()  # pylint: disable=invalid-name
# Dedicated logger for this command-line tool, reporting at INFO and above.
_logger = logging.getLogger("cwl-inputs_schema_gen")  # pylint: disable=invalid-name
_logger.setLevel(logging.INFO)
_logger.addHandler(defaultStreamHandler)
# Level 100 is above logging.CRITICAL (50), so the shared cwl_utils logger
# emits nothing at any of the standard levels.
_cwlutilslogger.setLevel(100)
18+
19+
20+
def cwl_inputs_to_jsonschema(cwl_inputs: Any) -> Any:
    """
    Convert a JSON-serialized CWL inputs object into a JSON Schema object.

    Args:
        cwl_inputs: JSON-serialized CWL inputs object: an iterable of
            mappings, each providing at least the 'id' and 'type' keys.

    Returns:
        A JSON Schema (draft-07) object describing an input object whose
        properties are the workflow inputs. Inputs without a 'default' whose
        type does not accept 'null' are listed under 'required'.

    Raises:
        ValueError: If an input item lacks 'id' or 'type', or if its type
            cannot be converted.

    Example:
        cwl_obj = load_document_by_uri(<CWL_URL>)
        saved_obj = save(cwl_obj)
        cwl_inputs = saved_obj["inputs"]
        jsonschema = cwl_inputs_to_jsonschema(cwl_inputs)
    """
    properties: Any = {}
    required: List[str] = []

    # Refer to https://www.commonwl.org/v1.2/Workflow.html#WorkflowInputParameter for more details
    for input_item in cwl_inputs:
        input_id = input_item.get("id")
        input_type = input_item.get("type")
        if input_id is None or input_type is None:
            raise ValueError("Each item in the 'inputs' object must include 'id' and 'type' fields.")

        prop_schema = _cwl_type_to_prop_schema(input_type)

        # NOTE: 'secondaryFiles' is deliberately ignored. It does not seem to
        # affect `cwltool --make-template` output either, e.g. for
        # https://github.com/common-workflow-language/cwl-v1.2/main/tests/stage-array.cwl

        has_default = "default" in input_item
        if has_default:
            prop_schema["default"] = input_item["default"]
        properties[input_id] = prop_schema

        # Exact nullability test: a plain `"null" in input_type` would be a
        # substring match for string types (e.g. a named type "#nullable_x").
        if isinstance(input_type, list):
            accepts_null = "null" in input_type
        else:
            accepts_null = input_type == "null"
        if not has_default and not accepts_null:
            required.append(input_id)

    return {
        "$schema": "http://json-schema.org/draft-07/schema#",
        "type": "object",
        "properties": properties,
        "required": required,
        "additionalProperties": False,
    }
67+
68+
69+
def _cwl_type_accepts_null(cwl_type: Any) -> bool:
    """Return True when a CWL type is 'null' or a union that includes 'null'."""
    if isinstance(cwl_type, list):
        return "null" in cwl_type
    return bool(cwl_type == "null")


def _cwl_file_like_schema(cwl_class: str) -> Any:
    """Build the object schema shared by the CWL 'File' and 'Directory' types."""
    return {
        "type": "object",
        "properties": {
            "class": {"type": "string", "const": cwl_class},
            "path": {"type": "string"},
            "location": {"type": "string"},
        },
        "required": ["class"],
        # Exactly one of 'path' or 'location' must be supplied.
        "oneOf": [
            {"required": ["path"]},
            {"required": ["location"]},
        ],
        "additionalProperties": False,
    }


def _cwl_type_to_prop_schema(input_type: Any) -> Any:
    """
    Convert the type of one CWL input into a JSON Schema property schema.

    The type may be a plain string, a nested type object (dict) or a union
    (list), so this function calls itself recursively.

    Args:
        input_type: A CWL type: a type name, a dict with a 'type' of 'enum',
            'record' or 'array', or a list of such types (a union).

    Returns:
        A new dict holding the JSON Schema (draft-07) for that type:
        'int' and 'long' become 'integer', 'float' and 'double' become
        'number', unions become 'anyOf', and other type names are passed
        through unchanged as the schema 'type'.

    Raises:
        ValueError: If a nested type object lacks a required key, has an
            unsupported 'type', or if a union is empty.
    """
    if isinstance(input_type, dict):
        nested_type = input_type.get("type")
        if nested_type is None:
            raise ValueError("The 'type' field is missing in the 'inputs.[].type' nested type object.")

        if nested_type == "enum":
            symbols = input_type.get("symbols")
            if symbols is None:
                raise ValueError("The 'symbols' field is missing in the 'inputs.[].type' nested type object for enum.")
            return {
                "type": "string",
                "enum": symbols,
            }

        if nested_type == "record":
            fields = input_type.get("fields")
            if fields is None:
                raise ValueError("The 'fields' field is missing in the 'inputs.[].type' nested type object for record.")
            properties: Any = {}
            required: List[str] = []
            for field in fields:
                field_name = field.get("name")
                field_type = field.get("type")
                if field_name is None or field_type is None:
                    raise ValueError("Both 'name' and 'type' fields are required in the 'inputs.[].type.[].fields' object for record.")
                # Field names may be full identifiers; keep only the last
                # component after any '#' fragment and '/' separators.
                field_id = field_name.split("#")[-1].split("/")[-1]
                properties[field_id] = _cwl_type_to_prop_schema(field_type)
                # Same rule as for top-level inputs: a field with a default
                # or a null-accepting type may be omitted.
                if "default" not in field and not _cwl_type_accepts_null(field_type):
                    required.append(field_id)
            return {
                "type": "object",
                "properties": properties,
                "required": required,
                "additionalProperties": False,
            }

        if nested_type == "array":
            item_type = input_type.get("items")
            if item_type is None:
                raise ValueError("The 'items' field is missing in the 'inputs.[].type' nested type object for array.")
            return {
                "type": "array",
                "items": _cwl_type_to_prop_schema(item_type),
                "additionalItems": False,
            }

        raise ValueError(f"Unexpected value '{input_type}' encountered in 'inputs.[].type'.")

    if isinstance(input_type, list):
        if not input_type:
            raise ValueError(f"Unexpected value '{input_type}' encountered in 'inputs.[].type'.")
        member_schemas = [_cwl_type_to_prop_schema(member) for member in input_type]
        if len(member_schemas) == 1:
            return member_schemas[0]
        # A union is valid when any member matches. 'anyOf' (not 'oneOf') is
        # used because members may overlap, e.g. 'int' and 'double'. An
        # optional type is simply a union with a {"type": "null"} member;
        # draft-07 has no 'nullable' keyword.
        return {"anyOf": member_schemas}

    if input_type in ("File", "Directory"):
        return _cwl_file_like_schema(input_type)
    if input_type == "Any":
        return {
            "anyOf": [
                {"type": "boolean"},
                {"type": "integer"},
                {"type": "number"},
                {"type": "string"},
                {"type": "array"},
                {"type": "object"},
            ]
        }
    if input_type == "null":
        return {"type": "null"}
    # CWL 'int' and 'long' are both integral; only their bit width differs.
    if input_type in ("int", "long"):
        return {"type": "integer"}
    if input_type in ("float", "double"):
        return {"type": "number"}
    # 'string' and 'boolean' share their name with the JSON Schema type.
    return {"type": input_type}
184+
185+
186+
def arg_parser() -> argparse.ArgumentParser:
    """Create the command-line parser: one CWL document URL plus an optional output file."""
    cli = argparse.ArgumentParser(description="Generate JSON Schema from CWL inputs object.")
    cli.add_argument("cwl_url", help="URL of the CWL document.")
    # Options for the output destination, opened for writing by argparse.
    output_options = {
        "type": argparse.FileType("w"),
        "default": sys.stdout,
        "help": "Output file. Default is stdout.",
    }
    cli.add_argument("-o", "--output", **output_options)
    return cli
200+
201+
202+
def parse_args(args: List[str]) -> argparse.Namespace:
    """Parse the given argument list with the parser built by arg_parser()."""
    parser = arg_parser()
    return parser.parse_args(args)
205+
206+
207+
def main() -> None:
    """Console entry point: process sys.argv and exit with run()'s status code."""
    cli_args = parse_args(sys.argv[1:])
    exit_code = run(cli_args)
    sys.exit(exit_code)
210+
211+
212+
def run(args: argparse.Namespace) -> int:
    """
    Load a CWL document and write the JSON Schema of its inputs.

    Args:
        args: Parsed command-line arguments; 'cwl_url' names the document to
            load and 'output' is the writable stream receiving the schema.

    Returns:
        0 on success, 1 when the document has no 'inputs' or the schema
        could not be generated.
    """
    cwl_obj = load_document_by_uri(args.cwl_url)
    saved_obj = save(cwl_obj)  # TODO: Use "typed CWL object" OR "saved object"?
    if "inputs" not in saved_obj:
        # No exception is being handled here, so log a plain error;
        # Logger.exception would append a meaningless "NoneType: None" traceback.
        _logger.error("Inputs object not found in the CWL document.")
        return 1
    json_serialized_inputs_obj = saved_obj["inputs"]
    try:
        jsonschema = cwl_inputs_to_jsonschema(json_serialized_inputs_obj)
    except Exception as e:
        # Top-level boundary: report the failure with its traceback and
        # turn it into a non-zero exit status.
        _logger.exception("Failed to generate JSON Schema from CWL inputs object. Error: %s", e)
        return 1
    args.output.write(dumps(jsonschema, indent=2))

    return 0
228+
229+
230+
# Run the console entry point when this module is executed as a script.
if __name__ == "__main__":
    main()

docs/index.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ Included Utility Programs
3333
.. autoprogram:: cwl_utils.normalizer:arg_parser()
3434
:prog: cwl-normalizer
3535

36+
.. autoprogram:: cwl_utils.inputs_schema_gen:arg_parser()
37+
:prog: cwl-inputs_schema_gen
3638

3739
Indices and tables
3840
==================

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ cwl-docker-extract = "cwl_utils.docker_extract:main"
5353
cwl-expression-refactor = "cwl_utils.expression_refactor:main"
5454
cwl-graph-split = "cwl_utils.graph_split:main"
5555
cwl-normalizer = "cwl_utils.normalizer:main"
56+
cwl-inputs_schema_gen = "cwl_utils.inputs_schema_gen:main"
5657

5758
[tool.aliases]
5859
test = "pytest"

0 commit comments

Comments
 (0)