|
| 1 | +#!/usr/bin/env python3 |
| 2 | +# SPDX-License-Identifier: Apache-2.0 |
| 3 | +"""Generate JSON Schema from CWL inputs object.""" |
| 4 | +import argparse |
| 5 | +import logging |
| 6 | +import sys |
| 7 | +from json import dumps |
| 8 | +from typing import Any, List |
| 9 | + |
| 10 | +from cwl_utils.loghandler import _logger as _cwlutilslogger |
| 11 | +from cwl_utils.parser import load_document_by_uri, save |
| 12 | + |
# Logger dedicated to this command-line tool, with its own stream handler.
_logger = logging.getLogger("cwl-inputs_schema_gen")  # pylint: disable=invalid-name
defaultStreamHandler = logging.StreamHandler()  # pylint: disable=invalid-name
_logger.addHandler(defaultStreamHandler)
_logger.setLevel(logging.INFO)
# 100 is above logging.CRITICAL (50), so the cwl_utils logger emits nothing
# at any of the standard levels while this module is loaded.
_cwlutilslogger.setLevel(100)
| 18 | + |
| 19 | + |
def cwl_inputs_to_jsonschema(cwl_inputs: Any) -> Any:
    """
    Convert a JSON-serialized CWL inputs object into a JSON Schema object.

    Args:
        cwl_inputs: JSON-serialized CWL inputs object: an iterable of
            mappings, each providing at least an 'id' and a 'type' entry.

    Returns:
        A dict holding a draft-07 JSON Schema of type "object" with one
        property per input. An input is listed under "required" unless it
        has a 'default' or its type accepts 'null'.

    Raises:
        ValueError: If an input lacks 'id' or 'type', or if its type cannot
            be converted by _cwl_type_to_prop_schema.

    Example:
        cwl_obj = load_document_by_uri(<CWL_URL>)
        saved_obj = save(cwl_obj)
        cwl_inputs = saved_obj["inputs"]
        jsonschema = cwl_inputs_to_jsonschema(cwl_inputs)
    """
    properties = {}
    required: List[str] = []

    # Refer to https://www.commonwl.org/v1.2/Workflow.html#WorkflowInputParameter for more details
    for input_item in cwl_inputs:
        input_id = input_item.get("id")
        input_type = input_item.get("type")
        if input_id is None or input_type is None:
            raise ValueError("Each item in the 'inputs' object must include 'id' and 'type' fields.")

        prop_schema = _cwl_type_to_prop_schema(input_type)

        # NOTE: 'secondaryFiles' is deliberately not reflected in the schema;
        # it does not seem to affect the output of `cwltool --make-template`.

        has_default = "default" in input_item
        if has_default:
            prop_schema["default"] = input_item["default"]
        properties[input_id] = prop_schema

        # Decide optionality explicitly: a plain `"null" in input_type` would
        # be a substring test on string types and a key test on dict types.
        if isinstance(input_type, list):
            accepts_null = "null" in input_type
        else:
            accepts_null = input_type == "null"

        if not has_default and not accepts_null:
            required.append(input_id)

    return {
        "$schema": "http://json-schema.org/draft-07/schema#",
        "type": "object",
        "properties": properties,
        "required": required,
        "additionalProperties": False,
    }
| 67 | + |
| 68 | + |
def _cwl_type_to_prop_schema(input_type: Any) -> Any:
    """
    Convert one CWL type description into a JSON Schema property schema.

    The type may be a plain string, a nested type object (dict) describing an
    enum, record or array, or a list describing a union of types. Nested
    types are converted recursively.

    Args:
        input_type: The value of a 'type' (or array 'items') entry taken from
            a JSON-serialized CWL inputs object.

    Returns:
        A dict holding the JSON Schema for values of that type. Unions are
        expressed with "anyOf"; an optional type is a union containing
        {"type": "null"}.

    Raises:
        ValueError: If a nested type object lacks 'type', 'symbols', 'fields'
            or 'items' where needed, if a record field lacks 'name' or
            'type', if the nested 'type' is not enum/record/array, or if a
            union list is empty.
    """
    if isinstance(input_type, dict):
        nested_type = input_type.get("type")
        if nested_type is None:
            raise ValueError("The 'type' field is missing in the 'inputs.[].type' nested type object.")

        if nested_type == "enum":
            symbols = input_type.get("symbols")
            if symbols is None:
                raise ValueError("The 'symbols' field is missing in the 'inputs.[].type' nested type object for enum.")
            return {
                "type": "string",
                "enum": symbols,
            }

        if nested_type == "record":
            fields = input_type.get("fields")
            if fields is None:
                raise ValueError("The 'fields' field is missing in the 'inputs.[].type' nested type object for record.")

            properties = {}
            required: List[str] = []
            for field in fields:
                field_name = field.get("name")
                field_type = field.get("type")
                if field_name is None or field_type is None:
                    raise ValueError("Both 'name' and 'type' fields are required in the 'inputs.[].type.[].fields' object for record.")
                # Keep only the last path segment of a possibly URI-qualified name.
                field_id = field_name.split("#")[-1].split("/")[-1]
                properties[field_id] = _cwl_type_to_prop_schema(field_type)

                # Same rule as for top-level inputs: a field is required
                # unless it has a default or its type accepts 'null'.
                if isinstance(field_type, list):
                    accepts_null = "null" in field_type
                else:
                    accepts_null = field_type == "null"
                if "default" not in field and not accepts_null:
                    required.append(field_id)

            return {
                "type": "object",
                "properties": properties,
                "required": required,
                "additionalProperties": False,
            }

        if nested_type == "array":
            item_type = input_type.get("items")
            if item_type is None:
                raise ValueError("The 'items' field is missing in the 'inputs.[].type' nested type object for array.")
            return {
                "type": "array",
                "items": _cwl_type_to_prop_schema(item_type),
                "additionalItems": False,
            }

        raise ValueError(f"Unexpected value '{input_type}' encountered in 'inputs.[].type'.")

    if isinstance(input_type, list):
        # A list is a union of types. 'null' converts to {"type": "null"}, so
        # optional types need no special keyword ('nullable' is not part of
        # JSON Schema draft-07 and would be ignored by validators).
        if not input_type:
            raise ValueError(f"Unexpected value '{input_type}' encountered in 'inputs.[].type'.")
        alternatives = [_cwl_type_to_prop_schema(member) for member in input_type]
        if len(alternatives) == 1:
            return alternatives[0]
        return {"anyOf": alternatives}

    if input_type == "File" or input_type == "Directory":
        return _file_or_directory_schema(input_type)

    if input_type == "Any":
        # Every JSON type except null.
        return {
            "anyOf": [
                {"type": "boolean"},
                {"type": "integer"},
                {"type": "number"},
                {"type": "string"},
                {"type": "array"},
                {"type": "object"},
            ]
        }

    if input_type == "null":
        return {"type": "null"}

    # 'int' and 'long' are both integer types; only 'float' and 'double'
    # may hold fractional values.
    if input_type in ("int", "long"):
        return {"type": "integer"}
    if input_type in ("float", "double"):
        return {"type": "number"}

    # Remaining names (e.g. 'string', 'boolean') are passed through unchanged.
    return {"type": input_type}


def _file_or_directory_schema(class_name: str) -> Any:
    """Build the object schema shared by 'File' and 'Directory' values.

    The value must carry a matching 'class' and exactly one of 'path' or
    'location'; no other properties are allowed.
    """
    return {
        "type": "object",
        "properties": {
            "class": {"type": "string", "const": class_name},
            "path": {"type": "string"},
            "location": {"type": "string"},
        },
        "required": ["class"],
        "oneOf": [
            {"required": ["path"]},
            {"required": ["location"]},
        ],
        "additionalProperties": False,
    }
| 184 | + |
| 185 | + |
def arg_parser() -> argparse.ArgumentParser:
    """Create the command-line parser.

    The parser takes one positional argument, the CWL document URL, and an
    optional -o/--output file that is opened for writing and defaults to
    standard output.
    """
    cli = argparse.ArgumentParser(description="Generate JSON Schema from CWL inputs object.")
    cli.add_argument("cwl_url", help="URL of the CWL document.")

    output_settings = {
        "type": argparse.FileType("w"),
        "default": sys.stdout,
        "help": "Output file. Default is stdout.",
    }
    cli.add_argument("-o", "--output", **output_settings)
    return cli
| 200 | + |
| 201 | + |
def parse_args(args: List[str]) -> argparse.Namespace:
    """Parse *args* with the parser built by arg_parser() and return the namespace."""
    parser = arg_parser()
    namespace = parser.parse_args(args)
    return namespace
| 205 | + |
| 206 | + |
def main() -> None:
    """Console entry point: parse sys.argv, run, and exit with run()'s status."""
    cli_args = parse_args(sys.argv[1:])
    exit_code = run(cli_args)
    sys.exit(exit_code)
| 210 | + |
| 211 | + |
def run(args: argparse.Namespace) -> int:
    """Primary processing loop.

    Loads the CWL document at ``args.cwl_url``, converts its inputs into a
    JSON Schema and writes the schema, indented by two spaces, to
    ``args.output``.

    Args:
        args: Parsed command-line arguments providing ``cwl_url`` and
            ``output``.

    Returns:
        0 on success; 1 if the document has no 'inputs' entry or the schema
        could not be generated.
    """
    cwl_obj = load_document_by_uri(args.cwl_url)
    saved_obj = save(cwl_obj)  # TODO: Use "typed CWL object" OR "saved object"?
    if "inputs" not in saved_obj:
        # No exception is active here, so log a plain error:
        # Logger.exception() is only meant for use inside an exception handler.
        _logger.error("Inputs object not found in the CWL document.")
        return 1

    try:
        jsonschema = cwl_inputs_to_jsonschema(saved_obj["inputs"])
    except Exception as e:
        # Top-level boundary of the CLI: report the failure with its
        # traceback and turn it into a non-zero exit status.
        _logger.exception("Failed to generate JSON Schema from CWL inputs object. Error: %s", e)
        return 1

    args.output.write(dumps(jsonschema, indent=2))
    return 0
| 228 | + |
| 229 | + |
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
0 commit comments