8
8
import logging
9
9
import sys
10
10
import json
11
+ from copy import deepcopy
11
12
from pathlib import Path
12
13
from typing import Any , List , Union , Dict , Optional
13
14
from urllib .parse import urlparse
18
19
from cwl_utils .parser import load_document_by_uri , InputArraySchemaTypes , \
19
20
InputEnumSchemaTypes , InputRecordSchemaTypes , File , Directory , WorkflowInputParameter , InputRecordSchema , \
20
21
InputEnumSchema , InputArraySchema , Workflow , CommandLineTool
21
- from cwl_utils .utils import sanitise_schema_field , is_uri , to_pascal_case , get_value_from_uri , is_local_uri , \
22
- load_schema_from_uri
22
+ from cwl_utils .utils import sanitise_schema_field , is_uri , to_pascal_case , get_value_from_uri , is_local_uri
23
23
24
24
_logger = logging .getLogger ("cwl-inputs-schema-gen" ) # pylint: disable=invalid-name
25
25
defaultStreamHandler = logging .StreamHandler () # pylint: disable=invalid-name
35
35
"string" : "string" ,
36
36
"int" : "integer" ,
37
37
"float" : "number" ,
38
+ "long" : "number" ,
39
+ "double" : "number" ,
40
+ "null" : "null"
38
41
}
39
42
40
43
JSON_TEMPLATE_PATH = Path (__file__ ).parent .joinpath (
@@ -123,7 +126,11 @@ def generate_type_dict_from_type(self, type_item) -> Dict:
123
126
return {
124
127
"type" : PRIMITIVE_TYPES_MAPPING [type_item ]
125
128
}
126
- elif type_item in ["File" , "Directory" ]:
129
+ elif type_item in ["stdin" ]:
130
+ return {
131
+ "$ref" : f"#/definitions/File"
132
+ }
133
+ elif type_item in ["File" , "Directory" , "Any" ]:
127
134
return {
128
135
"$ref" : f"#/definitions/{ type_item } "
129
136
}
@@ -148,7 +155,7 @@ def generate_type_dict_from_type(self, type_item) -> Dict:
148
155
return {
149
156
"type" : "object" ,
150
157
"properties" : {
151
- get_value_from_uri (prop .id ): self .generate_type_dict_from_type (prop .type_ )
158
+ get_value_from_uri (prop .name ): self .generate_type_dict_from_type (prop .type_ )
152
159
for prop in type_item .fields
153
160
}
154
161
}
@@ -162,6 +169,16 @@ def generate_type_dict_from_type(self, type_item) -> Dict:
162
169
}
163
170
else :
164
171
raise ValueError (f"Unknown type: { type_item } " )
172
+ elif isinstance (type_item , List ):
173
+ # Nested schema
174
+ return {
175
+ "oneOf" : list (
176
+ map (
177
+ lambda type_iter : self .generate_type_dict_from_type (type_iter ),
178
+ type_item
179
+ )
180
+ )
181
+ }
165
182
else :
166
183
raise ValueError (f"Unknown type: { type_item } " )
167
184
@@ -222,7 +239,7 @@ def generate_json_schema_property_from_input_parameter(input_parameter: Workflow
222
239
return JSONSchemaProperty (
223
240
name = input_name ,
224
241
type_ = input_parameter .type_ ,
225
- description = doc
242
+ description = doc if doc is not None else ""
226
243
)
227
244
228
245
@@ -235,10 +252,15 @@ def generate_definition_from_schema(schema: InputRecordSchema) -> Dict:
235
252
236
253
# Sanitise each field of the schema
237
254
sanitised_fields = {}
238
- for field_key , field_value in schema .type_ .get ("fields" ).items ():
255
+
256
+ for field in schema .fields :
239
257
sanitised_fields .update (
240
258
{
241
- field_key : sanitise_schema_field (field_value )
259
+ get_value_from_uri (field .name ): sanitise_schema_field (
260
+ {
261
+ "type" : field .type_
262
+ }
263
+ )
242
264
}
243
265
)
244
266
@@ -267,13 +289,13 @@ def generate_definition_from_schema(schema: InputRecordSchema) -> Dict:
267
289
prop = JSONSchemaProperty (
268
290
name = prop_name ,
269
291
type_ = prop_obj .get ("type" ),
270
- description = prop_obj .get ("doc" ),
292
+ description = prop_obj .get ("doc" , "" ),
271
293
required = required
272
294
)
273
295
property_list .append (prop )
274
296
275
297
return {
276
- to_pascal_case (schema .type_ . get ( ' name' )): {
298
+ to_pascal_case (get_value_from_uri ( schema .name )): {
277
299
"type" : "object" ,
278
300
"properties" : {
279
301
prop .name : prop .type_dict
@@ -307,13 +329,41 @@ def cwl_to_jsonschema(cwl_obj: Union[Workflow, CommandLineTool]) -> Any:
307
329
# Load in all $imports to be referred by complex input types
308
330
workflow_schema_definitions_list = list (
309
331
map (
310
- lambda import_iter : generate_definition_from_schema (
311
- load_schema_from_uri ( import_iter )
332
+ lambda kv_schema_tuple_iter : generate_definition_from_schema (
333
+ cwl_obj . loadingOptions . idx . get ( kv_schema_tuple_iter [ 1 ][ 0 ] )
312
334
),
313
- cwl_obj .loadingOptions .imports
335
+ filter (
336
+ lambda idx_iter :
337
+ isinstance (idx_iter [1 ][0 ], InputRecordSchemaTypes ) or
338
+ isinstance (idx_iter [1 ][0 ], InputArraySchemaTypes ),
339
+ cwl_obj .loadingOptions .idx
340
+ )
314
341
)
315
342
)
316
343
344
+ if cwl_obj .requirements is not None :
345
+ try :
346
+ schema_def_requirement = next (
347
+ filter (
348
+ lambda requirement_iter : requirement_iter .class_ == "SchemaDefRequirement" ,
349
+ cwl_obj .requirements
350
+ )
351
+ )
352
+
353
+ workflow_schema_definitions_list .extend (
354
+ list (
355
+ map (
356
+ lambda schema_def_iter : generate_definition_from_schema (
357
+ schema_def_iter
358
+ ),
359
+ schema_def_requirement .types
360
+ )
361
+ )
362
+ )
363
+
364
+ except StopIteration :
365
+ pass
366
+
317
367
# Convert schema definitions to dict
318
368
workflow_schema_definitions_dict = {}
319
369
for schema_definition in workflow_schema_definitions_list :
@@ -334,7 +384,19 @@ def cwl_to_jsonschema(cwl_obj: Union[Workflow, CommandLineTool]) -> Any:
334
384
{
335
385
"type" : "object" ,
336
386
"properties" : {
337
- prop .name : prop .type_dict
387
+ prop .name : {
388
+ "oneOf" : [
389
+ {
390
+ "type" : "null"
391
+ },
392
+ prop .type_dict
393
+ ]
394
+ }
395
+ if prop .required is False
396
+ else
397
+ {
398
+ prop .name : prop .type_dict
399
+ }
338
400
for prop in properties
339
401
},
340
402
"required" : [
@@ -350,6 +412,80 @@ def cwl_to_jsonschema(cwl_obj: Union[Workflow, CommandLineTool]) -> Any:
350
412
workflow_schema_definitions_dict
351
413
)
352
414
415
+ # Slim down the schema as required
416
+ input_json_schema = slim_definitions (input_json_schema )
417
+
418
+ return input_json_schema
419
+
420
+
421
def slim_definitions(input_json_schema: Dict) -> Dict:
    """
    Return a copy of the schema that keeps only the definitions actually in use.

    The schema's "properties" are searched for "$ref" entries. Each referenced
    definition is kept, and is itself searched for further "$ref" entries, so
    that transitive dependencies are kept as well. Every other entry under
    "definitions" is dropped.

    :param input_json_schema: JSON schema dict, expected to hold "properties"
        and "definitions" keys. It is not modified; a deep copy is edited.
    :return: The slimmed-down deep copy of the schema. A schema without a
        "definitions" key is returned as an unchanged copy.
    :raises KeyError: If a "$ref" names a definition that is not present
        under "definitions".
    """

    def _collect_refs(json_data: Any, found: List[str]) -> List[str]:
        # Append every "$ref" value reachable through nested dicts / lists.
        # The value stored under a "$ref" key is recorded, not descended into.
        if isinstance(json_data, dict):
            for key, value in json_data.items():
                if key == "$ref":
                    found.append(value)
                else:
                    _collect_refs(value, found)
        elif isinstance(json_data, list):
            for item in json_data:
                _collect_refs(item, found)
        return found

    # Copy schema so the caller's object is left untouched
    slimmed_schema = deepcopy(input_json_schema)
    definitions = slimmed_schema.get("definitions", {})

    # Worklist traversal: each definition is expanded exactly once.
    # (Previously the accumulator list was extended with itself on every
    # recursive call, doubling its length for each resolved reference.)
    required_definitions = set()
    pending_refs = _collect_refs(slimmed_schema.get("properties"), [])
    while pending_refs:
        definition_name = get_value_from_uri(pending_refs.pop())
        if definition_name in required_definitions:
            # Already expanded; this also guards against circular references
            continue
        required_definitions.add(definition_name)
        # A definition may reference further definitions
        _collect_refs(definitions[definition_name], pending_refs)

    # Drop everything that nothing refers to (only if there is a
    # "definitions" section to prune in the first place)
    if "definitions" in slimmed_schema:
        slimmed_schema["definitions"] = {
            definition_key: definition_value
            for definition_key, definition_value in definitions.items()
            if definition_key in required_definitions
        }

    return slimmed_schema
354
490
355
491
@@ -430,7 +566,7 @@ def run(args: argparse.Namespace) -> int:
430
566
except Exception as e :
431
567
_logger .exception ("Failed to generate JSON Schema from CWL inputs object. Error: %s" , e )
432
568
return 1
433
- args .output .write (json .dumps (jsonschema , indent = 2 ))
569
+ args .output .write (json .dumps (jsonschema , indent = 2 ) + " \n " )
434
570
435
571
return 0
436
572
0 commit comments