From 9f6ffee9bc2c8b7804bf097aced64e8da71722a1 Mon Sep 17 00:00:00 2001 From: "Mark A. Miller" Date: Mon, 26 Feb 2024 19:30:56 -0500 Subject: [PATCH] templated all SchemaDefinition slots including settings (#122) * templated all SchemaDefinition slots... except settings * handles schema Settings * add slot class assignments --- .gitignore | 1 + schemasheets/schemamaker.py | 54 ++++++++----- schemasheets/schemasheet_datamodel.py | 6 +- tests/test_121/input/class_defs.tsv | 5 ++ tests/test_121/input/prefix_defs.tsv | 6 ++ tests/test_121/input/schema_def.tsv | 5 ++ tests/test_121/input/setting_defs.tsv | 3 + tests/test_121/input/setting_defs.txt | 3 + .../test_121/input/slot_class_assignments.tsv | 4 + tests/test_121/input/slot_defs.tsv | 4 + tests/test_121/input/subset_defs.tsv | 4 + tests/test_121/output/mixs_test.yaml | 79 +++++++++++++++++++ tests/test_121/test_mixs_generation.py | 63 +++++++++++++++ 13 files changed, 218 insertions(+), 19 deletions(-) create mode 100644 tests/test_121/input/class_defs.tsv create mode 100644 tests/test_121/input/prefix_defs.tsv create mode 100644 tests/test_121/input/schema_def.tsv create mode 100644 tests/test_121/input/setting_defs.tsv create mode 100644 tests/test_121/input/setting_defs.txt create mode 100644 tests/test_121/input/slot_class_assignments.tsv create mode 100644 tests/test_121/input/slot_defs.tsv create mode 100644 tests/test_121/input/subset_defs.tsv create mode 100644 tests/test_121/output/mixs_test.yaml create mode 100644 tests/test_121/test_mixs_generation.py diff --git a/.gitignore b/.gitignore index 271b633..b200185 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ .DS_Store +target examples/output diff --git a/schemasheets/schemamaker.py b/schemasheets/schemamaker.py index 8952096..73d1ccb 100644 --- a/schemasheets/schemamaker.py +++ b/schemasheets/schemamaker.py @@ -15,13 +15,13 @@ from linkml_runtime.dumpers import yaml_dumper from linkml_runtime.linkml_model import Annotation, Example from
linkml_runtime.linkml_model.meta import SchemaDefinition, ClassDefinition, Prefix, \ - SlotDefinition, EnumDefinition, PermissibleValue, SubsetDefinition, TypeDefinition, Element + SlotDefinition, EnumDefinition, PermissibleValue, SubsetDefinition, TypeDefinition, Element, Setting from linkml_runtime.utils.schema_as_dict import schema_as_dict from linkml_runtime.utils.schemaview import SchemaView, re from schemasheets.schemasheet_datamodel import ColumnConfig, TableConfig, get_configmodel, get_metamodel, COL_NAME, \ DESCRIPTOR, \ - tmap, T_CLASS, T_PV, T_SLOT, T_SUBSET, T_SCHEMA, T_ENUM, T_PREFIX, T_TYPE, SchemaSheet + tmap, T_CLASS, T_PV, T_SLOT, T_SUBSET, T_SCHEMA, T_ENUM, T_PREFIX, T_TYPE, SchemaSheet, T_SETTING from schemasheets.conf.configschema import Cardinality from schemasheets.utils.google_sheets import gsheets_download_url from schemasheets.utils.prefixtool import guess_prefix_expansion @@ -83,7 +83,6 @@ def _tidy_slot_usage(self): c.slots.remove(sn) del c.slot_usage[sn] - def merge_sheet(self, file_name: str, delimiter='\t') -> None: """ Merge information from the given schema sheet into the current schema @@ -93,7 +92,7 @@ def merge_sheet(self, file_name: str, delimiter='\t') -> None: :return: """ logging.info(f'READING {file_name} D={delimiter}') - #with self.ensure_file(file_name) as tsv_file: + # with self.ensure_file(file_name) as tsv_file: # reader = csv.DictReader(tsv_file, delimiter=delimiter) with self.ensure_csvreader(file_name, delimiter=delimiter) as reader: schemasheet = SchemaSheet.from_dictreader(reader) @@ -101,7 +100,7 @@ def merge_sheet(self, file_name: str, delimiter='\t') -> None: schemasheet.load_table_config(self.table_config_path) line_num = schemasheet.start_line_number # TODO: check why this doesn't work - #while rows and all(x for x in rows[-1] if not x): + # while rows and all(x for x in rows[-1] if not x): # print(f'TRIMMING: {rows[-1]}') # rows.pop() logging.info(f'ROWS={len(schemasheet.rows)}') @@ -118,21 +117,29 @@ def 
add_row(self, row: Dict[str, Any], table_config: TableConfig): name = element.prefix_prefix elif isinstance(element, PermissibleValue): name = element.text + elif isinstance(element, Setting): + # print(f"\n{element = }") + name = element.setting_key else: logging.debug(f'EL={element} in {row}') name = element.name logging.debug(f'ADDING: {row} // {name}') for k, v in row.items(): + # print(f"\n{k = }") if k not in table_config.columns: raise ValueError(f'Expected to find {k} in {table_config.columns.keys()}') cc = table_config.columns[k] + # print(f"{cc = }") v = self.normalize_value(v, cc) if v: + # print(f"{v = }") # special case: class-context provided by settings if cc.settings.applies_to_class: actual_element = list(self.row_focal_element(row, table_config, column=k))[0] else: actual_element = element + # print(f"{cc.maps_to = }") + # print(f"{cc = }") logging.debug(f'SETTING {name} {cc.maps_to} = {v}') if cc.maps_to == 'cardinality': self.set_cardinality(actual_element, v) @@ -172,9 +179,13 @@ def add_row(self, row: Dict[str, Any], table_config: TableConfig): curr_val = getattr(curr_obj, cc.settings.inner_key, None) else: curr_val = getattr(actual_element, cc.maps_to) + # print(f"{curr_val = }") + # print(f"{v = }") + if curr_val and curr_val != 'TEMP' and curr_val != v and \ not isinstance(actual_element, SchemaDefinition) and \ - not isinstance(actual_element, Prefix): + not isinstance(actual_element, Prefix) and \ + not isinstance(actual_element, Setting): logging.warning(f'Overwriting value for {k}, was {curr_val}, now {v}') raise ValueError(f'Cannot reset value for {k}, was {curr_val}, now {v}') if cc.settings.inner_key: @@ -288,6 +299,12 @@ def row_focal_element(self, row: Dict[str, Any], table_config: TableConfig, pfx = Prefix(vs[0], 'TODO') self.schema.prefixes[pfx.prefix_prefix] = pfx vmap[k] = [pfx] + elif elt_cls == Setting: + if len(vs) != 1: + raise ValueError(f'Cardinality of setting col must be 1; got: {vs}') + stg = Setting(vs[0], 'TODO') + 
self.schema.settings[stg.setting_key] = stg + vmap[k] = [stg] elif elt_cls == SchemaDefinition: if len(vs) != 1: raise ValueError(f'Cardinality of schema col must be 1; got: {vs}') @@ -295,10 +312,12 @@ def row_focal_element(self, row: Dict[str, Any], table_config: TableConfig, vmap[k] = [self.schema] else: vmap[k] = [self.get_current_element(elt_cls(v)) for v in vs] + def check_excess(descriptors): diff = set(vmap.keys()) - set(descriptors + [T_SCHEMA]) if len(diff) > 0: raise ValueError(f'Excess slots: {diff}') + if column: cc = table_config.columns[column] if cc.settings.applies_to_class: @@ -351,7 +370,7 @@ def check_excess(descriptors): this_enum: EnumDefinition = vmap[T_ENUM][0] if T_PV in vmap: for pv in vmap[T_PV]: - #pv = PermissibleValue(text=v) + # pv = PermissibleValue(text=v) this_enum.permissible_values[pv.text] = pv yield pv else: @@ -368,6 +387,9 @@ def check_excess(descriptors): elif T_SCHEMA in vmap: for main_elt in vmap[T_SCHEMA]: yield main_elt + elif T_SETTING in vmap: + for main_elt in vmap[T_SETTING]: + yield main_elt else: raise ValueError(f'Could not find a focal element for {row}') @@ -419,7 +441,8 @@ def normalize_value(self, v: str, column_config: ColumnConfig = None) -> Any: v = None if column_config.settings.curie_prefix: if ':' in v: - logging.warning(f'Will not prefix {v} with {column_config.settings.curie_prefix} as it is already prefixed') + logging.warning( + f'Will not prefix {v} with {column_config.settings.curie_prefix} as it is already prefixed') else: v = f'{column_config.settings.curie_prefix}:{v}' if column_config.settings.prefix: @@ -537,8 +560,8 @@ def repair_schema(self, schema: SchemaDefinition) -> SchemaDefinition: :return: """ sv = SchemaView(schema) - #pfx = schema.default_prefix - #if pfx not in schema.prefixes: + # pfx = schema.default_prefix + # if pfx not in schema.prefixes: # schema.prefixes[pfx] = Prefix(pfx, f'http://example.org/{pfx}/') # logging.info(f'Set default prefix: {schema.prefixes[pfx]}') prefixes = 
set() @@ -622,7 +645,8 @@ def ensure_csvreader(self, file_name: str, delimiter=None) -> str: help="Google sheets ID. If this is specified then the arguments MUST be sheet names") @click.option("-v", "--verbose", count=True) @click.argument('tsv_files', nargs=-1) -def convert(tsv_files, gsheet_id, output: TextIO, name, repair, table_config_path: str, use_attributes: bool, unique_slots: bool, verbose: int, sort_keys: bool): +def convert(tsv_files, gsheet_id, output: TextIO, name, repair, table_config_path: str, use_attributes: bool, + unique_slots: bool, verbose: int, sort_keys: bool): """ Convert schemasheets to a LinkML schema @@ -653,14 +677,8 @@ def convert(tsv_files, gsheet_id, output: TextIO, name, repair, table_config_pat schema = sm.repair_schema(schema) schema_dict = schema_as_dict(schema) output.write(yaml.dump(schema_dict, sort_keys=sort_keys)) - #output.write(yaml_dumper.dumps(schema)) + # output.write(yaml_dumper.dumps(schema)) if __name__ == '__main__': convert() - - - - - - diff --git a/schemasheets/schemasheet_datamodel.py b/schemasheets/schemasheet_datamodel.py index 4d3a5b1..2610da7 100644 --- a/schemasheets/schemasheet_datamodel.py +++ b/schemasheets/schemasheet_datamodel.py @@ -8,6 +8,8 @@ import yaml from linkml_runtime.linkml_model import SlotDefinition, ClassDefinition, SchemaDefinition, \ PermissibleValue, EnumDefinition, TypeDefinition, SubsetDefinition, Prefix +from linkml_runtime.linkml_model.meta import Setting + from linkml_runtime.utils.schemaview import SchemaView from schemasheets.conf.configschema import ColumnSettings, Shortcuts @@ -26,6 +28,7 @@ T_TYPE = 'type' T_SUBSET = 'subset' T_PREFIX = 'prefix' +T_SETTING = 'setting' tmap = { T_SCHEMA: SchemaDefinition, @@ -35,7 +38,8 @@ T_PV: PermissibleValue, T_TYPE: TypeDefinition, T_SUBSET: SubsetDefinition, - T_PREFIX: Prefix + T_PREFIX: Prefix, + T_SETTING: Setting, } diff --git a/tests/test_121/input/class_defs.tsv b/tests/test_121/input/class_defs.tsv new file mode 100644 index 
0000000..ccfaeb1 --- /dev/null +++ b/tests/test_121/input/class_defs.tsv @@ -0,0 +1,5 @@ +class description +>class description +Vehicle A machine, with or wihtout its own power source, that eases the transportation of people, materials, etc. +Airplane "A vehicle which flies through the air, obtaining lif from air flowing acoss fixed wings" +Boat A vehicle which moves through water \ No newline at end of file diff --git a/tests/test_121/input/prefix_defs.tsv b/tests/test_121/input/prefix_defs.tsv new file mode 100644 index 0000000..2487b80 --- /dev/null +++ b/tests/test_121/input/prefix_defs.tsv @@ -0,0 +1,6 @@ +prefix URI +> prefix prefix_reference +some_schema http://example.com/some_schema_path/ +data_prefix_1 http://example.com/data_prefix_1/ +data_prefix_2 http://example.com/data_prefix_2/ +non_data_prefix http://example.com/non_data_prefix/ \ No newline at end of file diff --git a/tests/test_121/input/schema_def.tsv b/tests/test_121/input/schema_def.tsv new file mode 100644 index 0000000..0015f44 --- /dev/null +++ b/tests/test_121/input/schema_def.tsv @@ -0,0 +1,5 @@ +id schema name aliases broad_mappings categories close_mappings comments conforms_to contributors created_by created_on date default_prefix default_range deprecated deprecated_element_has_exact_replacement deprecated_element_has_possible_replacement description exact_mappings implements imports in_language instantiates last_updated_on dat license mappings modified_by narrow_mappings notes rank related_mappings see_also slot_names_unique source status title todos version alt_description_source alt_description_text flavor annotation local name source local name value structured_aliases literal_form structured_aliases alias_predicate structured_aliases categories in subset id_prefixes emit_prefixes default_curi_maps +>id schema aliases broad_mappings categories close_mappings comments conforms_to contributors created_by ignore default_prefix default_range deprecated 
deprecated_element_has_exact_replacement deprecated_element_has_possible_replacement description exact_mappings implements ignore in_language ignore ignore license mappings modified_by narrow_mappings notes rank related_mappings see_also slot_names_unique source status title todos version alt_descriptions alt_descriptions annotations local_names local_names structured_aliases structured_aliases structured_aliases in_subset id_prefixes emit_prefixes default_curi_maps +> internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' +> inner_key: source inner_key: text inner_key: flavor inner_key: source inner_key: value inner_key: literal_form inner_key: alias_predicate inner_key: categories "" +http://example.com/some_schema some schema text1|text2 some_schema:1|some_schema:2 some_schema:1|some_schema:2 some_schema:1|some_schema:2 the overall usefulness of default_curi_maps is debatable|there may be some improper modeling in here like illegal ranges. what software will detect that?|what about numeric, date and booleans that get converted to strings?|what are implements and instantiates good for?|how well does including an imports statement in a schemsheets TSV work?|todo what about multivalued slots with multiple inner keys?|what does the schema repair method do? LinkML some_schema:1|some_schema:2 some_schema:1 some_schema float we all feel deprecated some times some_schema:1 some_schema:1 A schema that tests as many elements as possible. 
For use in testing YAML <-> sheets some_schema:1|some_schema:2 some_schema:1|some_schema:2 some_schema:1|some_schema:2 English some_schema:1|some_schema:2 MIT some_schema:1|some_schema:2 some_schema:1 some_schema:1|some_schema:2 text1|text2 3 some_schema:1|some_schema:2 some_schema:1|some_schema:2 TRUE some_schema:1 some_schema:1 See description SETTINGS! v0.0.1 wiktionary "An outline or image universally applicable to a general conception, under which it is likely to be presented to the mind" raspberry logic format schema_definition EXACT_SYNONYM some_schema:1|some_schema:2 main_subset|secret_subset data_prefix_1|data_prefix_2 data_prefix_1|data_prefix_2|non_data_prefix semweb_context|idot_context \ No newline at end of file diff --git a/tests/test_121/input/setting_defs.tsv b/tests/test_121/input/setting_defs.tsv new file mode 100644 index 0000000..644740b --- /dev/null +++ b/tests/test_121/input/setting_defs.tsv @@ -0,0 +1,3 @@ +setting name setting expansion +>setting setting_value +vowels [aeiouAEIOU]+ \ No newline at end of file diff --git a/tests/test_121/input/setting_defs.txt b/tests/test_121/input/setting_defs.txt new file mode 100644 index 0000000..644740b --- /dev/null +++ b/tests/test_121/input/setting_defs.txt @@ -0,0 +1,3 @@ +setting name setting expansion +>setting setting_value +vowels [aeiouAEIOU]+ \ No newline at end of file diff --git a/tests/test_121/input/slot_class_assignments.tsv b/tests/test_121/input/slot_class_assignments.tsv new file mode 100644 index 0000000..61d27aa --- /dev/null +++ b/tests/test_121/input/slot_class_assignments.tsv @@ -0,0 +1,4 @@ +class slot +>class slot +Vehicle exterior_color +Vehicle max_passengers diff --git a/tests/test_121/input/slot_defs.tsv b/tests/test_121/input/slot_defs.tsv new file mode 100644 index 0000000..1b5e866 --- /dev/null +++ b/tests/test_121/input/slot_defs.tsv @@ -0,0 +1,4 @@ +slot description +>slot description +exterior_color the primary color, using crayola names, of the exterior of the 
vehicle +max_passengers That maximum number of human passengers that can be safely transported by the vehicle diff --git a/tests/test_121/input/subset_defs.tsv b/tests/test_121/input/subset_defs.tsv new file mode 100644 index 0000000..7edc482 --- /dev/null +++ b/tests/test_121/input/subset_defs.tsv @@ -0,0 +1,4 @@ +Subset desc +> subset description +main_subset main subset +supplementary_subset supplementary subset \ No newline at end of file diff --git a/tests/test_121/output/mixs_test.yaml b/tests/test_121/output/mixs_test.yaml new file mode 100644 index 0000000..183f9f8 --- /dev/null +++ b/tests/test_121/output/mixs_test.yaml @@ -0,0 +1,79 @@ +name: some schema +conforms_to: LinkML +implements: +- some_schema:1 +- some_schema:2 +description: A schema that tests as many elements as possible. For use in testing + YAML <-> sheets +alt_descriptions: + text: An outline or image universally applicable to a general conception, under + which it is likely to be presented to the mind +title: See description +deprecated: we all feel deprecated some times +todos: +- text1 +- text2 +notes: +- text1 +- text2 +comments: +- text1 +- text2 +source: some_schema:1 +in_language: English +see_also: +- some_schema:1 +- some_schema:2 +deprecated_element_has_exact_replacement: some_schema:1 +deprecated_element_has_possible_replacement: some_schema:1 +aliases: +- text1 +- text2 +mappings: +- some_schema:1 +- some_schema:2 +exact_mappings: +- some_schema:1 +- some_schema:2 +close_mappings: +- some_schema:1 +- some_schema:2 +related_mappings: +- some_schema:1 +- some_schema:2 +narrow_mappings: +- some_schema:1 +- some_schema:2 +broad_mappings: +- some_schema:1 +- some_schema:2 +created_by: some_schema:1 +created_on: '2023-01-01' +last_updated_on: '2023-01-01' +modified_by: some_schema:1 +status: some_schema:1 +rank: '3' +id: http://example.com/some_schema +version: v0.0.1 +imports: +- linkml:types +license: MIT +prefixes: + some_schema: + prefix_prefix: some_schema + prefix_reference: 
http://example.com/some_schema_path/ + linkml: + prefix_prefix: linkml + prefix_reference: https://w3id.org/linkml/ +default_prefix: some_schema +default_range: float +slot_names_unique: true +categories: +- some_schema:1 +- some_schema:2 +keywords: +- text1 +- text2 +contributors: +- some_schema:1 +- some_schema:2 diff --git a/tests/test_121/test_mixs_generation.py b/tests/test_121/test_mixs_generation.py new file mode 100644 index 0000000..6cff4b8 --- /dev/null +++ b/tests/test_121/test_mixs_generation.py @@ -0,0 +1,63 @@ +import csv +import logging +import os +import pprint + +from linkml.generators.projectgen import ProjectGenerator, ProjectConfiguration +from linkml_runtime.dumpers import yaml_dumper +from linkml_runtime.utils.schema_as_dict import schema_as_dict +from linkml_runtime.utils.schemaview import SchemaView +from linkml.utils.helpers import write_to_file + +from schemasheets.schemamaker import SchemaMaker, get_metamodel, SchemaSheetRowException + +# todo what about assertions into read only slots? 
+# linkml2schemasheets-template --source-path "https://w3id.org/linkml/meta.yaml" --output-path meta.tsv --report-style exhaustive > meta_template_report.txt
+# definition_uri
+# from_schema
+# generation_date
+# imported_from
+# metamodel_version
+# source_file
+# source_file_date
+# source_file_size
+
+SCHEMA_NAME = 'mixs_test'
+
+ROOT = os.path.abspath(os.path.dirname(__file__))  # absolute path of the directory containing this test module
+INPUT_DIR = os.path.join(ROOT, 'input')
+OUTPUT_DIR = os.path.join(ROOT, 'output')
+
+# PROBLEM_DIR = os.path.join(INPUT_DIR, 'problem_cases')
+
+CLASS_DEFS_TSV = os.path.join(INPUT_DIR, 'class_defs.tsv')
+PREFIX_DEFS_TSV = os.path.join(INPUT_DIR, 'prefix_defs.tsv')
+SCHEMA_DEF_TSV = os.path.join(INPUT_DIR, 'schema_def.tsv')
+SETTING_DEFS_TSV = os.path.join(INPUT_DIR, 'setting_defs.tsv')
+SLOT_CLASS_ASSIGNMENTS_TSV = os.path.join(INPUT_DIR, 'slot_class_assignments.tsv')
+SLOT_DEFS_TSV = os.path.join(INPUT_DIR, 'slot_defs.tsv')
+SUBSET_DEFS_TSV = os.path.join(INPUT_DIR, 'subset_defs.tsv')
+
+SCHEMA_YAML = os.path.join(OUTPUT_DIR, f"{SCHEMA_NAME}.yaml")
+
+
+def test_mixs_generation():
+    sm = SchemaMaker(use_attributes=False,
+                     unique_slots=True,
+                     )
+    schema = sm.create_schema([
+        CLASS_DEFS_TSV,
+        PREFIX_DEFS_TSV,
+        SCHEMA_DEF_TSV,
+        SETTING_DEFS_TSV,
+        SLOT_CLASS_ASSIGNMENTS_TSV,
+        SLOT_DEFS_TSV,
+        SUBSET_DEFS_TSV,
+    ])
+    schema = sm.repair_schema(schema)
+    print("\n")
+    print(yaml_dumper.dumps(schema))
+
+    # NOTE: no assertions yet; this only checks that create_schema() and
+    # repair_schema() run without raising, then prints the schema as YAML.
+    # TODO: write_to_file(SCHEMA_YAML, yaml_dumper.dumps(schema))