GraphQL schema generation (#23)
* Generation of GraphQL schema from transformation rules

* added tests

* added tests

* added repair of names

* bump version

* Update CHANGELOG.md

Co-authored-by: Anders Albert <60234212+doctrino@users.noreply.github.com>

* Update cognite/neat/core/extractors/rules_to_graphql.py

Co-authored-by: Anders Albert <60234212+doctrino@users.noreply.github.com>

* Fix name of fixture

* Update cognite/neat/core/extractors/rules_to_graphql.py

Co-authored-by: Anders Albert <60234212+doctrino@users.noreply.github.com>

* Update cognite/neat/core/extractors/rules_to_graphql.py

Co-authored-by: Anders Albert <60234212+doctrino@users.noreply.github.com>

* Update cognite/neat/core/extractors/rules_to_graphql.py

Co-authored-by: Anders Albert <60234212+doctrino@users.noreply.github.com>

* moved test to docstring

* switch from dataframe to dict/pydantic, added data model validation

* moving things around

* simpler logic

* Update cognite/neat/core/extractors/rules_to_graphql.py

Co-authored-by: Anders Albert <60234212+doctrino@users.noreply.github.com>

* blacked

---------

Co-authored-by: Anders Albert <60234212+doctrino@users.noreply.github.com>
nikokaoja and doctrino committed May 19, 2023
1 parent dd79dd3 commit 6ff6df3
Showing 12 changed files with 282 additions and 24 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
@@ -13,7 +13,13 @@ Changes are grouped as follows
- `Fixed` for any bug fixes.
- `Security` in case of vulnerabilities.

## [0.11.2] - 15-05-23
### Added
* Generation of GraphQL schema from transformation rules
* Fixing names of classes/properties to align with GraphQL's allowed characters
* Allowing pure data modeling transformation rules, i.e. rules without data mapping definitions
## [0.11.1] - 08-05-23

### Fixed

* Set the license of the package in poetry build.
2 changes: 1 addition & 1 deletion Makefile
@@ -1,6 +1,6 @@
.PHONY: run-explorer run-tests run-linters build-ui build-python build-docker run-docker compose-up

version="0.11.1"
version="0.11.2"
run-explorer:
	@echo "Running explorer API server..."
	# open "http://localhost:8000/static/index.html" || true
2 changes: 1 addition & 1 deletion cognite/neat/__init__.py
@@ -1 +1 @@
__version__ = "0.11.1"
__version__ = "0.11.2"
1 change: 1 addition & 0 deletions cognite/neat/constants.py
@@ -9,5 +9,6 @@
EXAMPLE_WORKFLOWS = EXAMPLES_DIRECTORY / "workflows"

TNT_TRANSFORMATION_RULES = EXAMPLE_RULES / "Rules-Nordic44-to-TNT.xlsx"
SIMPLE_TRANSFORMATION_RULES = EXAMPLE_RULES / "sheet2cdf-transformation-rules.xlsx"
NORDIC44_KNOWLEDGE_GRAPH = EXAMPLE_SOURCE_GRAPHS / "Knowledge-Graph-Nordic44.xml"
UI_PATH = PACKAGE_DIRECTORY / "explorer-ui" / "neat-app" / "build"
66 changes: 47 additions & 19 deletions cognite/neat/core/data_classes/transformation_rules.py
@@ -7,6 +7,7 @@
from typing import Dict, List, Optional, Self, Union

import pandas as pd
from graphql import GraphQLBoolean, GraphQLFloat, GraphQLInt, GraphQLString
from pydantic import BaseModel, Field, HttpUrl, ValidationError, root_validator, validator
from rdflib import XSD, Literal, Namespace, URIRef

@@ -15,20 +16,18 @@

# mapping of XSD types to Python and GraphQL types
DATA_TYPE_MAPPING = {
"boolean": {"python": "bool", "GraphQL": "Boolean"},
"float": {"python": "float", "GraphQL": "Float"},
"integer": {"python": "int", "GraphQL": "Int"},
"nonPositiveInteger": {"python": "int", "GraphQL": "Int"},
"nonNegativeInteger": {"python": "int", "GraphQL": "Int"},
"negativeInteger": {"python": "int", "GraphQL": "Int"},
"long": {"python": "int", "GraphQL": "Int"},
"string": {"python": "str", "GraphQL": "String"},
"anyURI": {"python": "str", "GraphQL": "String"},
"normalizedString": {"python": "str", "GraphQL": "String"},
"token": {"python": "str", "GraphQL": "String"},
"enumeration": {"python": "list", "GraphQL": "Enum"},
"boolean": {"python": "bool", "GraphQL": GraphQLBoolean},
"float": {"python": "float", "GraphQL": GraphQLFloat},
"integer": {"python": "int", "GraphQL": GraphQLInt},
"nonPositiveInteger": {"python": "int", "GraphQL": GraphQLInt},
"nonNegativeInteger": {"python": "int", "GraphQL": GraphQLInt},
"negativeInteger": {"python": "int", "GraphQL": GraphQLInt},
"long": {"python": "int", "GraphQL": GraphQLInt},
"string": {"python": "str", "GraphQL": GraphQLString},
"anyURI": {"python": "str", "GraphQL": GraphQLString},
"normalizedString": {"python": "str", "GraphQL": GraphQLString},
"token": {"python": "str", "GraphQL": GraphQLString},
}

METADATA_VALUE_MAX_LENGTH = 5120


@@ -100,8 +99,8 @@ class Property(Resource):
    relationship_external_id_rule: str = Field(alias="Relationship ExternalID Rule", default=None)

    # Transformation rule (domain to solution)
    rule_type: RuleType = Field(alias="Rule Type")
    rule: str = Field(alias="Rule")
    rule_type: RuleType = Field(alias="Rule Type", default=None)
    rule: str = Field(alias="Rule", default=None)
    skip_rule: bool = Field(alias="Skip", default=False)

    # Specialization of cdf_resource_type to allow definition of both
@@ -112,10 +111,6 @@ class Property(Resource):
    def is_raw_lookup(self) -> bool:
        return self.rule_type == RuleType.rawlookup

    @validator("rule_type", pre=True)
    def to_lowercase(cls, value):
        return value.casefold()

    @validator(
        "max_count",
        "min_count",
@@ -125,13 +120,19 @@ def to_lowercase(cls, value):
"relationship_external_id_rule",
"resource_type_property",
"skip_rule",
"rule",
"rule_type",
pre=True,
)
def replace_float_nan_with_default(cls, value, field):
if isinstance(value, float) and math.isnan(value):
return field.default
return value

@validator("rule_type", pre=True)
def to_lowercase(cls, value):
return value.casefold() if value else value

@validator("skip_rule", pre=True)
def from_string(cls, value):
if isinstance(value, str):
@@ -171,6 +172,13 @@ def set_property_type(cls, value, values):
        else:
            return "ObjectProperty"

    @validator("skip_rule", pre=True, always=True)
    def no_rule(cls, value, values):
        if values.get("rule_type") is None:
            return True
        else:
            return value


class Metadata(BaseModel):
    prefix: str = Field(alias="shortName")
@@ -416,6 +424,18 @@ def get_classes_with_properties(self) -> dict[str, Property]:

        return class_property_pairs

    def check_data_model_definitions(self):
        """Check if data model definitions are valid."""
        issues = set()
        for class_, properties in self.get_classes_with_properties().items():
            analyzed_properties = []
            for property in properties:
                if property.property_name not in analyzed_properties:
                    analyzed_properties.append(property.property_name)
                else:
                    issues.add(f"Property {property.property_name} of class {class_} has been defined more than once!")
        return issues

    def reduce_data_model(self, desired_classes: set, skip_validation: bool = False) -> TransformationRules:
        """Reduce the data model to only include desired classes and their properties.
@@ -603,6 +623,14 @@ def define_relationships(self, stop_on_exception: bool = False) -> RelationshipD
            relationships={},
        )

    def get_entity_names(self):
        class_names = set()
        property_names = set()
        for class_, properties in self.to_dataframe().items():
            class_names.add(class_)
            property_names = property_names.union(set(properties.index))
        return class_names.union(property_names)


class AssetClassMapping(BaseModel):
    external_id: str
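For context on the mapping change above, here is a minimal sketch (not part of the diff) of how the updated `DATA_TYPE_MAPPING` now hands back `graphql-core` scalar objects rather than plain strings, and how a mandatory single-valued property (min count set, max count of 1) ends up as a non-null field type:

```python
from graphql import GraphQLNonNull, GraphQLString

from cognite.neat.core.data_classes.transformation_rules import DATA_TYPE_MAPPING

# XSD "string" now resolves to the graphql-core scalar object, not the literal "String"
scalar = DATA_TYPE_MAPPING["string"]["GraphQL"]
assert scalar is GraphQLString

# A mandatory, single-valued property is wrapped as a non-null field type
print(GraphQLNonNull(scalar))  # prints: String!
```

Returning the scalar objects directly is what lets `rules_to_graphql.py` compose them with `GraphQLNonNull`/`GraphQLList` without a second lookup.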
2 changes: 2 additions & 0 deletions cognite/neat/core/extractors/__init__.py
@@ -1,6 +1,7 @@
from .labels import upload_labels
from .rdf_to_assets import categorize_assets, rdf2assets, upload_assets
from .rdf_to_relationships import categorize_relationships, rdf2relationships, upload_relationships
from .rules_to_graphql import rules2graphql_schema

__all__ = [
"rdf2relationships",
@@ -10,4 +11,5 @@
"categorize_relationships",
"upload_relationships",
"upload_labels",
"rules2graphql_schema",
]
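A rough usage sketch of the new package-level export (not part of the diff; only the re-export shown above is assumed, and loading of the rules object is not covered by this commit):

```python
from cognite.neat.core.extractors import rules2graphql_schema


def export_schema(transformation_rules) -> str:
    # `transformation_rules` is assumed to be an already-populated TransformationRules
    # instance, e.g. parsed from one of the example Excel rule sheets (loading not shown).
    # Warn-and-repair mode: invalid names are repaired instead of raising GraphQLError.
    return rules2graphql_schema(transformation_rules, stop_on_exception=False, fix_casing=True)
```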
171 changes: 171 additions & 0 deletions cognite/neat/core/extractors/rules_to_graphql.py
@@ -0,0 +1,171 @@
import logging
import re
import warnings

from graphql import GraphQLError, GraphQLField, GraphQLList, GraphQLNonNull, GraphQLObjectType, GraphQLSchema
from graphql import assert_name as assert_graphql_name
from graphql import print_schema

from cognite.neat.core.data_classes.transformation_rules import DATA_TYPE_MAPPING, Property, TransformationRules


def get_invalid_names(entity_names: set) -> set:
    """Returns a set of invalid entity names"""
    invalid_names = set()
    for entity_name in entity_names:
        try:
            assert_graphql_name(entity_name)
        except GraphQLError:
            invalid_names.add(entity_name)
    return invalid_names


def repair_name(name: str, entity_type: str, fix_casing: bool = False) -> str:
    """
    Repairs an entity name to conform to GraphQL naming convention
    >>> repair_name("wind-speed", "property")
    'windspeed'
    >>> repair_name("Wind.Speed", "property", True)
    'windSpeed'
    >>> repair_name("windSpeed", "class", True)
    'WindSpeed'
    >>> repair_name("22windSpeed", "class")
    '_22windSpeed'
    """

    # Remove any non GraphQL compliant characters
    repaired_string = re.sub(r"[^_a-zA-Z0-9/_]", "", name)

    # Name must start with a letter or underscore
    if repaired_string[0].isdigit():
        repaired_string = f"_{repaired_string}"

    if not fix_casing:
        return repaired_string
    # Property names must be camelCase
    if entity_type == "property" and repaired_string[0].isupper():
        return repaired_string[0].lower() + repaired_string[1:]
    # Class names must be PascalCase
    elif entity_type == "class" and repaired_string[0].islower():
        return repaired_string[0].upper() + repaired_string[1:]
    else:
        return repaired_string


def _remove_query_type(schema_string: str) -> str:
    """Removes unnecessary Query types to conform to Cognite's GraphQL API"""
    lines = schema_string.split("\n")

    for _i, line in enumerate(lines):
        if "}" in line:
            break

    return "\n".join(lines[_i + 2 :])


def _get_graphql_schema_string(schema: GraphQLSchema) -> str:
    return _remove_query_type(print_schema(schema))


def rules2graphql_schema(
    transformation_rules: TransformationRules,
    stop_on_exception: bool = False,
    fix_casing: bool = False,
) -> str:
    """Generates a GraphQL schema from an instance of TransformationRules

    Parameters
    ----------
    transformation_rules : TransformationRules
        TransformationRules object
    stop_on_exception : bool, optional
        Stop on any exception, by default False
    fix_casing : bool, optional
        Whether to attempt to fix casing of entity names, by default False

    Returns
    -------
    str
        GraphQL schema string
    """
    invalid_names: set = get_invalid_names(transformation_rules.get_entity_names())
    data_model_issues: set = transformation_rules.check_data_model_definitions()

    if invalid_names and stop_on_exception:
        msg = "Entity names must only contain [_a-zA-Z0-9] characters and can start only with [_a-zA-Z]"
        logging.error(f"{msg}, following entities {invalid_names} do not follow these rules!")
        raise GraphQLError(f"{msg}, following entities {invalid_names} do not follow these rules!")
    elif invalid_names and not stop_on_exception:
        msg = "Entity names must only contain [_a-zA-Z0-9] characters and can start only with [_a-zA-Z]"
        logging.warn(
            f"{msg}, following entities {invalid_names} do not follow these rules! Attempting to repair names..."
        )
        warnings.warn(
            f"{msg}, following entities {invalid_names} do not follow these rules! Attempting to repair names...",
            stacklevel=2,
        )

    if data_model_issues and stop_on_exception:
        msg = " ".join(data_model_issues)
        logging.error(msg)
        raise ValueError(msg)
    elif data_model_issues and not stop_on_exception:
        msg = " ".join(data_model_issues)
        msg += " Redefinitions will be skipped!"
        logging.warn(msg)
        warnings.warn(
            msg,
            stacklevel=2,
        )

    def _define_fields(property_definitions: list[Property]) -> dict[str, GraphQLField]:
        gql_field_definitions = {}
        for property_ in property_definitions:
            property_name = repair_name(property_.property_name, "property", fix_casing=fix_casing)  # type: ignore

            if property_name in gql_field_definitions:
                logging.warn(f"Property {property_name} being redefined... skipping!")
                warnings.warn(f"Property {property_name} being redefined... skipping!", stacklevel=2)
                continue

            # Node attribute
            if property_.property_type == "DatatypeProperty":
                value_type_gql = DATA_TYPE_MAPPING[property_.expected_value_type]["GraphQL"]

                # Case: Mandatory, single value
                if property_.min_count and property_.max_count == 1:
                    value = GraphQLNonNull(value_type_gql)
                # Case: Mandatory, multiple value
                elif property_.min_count and property_.max_count != 1:
                    value = GraphQLNonNull(GraphQLList(GraphQLNonNull(value_type_gql)))
                # Case: Optional, single value
                elif property_.max_count == 1:
                    value = value_type_gql
                # Case: Optional, multiple value
                else:
                    value = GraphQLList(value_type_gql)

            # Node edge
            else:
                value = gql_type_definitions[repair_name(property_.expected_value_type, "class", fix_casing=fix_casing)]
                is_one_to_many_edge = not (property_.min_count and property_.max_count == 1)
                if is_one_to_many_edge:
                    value = GraphQLList(value)
            gql_field_definitions[property_name] = GraphQLField(value)

        return gql_field_definitions

    gql_type_definitions: dict = {}
    for class_, properties in transformation_rules.get_classes_with_properties().items():
        gql_type_definitions[repair_name(class_, "class", fix_casing=fix_casing)] = GraphQLObjectType(
            repair_name(class_, "class", fix_casing=fix_casing),
            lambda properties=properties: _define_fields(properties),
        )

    # Needs this so we are able to generate the schema string
    query_schema = GraphQLSchema(
        query=GraphQLObjectType(
            "Query", lambda: {type_name: GraphQLField(type_def) for type_name, type_def in gql_type_definitions.items()}
        )
    )
    return _get_graphql_schema_string(query_schema)
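To illustrate the name-repair helpers in the new module, a small runnable sketch (not part of the diff; the sample names are invented, only the functions above are assumed):

```python
from cognite.neat.core.extractors.rules_to_graphql import get_invalid_names, repair_name

names = {"WindSpeed", "wind-speed", "22kV.Line"}

# Hyphens, dots and a leading digit are not allowed in GraphQL names
print(get_invalid_names(names))  # {'wind-speed', '22kV.Line'}

# Repairing as class names, additionally forcing PascalCase
print({repair_name(name, "class", fix_casing=True) for name in get_invalid_names(names)})
# {'Windspeed', '_22kVLine'}
```

With `stop_on_exception=False` (the default), `rules2graphql_schema` applies exactly this repair after emitting a warning, instead of raising `GraphQLError`.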
16 changes: 14 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "cognite-neat"
version = "0.11.1"
version = "0.11.2"
readme = "README.md"
description = "Knowledge graph transformation"
authors = ["Nikola Vasiljevic <nikola.vasiljevic@cognite.com>",
@@ -54,6 +54,7 @@ fastapi = "^0.95"
schedule = "^1"
python-multipart = "^0.0.6"
oxrdflib = {version = "^0.3.3", extras = ["oxigraph"]}
graphql-core = "^3.2.3"

[tool.poetry.dev-dependencies]
twine = "*"