Skip to content

Commit

Permalink
Neat 239 a has data filter can reference at most 10 views and or cont…
Browse files Browse the repository at this point in the history
…ainers (#445)

[0.76.3] - 10-05-24
Added
Added schema validator for performance, specifically if views map to too many containers.
  • Loading branch information
nikokaoja committed May 10, 2024
1 parent cb6500f commit c014821
Show file tree
Hide file tree
Showing 10 changed files with 213 additions and 4 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
.PHONY: run-explorer run-tests run-linters build-ui build-python build-docker run-docker compose-up

version="0.76.2"
version="0.76.3"
run-explorer:
@echo "Running explorer API server..."
# open "http://localhost:8000/static/index.html" || true
Expand Down
2 changes: 1 addition & 1 deletion cognite/neat/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.76.2"
__version__ = "0.76.3"
65 changes: 65 additions & 0 deletions cognite/neat/rules/issues/dms.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
__all__ = [
"DMSSchemaError",
"DMSSchemaWarning",
"IncompleteSchemaError",
"MissingSpaceError",
"MissingContainerError",
"MissingContainerPropertyError",
Expand All @@ -19,12 +20,14 @@
"DirectRelationMissingSourceWarning",
"ViewModelVersionNotMatchingWarning",
"ViewModelSpaceNotMatchingWarning",
"ViewMapsToTooManyContainersWarning",
"DuplicatedViewInDataModelError",
"ContainerPropertyUsedMultipleTimesError",
"EmptyContainerWarning",
"UnsupportedConnectionWarning",
"MultipleReferenceWarning",
"HasDataFilterOnNoPropertiesViewWarning",
"HasDataFilterAppliedToTooManyContainersWarning",
"ReverseRelationMissingOtherSideWarning",
"NodeTypeFilterOnParentViewWarning",
"ChangingContainerError",
Expand All @@ -40,6 +43,24 @@ class DMSSchemaError(NeatValidationError, ABC): ...
class DMSSchemaWarning(ValidationWarning, ABC): ...


@dataclass(frozen=True)
class IncompleteSchemaError(DMSSchemaError):
description = "This error is raised when the schema is claimed to be complete but missing some components"
fix = "Either provide the missing components or change the schema to partial"
missing_component: dm.ContainerId | dm.ViewId

def message(self) -> str:
return (
"The data model schema is set to be complete, however, "
f"the referred component {self.missing_component} is not preset."
)

def dump(self) -> dict[str, Any]:
output = super().dump()
output["missing_component"] = self.missing_component
return output


@dataclass(frozen=True)
class MissingSpaceError(DMSSchemaError):
description = "The spaced referred to by the Container/View/Node/Edge/DataModel does not exist"
Expand Down Expand Up @@ -250,6 +271,28 @@ def dump(self) -> dict[str, Any]:
return output


@dataclass(frozen=True)
class ViewMapsToTooManyContainersWarning(DMSSchemaWarning):
description = "The view maps to more than 10 containers which impacts read/write performance of data model"
fix = "Try to have as few containers as possible to which the view maps to"
error_name: ClassVar[str] = "ViewMapsToTooManyContainers"
view_id: dm.ViewId
container_ids: set[dm.ContainerId]

def message(self) -> str:
return (
f"The view {self.view_id} maps to total of {len(self.container_ids)},."
"Mapping to more than 10 containers is not recommended and can lead to poor performances."
"Re-iterate the data model design to reduce the number of containers to which the view maps to."
)

def dump(self) -> dict[str, Any]:
output = super().dump()
output["view_id"] = self.view_id.dump()
output["container_ids"] = [container_id.dump() for container_id in self.container_ids]
return output


@dataclass(frozen=True)
class ContainerPropertyUsedMultipleTimesError(DMSSchemaError):
description = "The container property is used multiple times by the same view property"
Expand Down Expand Up @@ -442,6 +485,28 @@ def dump(self) -> dict[str, Any]:
return output


@dataclass(frozen=True)
class HasDataFilterAppliedToTooManyContainersWarning(DMSSchemaWarning):
description = "The view filter hasData applied to more than 10 containers this will cause DMS API Error"
fix = "Do not map to more than 10 containers, alternatively override the filter by using rawFilter"
error_name: ClassVar[str] = "HasDataFilterAppliedToTooManyContainers"
view_id: dm.ViewId
container_ids: set[dm.ContainerId]

def message(self) -> str:
return (
f"The view {self.view_id} HasData filter applied to total of {len(self.container_ids)},."
"Applying HasData filter to more than 10 containers is not recommended and can lead to DMS API error."
"Re-iterate the data model design to reduce the number of containers to which the view maps to."
)

def dump(self) -> dict[str, Any]:
output = super().dump()
output["view_id"] = self.view_id.dump()
output["container_ids"] = [container_id.dump() for container_id in self.container_ids]
return output


@dataclass(frozen=True)
class NodeTypeFilterOnParentViewWarning(DMSSchemaWarning):
description = (
Expand Down
26 changes: 26 additions & 0 deletions cognite/neat/rules/models/dms/_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
DirectRelationMissingSourceWarning,
DMSSchemaError,
DuplicatedViewInDataModelError,
IncompleteSchemaError,
MissingContainerError,
MissingContainerPropertyError,
MissingEdgeViewError,
Expand All @@ -32,6 +33,7 @@
from cognite.neat.utils.cdf_loaders import ViewLoader
from cognite.neat.utils.cdf_loaders.data_classes import RawTableWrite, RawTableWriteList
from cognite.neat.utils.text import to_camel
from cognite.neat.utils.utils import get_inheritance_path

if sys.version_info >= (3, 11):
from typing import Self
Expand Down Expand Up @@ -60,6 +62,30 @@ class DMSSchema:
"node": "node_types",
}

def _get_mapped_container_from_view(self, view_id: dm.ViewId) -> set[dm.ContainerId]:
# index all views, including ones from reference
indexed_views = {
**{view.as_id(): view for view in self.views},
**({view.as_id(): view for view in self.reference.views} if self.reference else {}),
}

if view_id not in indexed_views:
raise ValueError(f"View {view_id} not found")

indexed_implemented_views = {id_: view.implements for id_, view in indexed_views.items()}
view_inheritance = get_inheritance_path(view_id, indexed_implemented_views)

directly_referenced_containers = indexed_views[view_id].referenced_containers()
inherited_referenced_containers = set()

for view_id in view_inheritance:
if implemented_view := indexed_views.get(view_id):
inherited_referenced_containers |= implemented_view.referenced_containers()
else:
raise IncompleteSchemaError(missing_component=view_id).as_exception()

return directly_referenced_containers | inherited_referenced_containers

@classmethod
def from_model_id(cls, client: CogniteClient, data_model_id: dm.DataModelIdentifier) -> "DMSSchema":
data_models = client.data_modeling.data_models.retrieve(data_model_id, inline_views=True)
Expand Down
33 changes: 33 additions & 0 deletions cognite/neat/rules/models/dms/_validation.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from collections import defaultdict
from typing import Any

from cognite.client import data_modeling as dm

from cognite.neat.rules import issues
from cognite.neat.rules.issues import IssueList
from cognite.neat.rules.models._base import ExtensionCategory, SchemaCompleteness
Expand All @@ -27,6 +29,7 @@ def validate(self) -> IssueList:
self._referenced_views_and_containers_are_existing()
self._validate_extension()
self._validate_schema()
self._validate_performance()
return self.issue_list

def _consistent_container_properties(self) -> None:
Expand Down Expand Up @@ -208,6 +211,36 @@ def _validate_extension(self) -> None:
)
)

def _validate_performance(self) -> None:
# we can only validate performance on complete schemas due to the need
# to access all the container mappings
if self.metadata.schema_ is not SchemaCompleteness.complete:
return None

dms_schema = self.rules.as_schema()

for view in dms_schema.views:
mapped_containers = dms_schema._get_mapped_container_from_view(view.as_id())

if mapped_containers and len(mapped_containers) > 10:
self.issue_list.append(
issues.dms.ViewMapsToTooManyContainersWarning(
view_id=view.as_id(),
container_ids=mapped_containers,
)
)
if (
view.filter
and isinstance(view.filter, dm.filters.HasData)
and len(view.filter.dump()["hasData"]) > 10
):
self.issue_list.append(
issues.dms.HasDataFilterAppliedToTooManyContainersWarning(
view_id=view.as_id(),
container_ids=mapped_containers,
)
)

@staticmethod
def _changed_attributes_and_properties(
new_dumped: dict[str, Any], existing_dumped: dict[str, Any]
Expand Down
5 changes: 5 additions & 0 deletions docs/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ Changes are grouped as follows:
- [BREAKING] As a result of the above, in the `ExcelExporter` the parameter `is_reference` is replaced by `dump_as`.
To continue using the old behavior, set `dump_as='reference'`.

## [0.76.3] - 10-05-24
### Added
- Added schema validator for performance, specifically if views map to too many containers.


## [0.76.2] - 06-05-24
### Fixed
- Added missing "Is Reference" parameter back to the `ExcelExporter`step.
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "cognite-neat"
version = "0.76.2"
version = "0.76.3"
readme = "README.md"
description = "Knowledge graph transformation"
authors = [
Expand Down
Binary file not shown.
45 changes: 44 additions & 1 deletion tests/tests_unit/rules/test_importers/test_excel_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,46 @@ def invalid_rules_filepaths():
),
id="Missing container and view definition",
)
yield pytest.param(
EXCEL_IMPORTER_DATA / "too_many_containers_per_view.xlsx",
IssueList(
[
cognite.neat.rules.issues.dms.ViewMapsToTooManyContainersWarning(
view_id=ViewId(space="neat", external_id="Asset", version="1"),
container_ids={
ContainerId(space="neat", external_id="Asset1"),
ContainerId(space="neat", external_id="Asset2"),
ContainerId(space="neat", external_id="Asset3"),
ContainerId(space="neat", external_id="Asset4"),
ContainerId(space="neat", external_id="Asset5"),
ContainerId(space="neat", external_id="Asset6"),
ContainerId(space="neat", external_id="Asset7"),
ContainerId(space="neat", external_id="Asset8"),
ContainerId(space="neat", external_id="Asset9"),
ContainerId(space="neat", external_id="Asset10"),
ContainerId(space="neat", external_id="Asset11"),
},
),
cognite.neat.rules.issues.dms.HasDataFilterAppliedToTooManyContainersWarning(
view_id=ViewId(space="neat", external_id="Asset", version="1"),
container_ids={
ContainerId(space="neat", external_id="Asset1"),
ContainerId(space="neat", external_id="Asset2"),
ContainerId(space="neat", external_id="Asset3"),
ContainerId(space="neat", external_id="Asset4"),
ContainerId(space="neat", external_id="Asset5"),
ContainerId(space="neat", external_id="Asset6"),
ContainerId(space="neat", external_id="Asset7"),
ContainerId(space="neat", external_id="Asset8"),
ContainerId(space="neat", external_id="Asset9"),
ContainerId(space="neat", external_id="Asset10"),
ContainerId(space="neat", external_id="Asset11"),
},
),
]
),
id="Too many containers per view",
)


class TestExcelImporter:
Expand Down Expand Up @@ -109,5 +149,8 @@ def test_import_invalid_rules(self, filepath: Path, expected_issues: IssueList):

_, issues = importer.to_rules(errors="continue")

issues = sorted(issues)
expected_issues = sorted(expected_issues)

assert len(issues) == len(expected_issues)
assert sorted(issues) == sorted(expected_issues)
assert issues == expected_issues
37 changes: 37 additions & 0 deletions tests/tests_unit/rules/test_models/test_dms_architect_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -1626,3 +1626,40 @@ def test_default_filters_using_olav_dms_rules(self, olav_dms_rules: DMSRules) ->
polygon.filter.dump()
== dm.filters.Equals(["node", "type"], {"space": "power", "externalId": "Polygon"}).dump()
)


def test_dms_rules_validation_error():
with pytest.raises(ValidationError) as e:
dms_rules = DMSRulesInput(
metadata=DMSMetadataInput(
schema_="complete",
space="my_space",
external_id="my_data_model",
version="1",
creator="Anders",
created="2024-03-16",
updated="2024-03-16",
),
properties=[
DMSPropertyInput(
class_="WindFarm",
property_="name",
value_type="text",
container="Asset",
container_property="name",
view="WindFarm",
view_property="name",
),
],
views=[DMSViewInput(view="WindFarm", class_="WindFarm", implements="Sourceable,Describable")],
containers=[DMSContainerInput(container="Asset", class_="Asset", constraint="Sourceable,Describable")],
)

dms_rules.as_rules()

errors = e.value.errors()

assert errors[0]["msg"] == (
"Value error, The data model schema is set to be complete, however, "
"the referred component ViewId(space='my_space', external_id='Sourceable', version='1') is not preset."
)

0 comments on commit c014821

Please sign in to comment.