Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[NEAT-236] DMS Exporter Support downloading Solution with Enterprise 🧑‍🧒 #448

Merged
merged 15 commits into from
May 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
.PHONY: run-explorer run-tests run-linters build-ui build-python build-docker run-docker compose-up

version="0.76.3"
version="0.77.0"
run-explorer:
@echo "Running explorer API server..."
# open "http://localhost:8000/static/index.html" || true
Expand Down
2 changes: 1 addition & 1 deletion cognite/neat/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.76.3"
__version__ = "0.77.0"
73 changes: 57 additions & 16 deletions cognite/neat/rules/importers/_dms2rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from cognite.client import CogniteClient
from cognite.client import data_modeling as dm
from cognite.client.data_classes.data_modeling import DataModelIdentifier
from cognite.client.data_classes.data_modeling import DataModelId, DataModelIdentifier
from cognite.client.data_classes.data_modeling.containers import BTreeIndex, InvertedIndex
from cognite.client.data_classes.data_modeling.views import (
MultiEdgeConnectionApply,
Expand Down Expand Up @@ -51,12 +51,14 @@ def __init__(
schema: DMSSchema,
read_issues: Sequence[ValidationIssue] | None = None,
metadata: DMSMetadata | None = None,
ref_metadata: DMSMetadata | None = None,
):
# Calling this root schema to distinguish it from
# * User Schema
# * Reference Schema
self.root_schema = schema
self.metadata = metadata
self.ref_metadata = ref_metadata
self.issue_list = IssueList(read_issues)
self._all_containers_by_id = {container.as_id(): container for container in schema.containers}
if self.root_schema.reference:
Expand All @@ -65,51 +67,87 @@ def __init__(
)

@classmethod
def from_data_model_id(cls, client: CogniteClient, data_model_id: DataModelIdentifier) -> "DMSImporter":
def from_data_model_id(
cls,
client: CogniteClient,
data_model_id: DataModelIdentifier,
reference_model_id: DataModelIdentifier | None = None,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with being explicit about stating `reference_model_id`. Later on we can add inference magic, but my worry is that we might bump into data models not created by neat that are derived from more than one reference.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is an interesting discussion, will add it to tech debt

) -> "DMSImporter":
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With this method we only support getting the latest version. We could get a mismatch if, for some reason, the user data model was created on top of a reference data model whose version is not the latest.

So maybe add NOTE.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, you can specify whatever datamodel version you want.

DataModelIdentifier = tuple[str, str] | tuple[str, str, str] | dm.DataModelId

"""Create a DMSImporter ready to convert the given data model to rules.

Args:
client: Instantiated CogniteClient to retrieve data model.
reference_model_id: The reference data model to retrieve. This is the data model that
the given data model is built on top of, typically, an enterprise data model.
data_model_id: Data Model to retrieve.

Returns:
DMSImporter: DMSImporter instance
"""
data_models = client.data_modeling.data_models.retrieve(data_model_id, inline_views=True)
if len(data_models) == 0:
data_model_ids = [data_model_id, reference_model_id] if reference_model_id else [data_model_id]
data_models = client.data_modeling.data_models.retrieve(data_model_ids, inline_views=True)

user_models = cls._find_model_in_list(data_models, data_model_id)
if len(user_models) == 0:
return cls(DMSSchema(), [issues.importing.NoDataModelError(f"Data model {data_model_id} not found")])
data_model = data_models.latest_version()
user_model = user_models.latest_version()

if reference_model_id:
ref_models = cls._find_model_in_list(data_models, reference_model_id)
if len(ref_models) == 0:
return cls(
DMSSchema(), [issues.importing.NoDataModelError(f"Data model {reference_model_id} not found")]
)
ref_model: dm.DataModel[dm.View] | None = ref_models.latest_version()
else:
ref_model = None

try:
schema = DMSSchema.from_data_model(client, data_model)
schema = DMSSchema.from_data_model(client, user_model, ref_model)
except Exception as e:
return cls(DMSSchema(), [issues.importing.APIError(str(e))])

created = ms_to_datetime(data_model.created_time)
updated = ms_to_datetime(data_model.last_updated_time)
metadata = cls._create_metadata_from_model(user_model)
ref_metadata = cls._create_metadata_from_model(ref_model) if ref_model else None

metadata = cls._create_metadata_from_model(data_model, created, updated)
return cls(schema, [], metadata, ref_metadata)

return cls(schema, [], metadata)
@classmethod
def _find_model_in_list(
    cls, data_models: dm.DataModelList[dm.View], model_id: DataModelIdentifier
) -> dm.DataModelList[dm.View]:
    """Select the models in ``data_models`` that match ``model_id``.

    A model matches when its ``(space, external_id)`` pair equals the pair of the
    loaded identifier. The version is not part of the comparison, so every
    version of the model present in ``data_models`` is kept.

    Args:
        data_models: The data models to search through.
        model_id: The identifier of the model to look for.

    Returns:
        A new data model list holding only the matching models (possibly empty).
    """
    wanted = DataModelId.load(model_id)
    wanted_key = (wanted.space, wanted.external_id)
    matching_models = [
        candidate for candidate in data_models if (candidate.space, candidate.external_id) == wanted_key
    ]
    return dm.DataModelList[dm.View](matching_models)

@classmethod
def _create_metadata_from_model(
cls,
model: dm.DataModel[dm.View] | dm.DataModelApply,
created: datetime | None = None,
updated: datetime | None = None,
) -> DMSMetadata:
description, creator = DMSMetadata._get_description_and_creator(model.description)
now = datetime.now().replace(microsecond=0)

if isinstance(model, dm.DataModel):
created = ms_to_datetime(model.created_time)
updated = ms_to_datetime(model.last_updated_time)
else:
now = datetime.now().replace(microsecond=0)
created = now
updated = now
return DMSMetadata(
schema_=SchemaCompleteness.complete,
extension=ExtensionCategory.addition,
space=model.space,
external_id=model.external_id,
name=model.name or model.external_id,
version=model.version or "0.1.0",
updated=updated or now,
created=created or now,
updated=updated,
created=created,
creator=creator,
description=description,
)
Expand Down Expand Up @@ -160,7 +198,10 @@ def to_rules(
# Reference should always be an enterprise model.
reference = DMSRules(
**self._create_rule_components(
ref_model, ref_schema, self._create_default_metadata(ref_schema.views), DataModelType.enterprise
ref_model,
ref_schema,
self.ref_metadata or self._create_default_metadata(ref_schema.views),
DataModelType.enterprise,
)
)
schema_completeness = SchemaCompleteness.extended
Expand Down
99 changes: 84 additions & 15 deletions cognite/neat/rules/models/dms/_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,18 +96,49 @@ def from_model_id(cls, client: CogniteClient, data_model_id: dm.DataModelIdentif
return cls.from_data_model(client, data_model)

@classmethod
def from_data_model(cls, client: CogniteClient, data_model: dm.DataModel) -> "DMSSchema":
def from_data_model(
cls,
client: CogniteClient,
data_model: dm.DataModel[dm.View],
reference_model: dm.DataModel[dm.View] | None = None,
) -> "DMSSchema":
"""Create a schema from a data model.

If a reference model is provided, the schema will include a reference schema. To determine which views,
and containers to put in the reference schema, the following rule is applied:

If a view or container space is different from the data model space,
it will be included in the reference schema.*

*One exception to this rule is if a view is directly referenced by the data model. In this case, the view will
be included in the data model schema, even if the space is different.

Args:
client: The Cognite client used for retrieving components referenced by the data model.
data_model: The data model to create the schema from.
reference_model: (Optional) The reference model to include in the schema.
This is typically the Enterprise model.

Returns:
DMSSchema: The schema created from the data model.
"""
views = dm.ViewList(data_model.views)

data_model_write = data_model.as_write()
data_model_write.views = list(views.as_ids())

if reference_model:
views.extend(reference_model.views)

container_ids = views.referenced_containers()
containers = client.data_modeling.containers.retrieve(list(container_ids))
cls._append_referenced_containers(client, containers)

space_read = client.data_modeling.spaces.retrieve(data_model.space)
if space_read is None:
raise ValueError(f"Space {data_model.space} not found")
space = space_read.as_write()
data_model_write = data_model.as_write()
data_model_write.views = list(views.as_write())
space_ids = [data_model.space, reference_model.space] if reference_model else [data_model.space]
space_read = client.data_modeling.spaces.retrieve(space_ids)
if len(space_read) != len(space_ids):
raise ValueError(f"Space(s) {space_read} not found")
space_write = space_read.as_write()

view_loader = ViewLoader(client)
# We need to include parent views in the schema to make sure that the schema is valid.
Expand All @@ -122,11 +153,41 @@ def from_data_model(cls, client: CogniteClient, data_model: dm.DataModel) -> "DM
# The ViewLoader as_write method looks up parents and remove properties from them.
view_write = dm.ViewApplyList([view_loader.as_write(view) for view in views])

container_write = containers.as_write()
user_space = data_model.space
if reference_model:
user_model_view_ids = set(data_model_write.views)
ref_model_write = reference_model.as_write()
ref_model_write.views = [view.as_id() for view in reference_model.views]

ref_views = dm.ViewApplyList(
[view for view in view_write if (view.space != user_space) or (view.as_id() not in user_model_view_ids)]
)
view_write = dm.ViewApplyList(
[view for view in view_write if view.space == user_space or view.as_id() in user_model_view_ids]
)

ref_containers = dm.ContainerApplyList(
[container for container in container_write if container.space != user_space]
)
container_write = dm.ContainerApplyList(
[container for container in container_write if container.space == user_space]
)

ref_schema: DMSSchema | None = cls(
spaces=dm.SpaceApplyList([s for s in space_write if s.space != user_space]),
data_model=ref_model_write,
views=ref_views,
containers=ref_containers,
)
else:
ref_schema = None
return cls(
spaces=dm.SpaceApplyList([space]),
spaces=dm.SpaceApplyList([s for s in space_write if s.space == user_space]),
data_model=data_model_write,
views=view_write,
containers=containers.as_write(),
containers=container_write,
reference=ref_schema,
)

@classmethod
Expand Down Expand Up @@ -531,17 +592,25 @@ def _append_referenced_containers(cls, client: CogniteClient, containers: dm.Con
)
return None

def referenced_spaces(self) -> set[str]:
referenced_spaces = {container.space for container in self.containers}
referenced_spaces |= {view.space for view in self.views}
referenced_spaces |= {container.space for view in self.views for container in view.referenced_containers()}
referenced_spaces |= {parent.space for view in self.views for parent in view.implements or []}
def referenced_spaces(self, include_indirect_references: bool = True) -> set[str]:
"""Get the spaces referenced by the schema.

The spaces of the schema's views, containers, node types, data model (including
the views listed by the data model) and space definitions are always included.

Args:
include_indirect_references (bool): If True, the spaces referenced through
view.implements and view.referenced_containers() are also included in the output.

Returns:
set[str]: The spaces referenced by the schema.
"""
# Direct references: the spaces the schema's own views and containers live in.
referenced_spaces = {view.space for view in self.views}
referenced_spaces |= {container.space for container in self.containers}
if include_indirect_references:
# Indirect references: containers the views map to, and the parent views they implement.
referenced_spaces |= {container.space for view in self.views for container in view.referenced_containers()}
referenced_spaces |= {parent.space for view in self.views for parent in view.implements or []}
referenced_spaces |= {node.space for node in self.node_types}
if self.data_model:
# The data model contributes its own space and the spaces of the views it lists.
referenced_spaces |= {self.data_model.space}
referenced_spaces |= {view.space for view in self.data_model.views or []}
referenced_spaces |= {s.space for s in self.spaces}

return referenced_spaces


Expand Down
4 changes: 3 additions & 1 deletion docs/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ Changes are grouped as follows:
- `Fixed` for any bug fixes.
- `Security` in case of vulnerabilities.

## TBD
## [0.77.0] - 13-05-24
### Changed
- [BREAKING] The subpackage `cognite.neat.rules.models` is reorganized. All imports using this subpackage must be
updated.
Expand All @@ -24,6 +24,8 @@ Changes are grouped as follows:
- Support for exporting/importing `Last` spreadsheets in the `ExcelExporter` and `ExcelImporter`.
- [BREAKING] As a result of the above, in the `ExcelExporter` the parameter `is_reference` is replaced by `dump_as`.
To continue using the old behavior, set `dump_as='reference'`.
- `DMSImporter.from_data_model_id` now supports setting `reference_model_id` to download a solution model
together with its reference model.

## [0.76.3] - 10-05-24
### Added
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "cognite-neat"
version = "0.76.3"
version = "0.77.0"
readme = "README.md"
description = "Knowledge graph transformation"
authors = [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,40 @@ def alice_data_model_id(alice_rules: DMSRules) -> DataModelId:
return alice_rules.metadata.as_data_model_id()


@pytest.fixture(scope="session")
def olav_rules() -> DMSRules:
    """Import the `dms-analytics-olav.xlsx` rules file as DMS architect rules.

    Import errors are raised rather than collected.
    """
    importer = ExcelImporter(DOC_RULES / "dms-analytics-olav.xlsx")
    return importer.to_rules(errors="raise", role=RoleTypes.dms_architect)


@pytest.fixture(scope="session")
def olav_data_model_id(olav_rules: DMSRules) -> DataModelId:
    """Return the data model id derived from the metadata of the `olav_rules` fixture."""
    olav_metadata = olav_rules.metadata
    return olav_metadata.as_data_model_id()


class TestDMSImporter:
def test_import_from_cdf(self, cognite_client: CogniteClient, alice_data_model_id: DataModelId):
def test_import_alice_from_cdf(self, cognite_client: CogniteClient, alice_data_model_id: DataModelId):
dms_exporter = DMSImporter.from_data_model_id(cognite_client, alice_data_model_id)

rules = dms_exporter.to_rules(errors="raise", role=RoleTypes.information_architect)

assert isinstance(rules, InformationRules)

def test_import_olav_from_cdf(
self, cognite_client: CogniteClient, olav_data_model_id: DataModelId, alice_data_model_id: DataModelId
):
dms_exporter = DMSImporter.from_data_model_id(cognite_client, olav_data_model_id, alice_data_model_id)

assert dms_exporter.root_schema.referenced_spaces(include_indirect_references=False) == {
olav_data_model_id.space
}, "The direct reference should be the data model space."

rules = dms_exporter.to_rules(errors="raise", role=RoleTypes.dms_architect)

assert isinstance(rules, DMSRules)
assert rules.metadata.as_data_model_id() == olav_data_model_id
assert isinstance(rules.reference, DMSRules)
assert rules.reference.metadata.as_data_model_id() == alice_data_model_id
Loading