Skip to content

Commit

Permalink
Neat 317 transformer asset relationship connector (#506)
Browse files Browse the repository at this point in the history
* added AssetRelationshipConnector

* fix failing test

* added tests

* bump version added changelog

* removing uncessary ifs
  • Loading branch information
nikokaoja committed Jun 19, 2024
1 parent 97d64a1 commit a07773a
Show file tree
Hide file tree
Showing 11 changed files with 238 additions and 105 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
.PHONY: run-explorer run-tests run-linters build-ui build-python build-docker run-docker compose-up
version="0.81.10"
version="0.81.11"
run-explorer:
@echo "Running explorer API server..."
# open "http://localhost:8000/static/index.html" || true
Expand Down
2 changes: 1 addition & 1 deletion cognite/neat/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.81.10"
__version__ = "0.81.11"
7 changes: 4 additions & 3 deletions cognite/neat/graph/extractors/_classic_cdf/_assets.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from datetime import datetime, timezone
from pathlib import Path
from typing import cast
from urllib.parse import quote

from cognite.client import CogniteClient
from cognite.client.data_classes import Asset, AssetList
Expand All @@ -11,7 +10,7 @@
from cognite.neat.constants import DEFAULT_NAMESPACE
from cognite.neat.graph.extractors._base import BaseExtractor
from cognite.neat.graph.models import Triple
from cognite.neat.utils.utils import string_to_ideal_type
from cognite.neat.utils.utils import create_sha256_hash, string_to_ideal_type


class AssetsExtractor(BaseExtractor):
Expand Down Expand Up @@ -86,7 +85,9 @@ def _asset2triples(cls, asset: Asset, namespace: Namespace) -> list[Triple]:
for label in asset.labels:
# external_id can create ill-formed URIs, so we create websafe URIs
# since labels do not have internal ids, we use the external_id as the id
triples.append((id_, namespace.label, namespace[f"Label_{quote(label.dump()['externalId'])}"]))
triples.append(
(id_, namespace.label, namespace[f"Label_{create_sha256_hash(label.dump()['externalId'])}"])
)

if asset.metadata:
for key, value in asset.metadata.items():
Expand Down
40 changes: 22 additions & 18 deletions cognite/neat/graph/extractors/_classic_cdf/_labels.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from datetime import datetime, timezone
from pathlib import Path
from typing import cast
from urllib.parse import quote

from cognite.client import CogniteClient
from cognite.client.data_classes import LabelDefinition, LabelDefinitionList
Expand All @@ -11,6 +10,7 @@
from cognite.neat.constants import DEFAULT_NAMESPACE
from cognite.neat.graph.extractors._base import BaseExtractor
from cognite.neat.graph.models import Triple
from cognite.neat.utils.utils import create_sha256_hash


class LabelsExtractor(BaseExtractor):
Expand Down Expand Up @@ -44,28 +44,32 @@ def extract(self) -> Iterable[Triple]:

@classmethod
def _labels2triples(cls, label: LabelDefinition, namespace: Namespace) -> list[Triple]:
id_ = namespace[f"Label_{quote(label.dump()['externalId'])}"]

# Set rdf type
triples: list[Triple] = [(id_, RDF.type, namespace.Label)]
if label.external_id:
id_ = namespace[f"Label_{create_sha256_hash(label.external_id)}"]

# Create attributes
# Set rdf type
triples: list[Triple] = [(id_, RDF.type, namespace.Label)]

if label.external_id:
# Create attributes
triples.append((id_, namespace.external_id, Literal(label.external_id)))

if label.name:
triples.append((id_, namespace.name, Literal(label.name)))
if label.name:
triples.append((id_, namespace.name, Literal(label.name)))

if label.description:
triples.append((id_, namespace.description, Literal(label.description)))
if label.description:
triples.append((id_, namespace.description, Literal(label.description)))

if label.created_time:
triples.append(
(id_, namespace.created_time, Literal(datetime.fromtimestamp(label.created_time / 1000, timezone.utc)))
)
if label.created_time:
triples.append(
(
id_,
namespace.created_time,
Literal(datetime.fromtimestamp(label.created_time / 1000, timezone.utc)),
)
)

if label.data_set_id:
triples.append((id_, namespace.data_set_id, namespace[f"Dataset_{label.data_set_id}"]))
if label.data_set_id:
triples.append((id_, namespace.data_set_id, namespace[f"Dataset_{label.data_set_id}"]))

return triples
return triples
return []
140 changes: 70 additions & 70 deletions cognite/neat/graph/extractors/_classic_cdf/_relationships.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import uuid
from collections.abc import Iterable
from datetime import datetime, timezone
from pathlib import Path
Expand All @@ -12,6 +11,7 @@
from cognite.neat.constants import DEFAULT_NAMESPACE
from cognite.neat.graph.extractors._base import BaseExtractor
from cognite.neat.graph.models import Triple
from cognite.neat.utils.utils import create_sha256_hash


class RelationshipsExtractor(BaseExtractor):
Expand Down Expand Up @@ -47,36 +47,36 @@ def extract(self) -> Iterable[Triple]:
def _relationship2triples(cls, relationship: Relationship, namespace: Namespace) -> list[Triple]:
"""Converts an asset to triples."""

# relationships do not have an internal id, so we generate one
id_ = namespace[f"Relationship_{uuid.uuid4()}"]

# Set rdf type
triples: list[Triple] = [(id_, RDF.type, namespace["Relationship"])]

# Set source and target types
if source_type := relationship.source_type:
triples.append(
(
id_,
namespace.source_type,
namespace[source_type.title()],
if relationship.external_id and relationship.source_external_id and relationship.target_external_id:
# relationships do not have an internal id, so we generate one
id_ = namespace[f"Relationship_{create_sha256_hash(relationship.external_id)}"]

# Set rdf type
triples: list[Triple] = [(id_, RDF.type, namespace["Relationship"])]

# Set source and target types
if source_type := relationship.source_type:
triples.append(
(
id_,
namespace.source_type,
namespace[source_type.title()],
)
)
)

if target_type := relationship.target_type:
triples.append(
(
id_,
namespace.target_type,
namespace[target_type.title()],
if target_type := relationship.target_type:
triples.append(
(
id_,
namespace.target_type,
namespace[target_type.title()],
)
)
)

# Create attributes
if relationship.external_id:
# Create attributes

triples.append((id_, namespace.external_id, Literal(relationship.external_id)))

if relationship.source_external_id:
triples.append(
(
id_,
Expand All @@ -85,7 +85,6 @@ def _relationship2triples(cls, relationship: Relationship, namespace: Namespace)
)
)

if relationship.target_external_id:
triples.append(
(
id_,
Expand All @@ -94,59 +93,60 @@ def _relationship2triples(cls, relationship: Relationship, namespace: Namespace)
)
)

if relationship.start_time:
triples.append(
(
id_,
namespace.start_time,
Literal(datetime.fromtimestamp(relationship.start_time / 1000, timezone.utc)),
if relationship.start_time:
triples.append(
(
id_,
namespace.start_time,
Literal(datetime.fromtimestamp(relationship.start_time / 1000, timezone.utc)),
)
)
)

if relationship.end_time:
triples.append(
(
id_,
namespace.end_time,
Literal(datetime.fromtimestamp(relationship.end_time / 1000, timezone.utc)),
if relationship.end_time:
triples.append(
(
id_,
namespace.end_time,
Literal(datetime.fromtimestamp(relationship.end_time / 1000, timezone.utc)),
)
)
)

if relationship.created_time:
triples.append(
(
id_,
namespace.created_time,
Literal(datetime.fromtimestamp(relationship.created_time / 1000, timezone.utc)),
if relationship.created_time:
triples.append(
(
id_,
namespace.created_time,
Literal(datetime.fromtimestamp(relationship.created_time / 1000, timezone.utc)),
)
)
)

if relationship.last_updated_time:
triples.append(
(
id_,
namespace.last_updated_time,
Literal(datetime.fromtimestamp(relationship.last_updated_time / 1000, timezone.utc)),
if relationship.last_updated_time:
triples.append(
(
id_,
namespace.last_updated_time,
Literal(datetime.fromtimestamp(relationship.last_updated_time / 1000, timezone.utc)),
)
)
)

if relationship.confidence:
triples.append(
(
id_,
namespace.confidence,
Literal(relationship.confidence),
if relationship.confidence:
triples.append(
(
id_,
namespace.confidence,
Literal(relationship.confidence),
)
)
)

if relationship.labels:
for label in relationship.labels:
# external_id can create ill-formed URIs, so we create websafe URIs
# since labels do not have internal ids, we use the external_id as the id
triples.append((id_, namespace.label, namespace[f"Label_{quote(label.dump()['externalId'])}"]))
if relationship.labels:
for label in relationship.labels:
# external_id can create ill-formed URIs, so we create websafe URIs
# since labels do not have internal ids, we use the external_id as the id
triples.append((id_, namespace.label, namespace[f"Label_{quote(label.dump()['externalId'])}"]))

# Create connection
if relationship.data_set_id:
triples.append((id_, namespace.dataset, namespace[f"Dataset_{relationship.data_set_id}"]))
# Create connection
if relationship.data_set_id:
triples.append((id_, namespace.dataset, namespace[f"Dataset_{relationship.data_set_id}"]))

return triples
return triples
return []
9 changes: 8 additions & 1 deletion cognite/neat/graph/transformers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
AddAssetDepth,
AssetEventConnector,
AssetFileConnector,
AssetRelationshipConnector,
AssetSequenceConnector,
AssetTimeSeriesConnector,
)
Expand All @@ -12,8 +13,14 @@
"AssetSequenceConnector",
"AssetFileConnector",
"AssetEventConnector",
"AssetRelationshipConnector",
]

Transformers = (
AddAssetDepth | AssetTimeSeriesConnector | AssetSequenceConnector | AssetFileConnector | AssetEventConnector
AddAssetDepth
| AssetTimeSeriesConnector
| AssetSequenceConnector
| AssetFileConnector
| AssetEventConnector
| AssetRelationshipConnector
)
61 changes: 61 additions & 0 deletions cognite/neat/graph/transformers/_classic_cdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,3 +205,64 @@ def transform(self, graph: Graph) -> None:
# files can be connected to multiple assets in the graph
for (asset_id,) in cast(list[tuple], assets_id_res):
graph.add((asset_id, DEFAULT_NAMESPACE.event, event_id))


class AssetRelationshipConnector(BaseTransformer):
description: str = "Connects assets via relationships"
_use_only_once: bool = True
_need_changes = frozenset(
{str(extractors.AssetsExtractor.__name__), str(extractors.RelationshipsExtractor.__name__)}
)
_asset_template: str = """SELECT ?source ?target WHERE {{
<{relationship_id}> <{relationship_source_xid_prop}> ?source_xid .
?source <{asset_xid_property}> ?source_xid .
?source a <{asset_type}> .
<{relationship_id}> <{relationship_target_xid_prop}> ?target_xid .
?target <{asset_xid_property}> ?target_xid .
?target a <{asset_type}> .}}"""

def __init__(
self,
asset_type: URIRef | None = None,
relationship_type: URIRef | None = None,
relationship_source_xid_prop: URIRef | None = None,
relationship_target_xid_prop: URIRef | None = None,
asset_xid_property: URIRef | None = None,
):
self.asset_type = asset_type or DEFAULT_NAMESPACE.Asset
self.relationship_type = relationship_type or DEFAULT_NAMESPACE.Relationship
self.relationship_source_xid_prop = relationship_source_xid_prop or DEFAULT_NAMESPACE.source_external_id
self.relationship_target_xid_prop = relationship_target_xid_prop or DEFAULT_NAMESPACE.target_external_id
self.asset_xid_property = asset_xid_property or DEFAULT_NAMESPACE.external_id

def transform(self, graph: Graph) -> None:
for relationship_id_result in graph.query(
f"SELECT DISTINCT ?relationship_id WHERE {{?relationship_id a <{self.relationship_type}>}}"
):
relationship_id: URIRef = cast(tuple, relationship_id_result)[0]

if assets_id_res := list(
graph.query(
self._asset_template.format(
relationship_id=relationship_id,
asset_xid_property=self.asset_xid_property,
relationship_source_xid_prop=self.relationship_source_xid_prop,
relationship_target_xid_prop=self.relationship_target_xid_prop,
asset_type=self.asset_type,
)
)
):
# files can be connected to multiple assets in the graph
for source_asset_id, target_asset_id in cast(list[tuple], assets_id_res):
# create a relationship between the two assets
graph.add((source_asset_id, DEFAULT_NAMESPACE.relationship, relationship_id))
graph.add((target_asset_id, DEFAULT_NAMESPACE.relationship, relationship_id))

# add source and target to the relationship
graph.add((relationship_id, DEFAULT_NAMESPACE.source, source_asset_id))
graph.add((relationship_id, DEFAULT_NAMESPACE.target, target_asset_id))

# remove properties that are not needed, specifically the external ids
graph.remove((relationship_id, self.relationship_source_xid_prop, None))
graph.remove((relationship_id, self.relationship_target_xid_prop, None))
7 changes: 7 additions & 0 deletions docs/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,13 @@ Changes are grouped as follows:
- `Security` in case of vulnerabilities.


## [0.81.11] - 19-06-24
### Added
- `AssetRelationshipConnector` transformer added
### Improved
- Handling of ids for labels and relationships


## [0.81.10] - 19-06-24
### Added
- `AssetEventConnector` transformer added
Expand Down
Loading

0 comments on commit a07773a

Please sign in to comment.