Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Neat 347 extend depth transformer with typing #538

Merged
merged 8 commits into from
Jul 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
.PHONY: run-explorer run-tests run-linters build-ui build-python build-docker run-docker compose-up
version="0.85.9"
version="0.85.10"
run-explorer:
@echo "Running explorer API server..."
# open "http://localhost:8000/static/index.html" || true
Expand Down
2 changes: 1 addition & 1 deletion cognite/neat/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.85.9"
__version__ = "0.85.10"
29 changes: 27 additions & 2 deletions cognite/neat/graph/queries/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from typing import Literal, cast, overload

from rdflib import RDF, Graph, URIRef
from rdflib import Literal as RdfLiteral
from rdflib.query import ResultRow

from cognite.neat.rules.models.entities import ClassEntity
Expand All @@ -18,6 +19,24 @@ def __init__(self, graph: Graph, rules: InformationRules | None = None):
self.graph = graph
self.rules = rules

def summarize_instances(self) -> list[tuple]:
"""Summarize instances in the graph store by class and count"""

query_statement = """ SELECT ?class (COUNT(?instance) AS ?instanceCount)
WHERE {
?instance a ?class .
}
GROUP BY ?class
ORDER BY DESC(?instanceCount) """

return [
(
remove_namespace_from_uri(cast(URIRef, cast(tuple, res)[0])),
cast(RdfLiteral, cast(tuple, res)[1]).value,
)
for res in list(self.graph.query(query_statement))
]

def list_instances_ids_of_class(self, class_uri: URIRef, limit: int = -1) -> list[URIRef]:
"""Get instances ids for a given class

Expand Down Expand Up @@ -68,7 +87,10 @@ def triples_of_type_instances(self, rdf_type: str) -> list[tuple[str, str, str]]
# We cannot include the RDF.type in case there is a neat:type property
return [remove_namespace_from_uri(*triple) for triple in result if triple[1] != RDF.type] # type: ignore[misc, index]
else:
warnings.warn("No rules found for the graph store, returning empty list.", stacklevel=2)
warnings.warn(
"No rules found for the graph store, returning empty list.",
stacklevel=2,
)
return []

def construct_instances_of_class(self, class_: str, properties_optional: bool = True) -> list[tuple[str, str, str]]:
Expand All @@ -95,7 +117,10 @@ def construct_instances_of_class(self, class_: str, properties_optional: bool =
# We cannot include the RDF.type in case there is a neat:type property
return [remove_namespace_from_uri(*triple) for triple in result if triple[1] != RDF.type] # type: ignore[misc, index]
else:
warnings.warn("No rules found for the graph store, returning empty list.", stacklevel=2)
warnings.warn(
"No rules found for the graph store, returning empty list.",
stacklevel=2,
)
return []

def list_triples(self, limit: int = 25) -> list[ResultRow]:
Expand Down
12 changes: 12 additions & 0 deletions cognite/neat/graph/stores/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from pathlib import Path
from typing import cast

import pandas as pd
from rdflib import Graph, Namespace, URIRef
from rdflib.plugins.stores.sparqlstore import SPARQLUpdateStore

Expand Down Expand Up @@ -283,12 +284,23 @@ def transform(self, transformer: Transformers) -> None:
)
)

@property
def summary(self) -> pd.DataFrame:
return pd.DataFrame(self.queries.summarize_instances(), columns=["Type", "Occurrence"])

def _repr_html_(self) -> str:
provenance = self.provenance._repr_html_()
summary: pd.DataFrame = self.summary
summary_text = (
"<br /><strong>Graph is empty</strong><br />"
if summary.empty
else f"<br /><strong>Graph content</strong><br />{cast(pd.DataFrame, summary)._repr_html_()}" # type: ignore[operator]
)

return (
f"<strong>{type(self).__name__}</strong> A graph store is a container for storing triples. "
"It can be queried and transformed to extract information.<br />"
"<strong>Provenance</strong> Provenance is a record of changes that have occurred in the graph store.<br />"
f"{provenance}"
f"{summary_text}"
)
65 changes: 56 additions & 9 deletions cognite/neat/graph/transformers/_classic_cdf.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import cast

from rdflib import Graph, Literal, URIRef
from rdflib import RDF, Graph, Literal, URIRef

from cognite.neat.constants import DEFAULT_NAMESPACE
from cognite.neat.graph import extractors
Expand All @@ -21,11 +21,16 @@ class AddAssetDepth(BaseTransformer):
<{asset_id}> <{root_prop}> ?root .}}"""

def __init__(
self, asset_type: URIRef | None = None, root_prop: URIRef | None = None, parent_prop: URIRef | None = None
self,
asset_type: URIRef | None = None,
root_prop: URIRef | None = None,
parent_prop: URIRef | None = None,
depth_typing: dict[int, str] | None = None,
):
self.asset_type = asset_type or DEFAULT_NAMESPACE.Asset
self.root_prop = root_prop or DEFAULT_NAMESPACE.root
self.parent_prop = parent_prop or DEFAULT_NAMESPACE.parent
self.depth_typing = depth_typing

def transform(self, graph: Graph) -> None:
"""Adds depth of asset in the asset hierarchy to the graph."""
Expand All @@ -34,6 +39,13 @@ def transform(self, graph: Graph) -> None:
if depth := self.get_depth(graph, asset_id, self.root_prop, self.parent_prop):
graph.add((asset_id, DEFAULT_NAMESPACE.depth, Literal(depth)))

if self.depth_typing and (type_ := self.depth_typing.get(depth, None)):
# remove existing type
graph.remove((asset_id, RDF.type, None))

# add new type
graph.add((asset_id, RDF.type, DEFAULT_NAMESPACE[type_]))

@classmethod
def get_depth(
cls,
Expand Down Expand Up @@ -62,7 +74,12 @@ def get_depth(
class AssetTimeSeriesConnector(BaseTransformer):
description: str = "Connects assets to timeseries, thus forming bi-directional connection"
_use_only_once: bool = True
_need_changes = frozenset({str(extractors.AssetsExtractor.__name__), str(extractors.TimeSeriesExtractor.__name__)})
_need_changes = frozenset(
{
str(extractors.AssetsExtractor.__name__),
str(extractors.TimeSeriesExtractor.__name__),
}
)
_asset_template: str = """SELECT ?asset_id WHERE {{
<{timeseries_id}> <{asset_prop}> ?asset_id .
?asset_id a <{asset_type}>}}"""
Expand Down Expand Up @@ -100,7 +117,12 @@ def transform(self, graph: Graph) -> None:
class AssetSequenceConnector(BaseTransformer):
description: str = "Connects assets to sequences, thus forming bi-directional connection"
_use_only_once: bool = True
_need_changes = frozenset({str(extractors.AssetsExtractor.__name__), str(extractors.SequencesExtractor.__name__)})
_need_changes = frozenset(
{
str(extractors.AssetsExtractor.__name__),
str(extractors.SequencesExtractor.__name__),
}
)
_asset_template: str = """SELECT ?asset_id WHERE {{
<{sequence_id}> <{asset_prop}> ?asset_id .
?asset_id a <{asset_type}>}}"""
Expand Down Expand Up @@ -138,7 +160,12 @@ def transform(self, graph: Graph) -> None:
class AssetFileConnector(BaseTransformer):
description: str = "Connects assets to files, thus forming bi-directional connection"
_use_only_once: bool = True
_need_changes = frozenset({str(extractors.AssetsExtractor.__name__), str(extractors.FilesExtractor.__name__)})
_need_changes = frozenset(
{
str(extractors.AssetsExtractor.__name__),
str(extractors.FilesExtractor.__name__),
}
)
_asset_template: str = """SELECT ?asset_id WHERE {{
<{file_id}> <{asset_prop}> ?asset_id .
?asset_id a <{asset_type}>}}"""
Expand Down Expand Up @@ -174,7 +201,12 @@ def transform(self, graph: Graph) -> None:
class AssetEventConnector(BaseTransformer):
description: str = "Connects assets to events, thus forming bi-directional connection"
_use_only_once: bool = True
_need_changes = frozenset({str(extractors.AssetsExtractor.__name__), str(extractors.EventsExtractor.__name__)})
_need_changes = frozenset(
{
str(extractors.AssetsExtractor.__name__),
str(extractors.EventsExtractor.__name__),
}
)
_asset_template: str = """SELECT ?asset_id WHERE {{
<{event_id}> <{asset_prop}> ?asset_id .
?asset_id a <{asset_type}>}}"""
Expand Down Expand Up @@ -211,7 +243,10 @@ class AssetRelationshipConnector(BaseTransformer):
description: str = "Connects assets via relationships"
_use_only_once: bool = True
_need_changes = frozenset(
{str(extractors.AssetsExtractor.__name__), str(extractors.RelationshipsExtractor.__name__)}
{
str(extractors.AssetsExtractor.__name__),
str(extractors.RelationshipsExtractor.__name__),
}
)
_asset_template: str = """SELECT ?source ?target WHERE {{
<{relationship_id}> <{relationship_source_xid_prop}> ?source_xid .
Expand Down Expand Up @@ -256,8 +291,20 @@ def transform(self, graph: Graph) -> None:
# files can be connected to multiple assets in the graph
for source_asset_id, target_asset_id in cast(list[tuple], assets_id_res):
# create a relationship between the two assets
graph.add((source_asset_id, DEFAULT_NAMESPACE.relationship, relationship_id))
graph.add((target_asset_id, DEFAULT_NAMESPACE.relationship, relationship_id))
graph.add(
(
source_asset_id,
DEFAULT_NAMESPACE.relationship,
relationship_id,
)
)
graph.add(
(
target_asset_id,
DEFAULT_NAMESPACE.relationship,
relationship_id,
)
)

# add source and target to the relationship
graph.add((relationship_id, DEFAULT_NAMESPACE.source, source_asset_id))
Expand Down
5 changes: 5 additions & 0 deletions docs/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@ Changes are grouped as follows:
- `Fixed` for any bug fixes.
- `Security` in case of vulnerabilities.

## [0.85.10] - 10-07-24
### Added
- Depth-based typing in `AddAssetDepth` transformer
- Graph summary repr

## [0.85.9] - 09-07-24
### Added
- Option for checking for JSON value type when inferring data model
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "cognite-neat"
version = "0.85.9"
version = "0.85.10"
readme = "README.md"
description = "Knowledge graph transformation"
authors = [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from tests.config import CLASSIC_CDF_EXTRACTOR_DATA


def test_asset_depth_transformer():
def test_asset_depth_transformer_without_typing():
store = NeatGraphStore.from_memory_store()
extractor = extractors.AssetsExtractor.from_file(CLASSIC_CDF_EXTRACTOR_DATA / "assets.yaml")
store.write(extractor)
Expand All @@ -24,6 +24,30 @@ def test_asset_depth_transformer():
)


def test_asset_depth_transformer_with_typing():
store = NeatGraphStore.from_memory_store()
extractor = extractors.AssetsExtractor.from_file(CLASSIC_CDF_EXTRACTOR_DATA / "assets.yaml")
store.write(extractor)

transformer = transformers.AddAssetDepth(
depth_typing={
1: "RootCimNode",
2: "GeographicalRegion",
3: "SubGeographicalRegion",
4: "Substation",
}
)

store.transform(transformer)

assert set(store.queries.summarize_instances()) == {
("GeographicalRegion", 1),
("RootCimNode", 1),
("SubGeographicalRegion", 1),
("Substation", 1),
}


def test_asset_depth_transformer_warning():
store = NeatGraphStore.from_memory_store()

Expand All @@ -38,5 +62,8 @@ def test_asset_depth_transformer_warning():
store.write(extractor)
store.transform(transformer)

with pytest.warns(UserWarning, match="Cannot transform graph store with AddAssetDepth, already applied"):
with pytest.warns(
UserWarning,
match="Cannot transform graph store with AddAssetDepth, already applied",
):
store.transform(transformer)
Loading