Introduce anomaly detection pipeline with tuned clustering

JohT · JohT · commit 8e9d7f56ebc9 · 2025-06-28T21:44:37.000+02:00
diff --git a/domains/anomaly-detection/AnomalyDetection.ipynb b/domains/anomaly-detection/AnomalyDetection.ipynb
diff --git a/domains/anomaly-detection/anomalyDetectionPipeline.sh b/domains/anomaly-detection/anomalyDetectionPipeline.sh
@@ -0,0 +1,220 @@
+#!/usr/bin/env bash
+
+# Pipeline that coordinates anomaly detection using the Graph Data Science Library of Neo4j.
+# It requires an already running Neo4j graph database with already scanned and analyzed artifacts.
+# The results will be written into the sub directory reports/anomaly-detection.
+
+# Note that "scripts/prepareAnalysis.sh" is required to run prior to this script.
+
+# Requires executeQueryFunctions.sh, projectionFunctions.sh, cleanupAfterReportGeneration.sh
+
+# Fail on any error ("-e" = exit on first error, "-o pipefail" exits on errors within piped commands)
+set -o errexit -o pipefail
+
+# Overrideable Constants (defaults also defined in sub scripts)
+REPORTS_DIRECTORY=${REPORTS_DIRECTORY:-"reports"}
+
+## Get the directory of this script ("domains/anomaly-detection") if not already set
+# Even if $BASH_SOURCE is made for Bourne-like shells it is also supported by others and therefore here the preferred solution.
+# CDPATH reduces the scope of the cd command to potentially prevent unintended directory changes.
+# This way non-standard tools like readlink aren't needed.
+ANOMALY_DETECTION_SCRIPT_DIR=${ANOMALY_DETECTION_SCRIPT_DIR:-$(CDPATH=. cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)}
+echo "anomalyDetectionPipeline: ANOMALY_DETECTION_SCRIPT_DIR=${ANOMALY_DETECTION_SCRIPT_DIR}"
+# Get the "scripts" directory by taking the path of this script and going two directories up.
+SCRIPTS_DIR=${SCRIPTS_DIR:-"${ANOMALY_DETECTION_SCRIPT_DIR}/../../scripts"} # Repository directory containing the shell scripts
+# Get the "cypher" query directories: "features" for gathering features and "queries" for the anomaly queries.
+ANOMALY_DETECTION_FEATURE_CYPHER_DIR=${ANOMALY_DETECTION_FEATURE_CYPHER_DIR:-"${ANOMALY_DETECTION_SCRIPT_DIR}/features"}
+ANOMALY_DETECTION_QUERY_CYPHER_DIR=${ANOMALY_DETECTION_QUERY_CYPHER_DIR:-"${ANOMALY_DETECTION_SCRIPT_DIR}/queries"}
+
+# Function to display script usage
+usage() {
+  echo -e "${COLOR_ERROR}" >&2
+  echo "Usage: $0 [--verbose]" >&2
+  echo -e "${COLOR_DEFAULT}" >&2
+  exit 1
+}
+
+# Default values
+verboseMode="" # either "" or "--verbose"
+
+# Parse command line arguments.
+# "--verbose" is the only supported option and it takes no value,
+# so every loop iteration consumes exactly one argument.
+while [[ $# -gt 0 ]]; do
+  key="$1"
+
+  case "${key}" in
+    --verbose)
+      verboseMode="--verbose"
+      ;;
+    *)
+      echo -e "${COLOR_ERROR}anomalyDetectionPipeline: Error: Unknown option: ${key}${COLOR_DEFAULT}" >&2
+      usage
+      ;;
+  esac
+  shift # cannot fail here: the loop condition guarantees at least one remaining argument
+done
+
+# Define functions to execute a cypher query from within a given file (first and only argument) like "execute_cypher"
+source "${SCRIPTS_DIR}/executeQueryFunctions.sh"
+
+# Define functions to create and delete Graph Projections like "createUndirectedDependencyProjection"
+source "${SCRIPTS_DIR}/projectionFunctions.sh"
+
+# Query or recalculate features.
+# 
+# Required Parameters:
+# - projection_name=...
+#   Name prefix for the in-memory projection name. Example: "package-anomaly-detection"
+# - projection_node_label=...
+#   Label of the nodes that will be used for the projection. Example: "Package"
+# - projection_weight_property=...
+#   Name of the node property that contains the dependency weight. Example: "weight"
+anomaly_detection_features() {
+    # Determine the Betweenness centrality (with the directed graph projection) if not already done
+    execute_cypher_queries_until_results "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-Betweenness-Exists.cypher" \
+                                         "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-Betweenness-Write.cypher" "${@}"
+    # Determine the local clustering coefficient if not already done
+    execute_cypher_queries_until_results "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-LocalClusteringCoefficient-Exists.cypher" \
+                                         "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-LocalClusteringCoefficient-Write.cypher" "${@}"
+    # Determine the page rank if not already done
+    execute_cypher_queries_until_results "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-PageRank-Exists.cypher" \
+                                         "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-PageRank-Write.cypher" "${@}"
+    # Determine the article rank if not already done
+    execute_cypher_queries_until_results "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-ArticleRank-Exists.cypher" \
+                                         "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-PageRank-Write.cypher" "${@}"
+}
+# Run queries to find anomalies in the graph and write their results as CSV files into the report directory.
+# 
+# Required Parameters:
+# - projection_node_label=...
+#   Label of the nodes that will be used for the projection. Example: "Package"
+#   It is also used as the file name prefix of the written CSV file.
+anomaly_detection_queries() {
+    local nodeLabel
+    nodeLabel=$( extractQueryParameter "projection_node_label" "${@}" )
+    # Run "AnomalyDetectionPageToArticleRankDifference.cypher" and write its output into a CSV file
+    execute_cypher "${ANOMALY_DETECTION_QUERY_CYPHER_DIR}/AnomalyDetectionPageToArticleRankDifference.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}AnomalyDetection_ArticleVsPageRankDifference.csv"
+}
+
+# Run the anomaly detection pipeline:
+# features, anomaly queries, Python scripts (communities, clustering, 2D visualization) and the final CSV export.
+#
+# NOTE(review): A function with the same name is defined again further down in this script.
+# Bash uses the most recent definition, so this one (including the calls to "anomaly_detection_queries"
+# and "umap2dNodeEmbeddings.py") is replaced before the pipeline gets called. Confirm which variant is intended.
+# 
+# Required Parameters:
+# - projection_name=...
+#   Name prefix for the in-memory projection name. Example: "package-anomaly-detection"
+# - projection_node_label=...
+#   Label of the nodes that will be used for the projection. Example: "Package"
+# - projection_weight_property=...
+#   Name of the node property that contains the dependency weight. Example: "weight"
+anomaly_detection_pipeline() {
+    time anomaly_detection_features "${@}"
+    time anomaly_detection_queries "${@}"
+    # Get tuned Leiden communities as a reference to tune clustering
+    # (${verboseMode} is left unquoted on purpose so that it expands to no argument at all when empty)
+    time "${ANOMALY_DETECTION_SCRIPT_DIR}/tunedLeidenCommunityDetection.py" "${@}" ${verboseMode}
+    # Tuned Fast Random Projection and tuned HDBSCAN clustering 
+    time "${ANOMALY_DETECTION_SCRIPT_DIR}/tunedNodeEmbeddingClustering.py" "${@}" ${verboseMode}
+    # Reduce the dimensionality of the node embeddings down to 2D for visualization using UMAP
+    time "${ANOMALY_DETECTION_SCRIPT_DIR}/umap2dNodeEmbeddings.py" "${@}" ${verboseMode}
+    
+    # Query Results: Output all collected features into a CSV file.
+    local nodeLabel
+    nodeLabel=$( extractQueryParameter "projection_node_label" "${@}" )
+    execute_cypher "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeatures.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}AnomalyDetectionFeatures.csv"
+}
+
+# Create report directory
+REPORT_NAME="anomaly-detection"
+FULL_REPORT_DIRECTORY="${REPORTS_DIRECTORY}/${REPORT_NAME}"
+mkdir -p "${FULL_REPORT_DIRECTORY}"
+
+# Query Parameter key pairs for projection and algorithm side
+PROJECTION_NAME="dependencies_projection"
+ALGORITHM_PROJECTION="projection_name"
+
+PROJECTION_NODE="dependencies_projection_node"
+ALGORITHM_NODE="projection_node_label"
+
+PROJECTION_WEIGHT="dependencies_projection_weight_property"
+ALGORITHM_WEIGHT="projection_weight_property"
+
+# Code independent algorithm parameters
+COMMUNITY_PROPERTY="community_property=communityLeidenIdTuned"
+
+# Query (or recalculate) features.
+# The following features are determined one after another if not already done:
+# Betweenness centrality (with the directed graph projection), local clustering coefficient, page rank and article rank.
+# 
+# Required Parameters:
+# - projection_name=...
+#   Name prefix for the in-memory projection name. Example: "package-anomaly-detection"
+# - projection_node_label=...
+#   Label of the nodes that will be used for the projection. Example: "Package"
+# - projection_weight_property=...
+#   Name of the node property that contains the dependency weight. Example: "weight"
+anomaly_detection_features() {
+    # All feature queries share the same file name pattern: AnomalyDetectionFeature-<Feature>-<Exists|Write>.cypher
+    local featureQueryPrefix="${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature"
+    local featureName
+    for featureName in "Betweenness" "LocalClusteringCoefficient" "PageRank" "ArticleRank"; do
+        # Query Feature: The "-Exists" query comes first, the "-Write" query second
+        execute_cypher_queries_until_results "${featureQueryPrefix}-${featureName}-Exists.cypher" \
+                                             "${featureQueryPrefix}-${featureName}-Write.cypher" "${@}"
+    done
+}
+
+# Run the anomaly detection pipeline.
+# 
+# Required Parameters:
+# - projection_name=...
+#   Name prefix for the in-memory projection name. Example: "package-anomaly-detection"
+# - projection_node_label=...
+#   Label of the nodes that will be used for the projection. Example: "Package"
+# - projection_weight_property=...
+#   Name of the node property that contains the dependency weight. Example: "weight"
+anomaly_detection_pipeline() {
+    time anomaly_detection_features "${@}"
+    # Run Python: Get tuned Leiden communities as a reference to tune clustering
+    time "${ANOMALY_DETECTION_SCRIPT_DIR}/tunedLeidenCommunityDetection.py" "${@}" ${verboseMode}
+    # Run Python: Tuned Fast Random Projection and tuned HDBSCAN clustering 
+    time "${ANOMALY_DETECTION_SCRIPT_DIR}/tunedNodeEmbeddingClustering.py" "${@}" ${verboseMode}
+    
+    # Query Results: Output all collected features into a CSV file.
+    local nodeLabel
+    nodeLabel=$( extractQueryParameter "projection_node_label" "${@}" )
+    execute_cypher "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeatures.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}AnomalyDetection.csv"
+}
+
+# -- Java Artifact Node Embeddings -------------------------------
+
+if createUndirectedDependencyProjection "${PROJECTION_NAME}=artifact-anomaly-detection" "${PROJECTION_NODE}=Artifact" "${PROJECTION_WEIGHT}=weight"; then
+    createDirectedDependencyProjection "${PROJECTION_NAME}=artifact-anomaly-detection-directed" "${PROJECTION_NODE}=Artifact" "${PROJECTION_WEIGHT}=weight"
+    anomaly_detection_pipeline "${ALGORITHM_PROJECTION}=artifact-anomaly-detection" "${ALGORITHM_NODE}=Artifact" "${ALGORITHM_WEIGHT}=weight" "${COMMUNITY_PROPERTY}"
+fi
+
+# -- Java Package Node Embeddings --------------------------------
+
+if createUndirectedDependencyProjection "${PROJECTION_NAME}=package-anomaly-detection" "${PROJECTION_NODE}=Package" "${PROJECTION_WEIGHT}=weight25PercentInterfaces"; then
+    createDirectedDependencyProjection "${PROJECTION_NAME}=package-anomaly-detection-directed" "${PROJECTION_NODE}=Package" "${PROJECTION_WEIGHT}=weight25PercentInterfaces"
+    anomaly_detection_pipeline "${ALGORITHM_PROJECTION}=package-anomaly-detection" "${ALGORITHM_NODE}=Package" "${ALGORITHM_WEIGHT}=weight25PercentInterfaces" "${COMMUNITY_PROPERTY}"
+fi
+
+# -- Java Type Node Embeddings -----------------------------------
+
+if createUndirectedJavaTypeDependencyProjection "${PROJECTION_NAME}=type-anomaly-detection"; then
+    createDirectedJavaTypeDependencyProjection "${PROJECTION_NAME}=type-anomaly-detection-directed"
+    anomaly_detection_pipeline "${ALGORITHM_PROJECTION}=type-anomaly-detection" "${ALGORITHM_NODE}=Type" "${ALGORITHM_WEIGHT}=weight" "${COMMUNITY_PROPERTY}" "${EMBEDDING_PROPERTY}"
+fi
+
+# -- Typescript Module Node Embeddings ---------------------------
+
+if createUndirectedDependencyProjection "${PROJECTION_NAME}=typescript-module-embedding" "${PROJECTION_NODE}=Module" "${PROJECTION_WEIGHT}=lowCouplingElement25PercentWeight"; then
+    createDirectedDependencyProjection "${PROJECTION_NAME}=typescript-module-embedding-directed" "${PROJECTION_NODE}=Module" "${PROJECTION_WEIGHT}=lowCouplingElement25PercentWeight"
+    anomaly_detection_pipeline "${ALGORITHM_PROJECTION}=typescript-module-embedding" "${ALGORITHM_NODE}=Module" "${ALGORITHM_WEIGHT}=lowCouplingElement25PercentWeight" "${COMMUNITY_PROPERTY}" "${EMBEDDING_PROPERTY}"
+fi
+
+# ---------------------------------------------------------------
+
+# Clean-up after report generation. Empty reports will be deleted.
+source "${SCRIPTS_DIR}/cleanupAfterReportGeneration.sh" "${FULL_REPORT_DIRECTORY}"
+
+echo "anomalyDetectionPipeline: $(date +'%Y-%m-%dT%H:%M:%S%z') Successfully finished."
diff --git a/domains/anomaly-detection/features/AnomalyDetectionFeature-ArticleRank-Exists.cypher b/domains/anomaly-detection/features/AnomalyDetectionFeature-ArticleRank-Exists.cypher
@@ -0,0 +1,9 @@
+// Return the first node with a centralityArticleRank if it exists.
+// An empty result means that no node with the given label has an Article Rank yet.
+// Requires the parameter "projection_node_label" (e.g. "Package").
+
+   MATCH (codeUnit)
+   WHERE $projection_node_label IN labels(codeUnit)
+     AND codeUnit.centralityArticleRank      IS NOT NULL
+  RETURN codeUnit.name                       AS shortCodeUnitName
+        ,elementId(codeUnit)                 AS nodeElementId
+        ,codeUnit.centralityArticleRank      AS articleRank
+   LIMIT 1
diff --git a/domains/anomaly-detection/features/AnomalyDetectionFeature-ArticleRank-Write.cypher b/domains/anomaly-detection/features/AnomalyDetectionFeature-ArticleRank-Write.cypher
@@ -0,0 +1,10 @@
+// Calculates and writes the Article Rank centrality score for anomaly detection.
+// The score is written into the node property "centralityArticleRank".
+// Requires the parameters "projection_name" and "projection_weight_property".
+// Runs on the projection named <projection_name>-directed-cleaned, which needs to exist beforehand.
+
+CALL gds.articleRank.write(
+ $projection_name + '-directed-cleaned', {
+   maxIterations: 50
+  ,relationshipWeightProperty: $projection_weight_property
+  ,writeProperty: 'centralityArticleRank'
+})
+ YIELD nodePropertiesWritten, ranIterations, didConverge, preProcessingMillis, computeMillis, postProcessingMillis, writeMillis
+RETURN nodePropertiesWritten, ranIterations, didConverge, preProcessingMillis, computeMillis, postProcessingMillis, writeMillis
diff --git a/domains/anomaly-detection/features/AnomalyDetectionFeature-Betweenness-Exists.cypher b/domains/anomaly-detection/features/AnomalyDetectionFeature-Betweenness-Exists.cypher
@@ -0,0 +1,9 @@
+// Return the first node with a centralityBetweenness if it exists.
+// An empty result means that no node with the given label has a Betweenness centrality yet.
+// Requires the parameter "projection_node_label" (e.g. "Package").
+
+   MATCH (codeUnit)
+   WHERE $projection_node_label IN labels(codeUnit)
+     AND codeUnit.centralityBetweenness      IS NOT NULL
+  RETURN codeUnit.name                       AS shortCodeUnitName
+        ,elementId(codeUnit)                 AS nodeElementId
+        ,codeUnit.centralityBetweenness      AS betweenness
+   LIMIT 1
diff --git a/domains/anomaly-detection/features/AnomalyDetectionFeature-Betweenness-Write.cypher b/domains/anomaly-detection/features/AnomalyDetectionFeature-Betweenness-Write.cypher
@@ -0,0 +1,9 @@
+// Calculates and writes the Betweenness centrality score for anomaly detection.
+// The score is written into the node property "centralityBetweenness".
+// Requires the parameters "projection_name" and "projection_weight_property".
+// Runs on the projection named <projection_name>-directed-cleaned, which needs to exist beforehand.
+
+CALL gds.betweenness.write(
+ $projection_name + '-directed-cleaned', {
+    relationshipWeightProperty: $projection_weight_property
+   ,writeProperty: 'centralityBetweenness'
+})
+ YIELD nodePropertiesWritten, preProcessingMillis, computeMillis, postProcessingMillis, writeMillis
+RETURN nodePropertiesWritten, preProcessingMillis, computeMillis, postProcessingMillis, writeMillis
diff --git a/domains/anomaly-detection/features/AnomalyDetectionFeature-LocalClusteringCoefficient-Exists.cypher b/domains/anomaly-detection/features/AnomalyDetectionFeature-LocalClusteringCoefficient-Exists.cypher
@@ -0,0 +1,9 @@
+// Return the first node with a communityLocalClusteringCoefficient if it exists.
+// An empty result means that no node with the given label has a local clustering coefficient yet.
+// Requires the parameter "projection_node_label" (e.g. "Package").
+
+   MATCH (codeUnit)
+   WHERE $projection_node_label IN labels(codeUnit)
+     AND codeUnit.communityLocalClusteringCoefficient         IS NOT NULL
+  RETURN codeUnit.name                                        AS shortCodeUnitName
+        ,elementId(codeUnit)                                  AS nodeElementId
+        ,codeUnit.communityLocalClusteringCoefficient         AS clusteringCoefficient
+   LIMIT 1
diff --git a/domains/anomaly-detection/features/AnomalyDetectionFeature-LocalClusteringCoefficient-Write.cypher b/domains/anomaly-detection/features/AnomalyDetectionFeature-LocalClusteringCoefficient-Write.cypher
@@ -0,0 +1,8 @@
+// Calculates and writes the local clustering coefficient for anomaly detection.
+// The coefficient is written into the node property "communityLocalClusteringCoefficient".
+// Requires the parameter "projection_name".
+// Runs on the projection named <projection_name>-cleaned (without "-directed"), which needs to exist beforehand.
+
+CALL gds.localClusteringCoefficient.write(
+ $projection_name + '-cleaned', {
+    writeProperty: 'communityLocalClusteringCoefficient'
+})
+ YIELD averageClusteringCoefficient, nodeCount, nodePropertiesWritten, preProcessingMillis, computeMillis, postProcessingMillis, writeMillis
+RETURN averageClusteringCoefficient, nodeCount, nodePropertiesWritten, preProcessingMillis, computeMillis, postProcessingMillis, writeMillis
diff --git a/domains/anomaly-detection/features/AnomalyDetectionFeature-PageRank-Exists.cypher b/domains/anomaly-detection/features/AnomalyDetectionFeature-PageRank-Exists.cypher
@@ -0,0 +1,9 @@
+// Return the first node with a centralityPageRank if it exists.
+// An empty result means that no node with the given label has a Page Rank yet.
+// Requires the parameter "projection_node_label" (e.g. "Package").
+
+   MATCH (codeUnit)
+   WHERE $projection_node_label IN labels(codeUnit)
+     AND codeUnit.centralityPageRank         IS NOT NULL
+  RETURN codeUnit.name                       AS shortCodeUnitName
+        ,elementId(codeUnit)                 AS nodeElementId
+        ,codeUnit.centralityPageRank         AS pageRank
+   LIMIT 1
diff --git a/domains/anomaly-detection/features/AnomalyDetectionFeature-PageRank-Write.cypher b/domains/anomaly-detection/features/AnomalyDetectionFeature-PageRank-Write.cypher
@@ -0,0 +1,10 @@
+// Calculates and writes the Page Rank centrality score for anomaly detection.
+// The score is written into the node property "centralityPageRank".
+// Requires the parameters "projection_name" and "projection_weight_property".
+// Runs on the projection named <projection_name>-directed-cleaned, which needs to exist beforehand.
+
+CALL gds.pageRank.write(
+ $projection_name + '-directed-cleaned', {
+   maxIterations: 50
+  ,relationshipWeightProperty: $projection_weight_property
+  ,writeProperty: 'centralityPageRank'
+})
+ YIELD nodePropertiesWritten, ranIterations, didConverge, preProcessingMillis, computeMillis, postProcessingMillis, writeMillis
+RETURN nodePropertiesWritten, ranIterations, didConverge, preProcessingMillis, computeMillis, postProcessingMillis, writeMillis
diff --git a/domains/anomaly-detection/features/AnomalyDetectionFeatures-Reset.cypher b/domains/anomaly-detection/features/AnomalyDetectionFeatures-Reset.cypher
@@ -0,0 +1,8 @@
+// Reset all features related to anomaly detection for code units to force a recalculation.
+// Removes the properties written by the "AnomalyDetectionFeature-*-Write" queries
+// from all nodes with the given label. Requires the parameter "projection_node_label".
+
+  MATCH (codeUnit)
+  WHERE $projection_node_label IN labels(codeUnit)
+ REMOVE codeUnit.communityLocalClusteringCoefficient
+       ,codeUnit.centralityArticleRank
+       ,codeUnit.centralityPageRank
+       ,codeUnit.centralityBetweenness
diff --git a/domains/anomaly-detection/features/AnomalyDetectionFeatures.cypher b/domains/anomaly-detection/features/AnomalyDetectionFeatures.cypher
@@ -0,0 +1,39 @@
+// Query code unit nodes together with all of their anomaly detection features.
+// Only nodes that have every one of the listed properties are returned.
+// Requires the parameters "projection_node_label" and "community_property".
+
+   MATCH (codeUnit)
+   WHERE $projection_node_label IN labels(codeUnit)
+     AND codeUnit[$community_property]                        IS NOT NULL
+     AND codeUnit.incomingDependencies                        IS NOT NULL
+     AND codeUnit.outgoingDependencies                        IS NOT NULL
+     AND codeUnit.communityLocalClusteringCoefficient         IS NOT NULL
+     AND codeUnit.centralityArticleRank                       IS NOT NULL
+     AND codeUnit.centralityPageRank                          IS NOT NULL
+     AND codeUnit.centralityBetweenness                       IS NOT NULL
+     AND codeUnit.clusteringHDBSCANLabel                      IS NOT NULL
+     AND codeUnit.clusteringHDBSCANProbability                IS NOT NULL
+     AND codeUnit.clusteringHDBSCANNoise                      IS NOT NULL
+     AND codeUnit.embeddingFastRandomProjectionVisualizationX IS NOT NULL
+     AND codeUnit.embeddingFastRandomProjectionVisualizationY IS NOT NULL
+// Optionally get the name of the containing Java artifact or Typescript project (last path segment of its root directory)
+OPTIONAL MATCH (artifact:Java:Artifact)-[:CONTAINS]->(codeUnit)
+    WITH *, artifact.name AS artifactName
+OPTIONAL MATCH (projectRoot:Directory)<-[:HAS_ROOT]-(proj:TS:Project)-[:CONTAINS]->(codeUnit)
+    WITH *, last(split(projectRoot.absoluteFileName, '/')) AS projectName   
+  RETURN DISTINCT 
+         coalesce(codeUnit.fqn, codeUnit.globalFqn, codeUnit.fileName, codeUnit.signature, codeUnit.name) AS codeUnitName
+        ,codeUnit.name                                        AS shortCodeUnitName
+        ,elementId(codeUnit)                                  AS nodeElementId
+        ,coalesce(artifactName, projectName)                  AS projectName
+        ,codeUnit.incomingDependencies                        AS incomingDependencies
+        ,codeUnit.outgoingDependencies                        AS outgoingDependencies
+        ,codeUnit[$community_property]                        AS communityId
+        ,codeUnit.communityLocalClusteringCoefficient         AS clusteringCoefficient
+        ,codeUnit.centralityArticleRank                       AS articleRank
+        ,codeUnit.centralityPageRank                          AS pageRank
+        ,codeUnit.centralityPageRank - codeUnit.centralityArticleRank AS pageToArticleRankDifference
+        ,codeUnit.centralityBetweenness                       AS betweenness
+        ,codeUnit.clusteringHDBSCANLabel                      AS clusteringLabel
+        ,codeUnit.clusteringHDBSCANProbability                AS clusteringProbability
+        ,codeUnit.clusteringHDBSCANNoise                      AS clusteringIsNoise
+        ,codeUnit.embeddingFastRandomProjectionVisualizationX AS visualizationX
+        ,codeUnit.embeddingFastRandomProjectionVisualizationY AS visualizationY
+        // NOTE(review): centralityPageRank is already filtered to be non-null above, so the fallback value is never used
+        ,coalesce(codeUnit.centralityPageRank, 0.00001)       AS centrality
diff --git a/domains/anomaly-detection/features/Set_Parameters_Manual.cypher b/domains/anomaly-detection/features/Set_Parameters_Manual.cypher
diff --git a/domains/anomaly-detection/queries/AnomalyDetectionPageToArticleRankDifference.cypher b/domains/anomaly-detection/queries/AnomalyDetectionPageToArticleRankDifference.cypher
diff --git a/domains/anomaly-detection/tunedLeidenCommunityDetection.py b/domains/anomaly-detection/tunedLeidenCommunityDetection.py
diff --git a/domains/anomaly-detection/tunedNodeEmbeddingClustering.py b/domains/anomaly-detection/tunedNodeEmbeddingClustering.py