#!/usr/bin/env bash

# Pipeline that coordinates anomaly detection using the Graph Data Science Library of Neo4j.
# It requires an already running Neo4j graph database with already scanned and analyzed artifacts.
# The results will be written into the sub directory reports/anomaly-detection.

# Note that "scripts/prepareAnalysis.sh" is required to run prior to this script.

# Requires executeQueryFunctions.sh, projectionFunctions.sh, cleanupAfterReportGeneration.sh

# Fail on any error ("errexit" = exit on first error, "pipefail" = exits on errors within piped commands)
set -o errexit -o pipefail

# Overrideable Constants (defaults also defined in sub scripts)
REPORTS_DIRECTORY=${REPORTS_DIRECTORY:-"reports"}

## Get the directory that contains this script if not already set.
# Even if $BASH_SOURCE is made for Bourne-like shells it is also supported by others and therefore here the preferred solution.
# CDPATH reduces the scope of the cd command to potentially prevent unintended directory changes.
# This way non-standard tools like readlink aren't needed.
ANOMALY_DETECTION_SCRIPT_DIR=${ANOMALY_DETECTION_SCRIPT_DIR:-$(CDPATH=. cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)}
echo "anomalyDetectionCsv: ANOMALY_DETECTION_SCRIPT_DIR=${ANOMALY_DETECTION_SCRIPT_DIR}"
# Get the "scripts" directory by taking the path of this script and going two directories up.
SCRIPTS_DIR=${SCRIPTS_DIR:-"${ANOMALY_DETECTION_SCRIPT_DIR}/../../scripts"} # Repository directory containing the shell scripts
# Get the "cypher" query directories for gathering features and for running the anomaly queries.
ANOMALY_DETECTION_FEATURE_CYPHER_DIR=${ANOMALY_DETECTION_FEATURE_CYPHER_DIR:-"${ANOMALY_DETECTION_SCRIPT_DIR}/features"}
ANOMALY_DETECTION_QUERY_CYPHER_DIR=${ANOMALY_DETECTION_QUERY_CYPHER_DIR:-"${ANOMALY_DETECTION_SCRIPT_DIR}/queries"}

# Define functions to execute a cypher query from within a given file (first and only argument) like "execute_cypher"
source "${SCRIPTS_DIR}/executeQueryFunctions.sh"

# Define functions to create and delete Graph Projections like "createUndirectedDependencyProjection"
source "${SCRIPTS_DIR}/projectionFunctions.sh"

# Query or recalculate features.
# For every feature, an "Exists" query is tried first and the matching "Write" query
# is passed as the follow-up to "execute_cypher_queries_until_results".
#
# Required Parameters:
# - projection_name=...
#   Name prefix for the in-memory projection name. Example: "package-anomaly-detection"
# - projection_node_label=...
#   Label of the nodes that will be used for the projection. Example: "Package"
# - projection_weight_property=...
#   Name of the node property that contains the dependency weight. Example: "weight"
anomaly_detection_features() {
    # Declared separately from the assignment so that a failing command substitution isn't masked by "local".
    local nodeLabel
    nodeLabel=$(extractQueryParameter "projection_node_label" "${@}")

    echo "anomalyDetectionCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Collecting features for ${nodeLabel} nodes..."

    # Determine the Betweenness centrality (with the directed graph projection) if not already done
    execute_cypher_queries_until_results "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-Betweenness-Exists.cypher" \
                                         "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-Betweenness-Write.cypher" "${@}"
    # Determine the local clustering coefficient if not already done
    execute_cypher_queries_until_results "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-LocalClusteringCoefficient-Exists.cypher" \
                                         "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-LocalClusteringCoefficient-Write.cypher" "${@}"
    # Determine the page rank if not already done
    execute_cypher_queries_until_results "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-PageRank-Exists.cypher" \
                                         "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-PageRank-Write.cypher" "${@}"
    # Determine the article rank if not already done.
    # The write query needs to be the ArticleRank one (not PageRank), otherwise the article rank would never be written.
    execute_cypher_queries_until_results "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-ArticleRank-Exists.cypher" \
                                         "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-ArticleRank-Write.cypher" "${@}"
}

# Run queries to find anomalies in the graph.
# Every query "AnomalyDetection<Name>.cypher" is executed with the given parameters and its
# output is written to "<FULL_REPORT_DIRECTORY>/<nodeLabel>AnomalyDetection_<Name>.csv".
#
# Required Parameters:
# - projection_node_label=...
#   Label of the nodes that will be used for the projection. Example: "Package"
#
# Globals (read): ANOMALY_DETECTION_QUERY_CYPHER_DIR, FULL_REPORT_DIRECTORY
anomaly_detection_queries() {
    # Declared separately from the assignment so that a failing command substitution isn't masked by "local".
    local nodeLabel
    nodeLabel=$(extractQueryParameter "projection_node_label" "${@}")

    echo "anomalyDetectionCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Executing Queries for ${nodeLabel} nodes..."

    # The query names are used for both the cypher file name and the CSV report file name.
    local -a queryNames=(
        "PotentialImbalancedRoles"
        "PotentialOverEngineerOrIsolated"
        "HiddenBridgeNodes"
        "PopularBottlenecks"
        "SilentCoordinators"
        "OverReferencesUtilities"
        "FragileStructuralBridges"
        "DependencyHungryOrchestrators"
        "UnexpectedCentralNodes"
    )

    local queryName
    for queryName in "${queryNames[@]}"; do
        execute_cypher "${ANOMALY_DETECTION_QUERY_CYPHER_DIR}/AnomalyDetection${queryName}.cypher" "${@}" \
            > "${FULL_REPORT_DIRECTORY}/${nodeLabel}AnomalyDetection_${queryName}.csv"
    done
}

# Run the anomaly detection pipeline:
# First collect the features, then run the anomaly queries. Both steps are timed
# and get all given parameters passed through unchanged.
#
# Required Parameters:
# - projection_name=...
#   Name prefix for the in-memory projection name. Example: "package-anomaly-detection"
# - projection_node_label=...
#   Label of the nodes that will be used for the projection. Example: "Package"
# - projection_weight_property=...
#   Name of the node property that contains the dependency weight. Example: "weight"
anomaly_detection_csv_reports() {
    time anomaly_detection_features "${@}"
    time anomaly_detection_queries "${@}"
}

# Create report directory
REPORT_NAME="anomaly-detection"
FULL_REPORT_DIRECTORY="${REPORTS_DIRECTORY}/${REPORT_NAME}"
mkdir -p "${FULL_REPORT_DIRECTORY}"

# Query Parameter key pairs for projection and algorithm side.
# The "PROJECTION_*" keys are passed to the projection functions,
# the "ALGORITHM_*" keys are passed to the feature and anomaly queries.
PROJECTION_NAME="dependencies_projection"
ALGORITHM_PROJECTION="projection_name"

PROJECTION_NODE="dependencies_projection_node"
ALGORITHM_NODE="projection_node_label"

PROJECTION_WEIGHT="dependencies_projection_weight_property"
ALGORITHM_WEIGHT="projection_weight_property"

# Code independent algorithm parameters
COMMUNITY_PROPERTY="community_property=communityLeidenIdTuned"
EMBEDDING_PROPERTY="embedding_property=embeddingsFastRandomProjectionTunedForClustering"

# Each section below only runs the reports if the undirected projection could be created.
# The directed projection uses the same name with the suffix "-directed".

# -- Java Artifact Anomaly Detection -----------------------------

if createUndirectedDependencyProjection "${PROJECTION_NAME}=artifact-anomaly-detection" "${PROJECTION_NODE}=Artifact" "${PROJECTION_WEIGHT}=weight"; then
    createDirectedDependencyProjection "${PROJECTION_NAME}=artifact-anomaly-detection-directed" "${PROJECTION_NODE}=Artifact" "${PROJECTION_WEIGHT}=weight"
    anomaly_detection_csv_reports "${ALGORITHM_PROJECTION}=artifact-anomaly-detection" "${ALGORITHM_NODE}=Artifact" "${ALGORITHM_WEIGHT}=weight" "${COMMUNITY_PROPERTY}" "${EMBEDDING_PROPERTY}"
fi

# -- Java Package Anomaly Detection ------------------------------

if createUndirectedDependencyProjection "${PROJECTION_NAME}=package-anomaly-detection" "${PROJECTION_NODE}=Package" "${PROJECTION_WEIGHT}=weight25PercentInterfaces"; then
    createDirectedDependencyProjection "${PROJECTION_NAME}=package-anomaly-detection-directed" "${PROJECTION_NODE}=Package" "${PROJECTION_WEIGHT}=weight25PercentInterfaces"
    anomaly_detection_csv_reports "${ALGORITHM_PROJECTION}=package-anomaly-detection" "${ALGORITHM_NODE}=Package" "${ALGORITHM_WEIGHT}=weight25PercentInterfaces" "${COMMUNITY_PROPERTY}" "${EMBEDDING_PROPERTY}"
fi

# -- Java Type Anomaly Detection ---------------------------------

if createUndirectedJavaTypeDependencyProjection "${PROJECTION_NAME}=type-anomaly-detection"; then
    createDirectedJavaTypeDependencyProjection "${PROJECTION_NAME}=type-anomaly-detection-directed"
    anomaly_detection_csv_reports "${ALGORITHM_PROJECTION}=type-anomaly-detection" "${ALGORITHM_NODE}=Type" "${ALGORITHM_WEIGHT}=weight" "${COMMUNITY_PROPERTY}" "${EMBEDDING_PROPERTY}"
fi

# -- Typescript Module Anomaly Detection -------------------------

if createUndirectedDependencyProjection "${PROJECTION_NAME}=typescript-module-anomaly-detection" "${PROJECTION_NODE}=Module" "${PROJECTION_WEIGHT}=lowCouplingElement25PercentWeight"; then
    createDirectedDependencyProjection "${PROJECTION_NAME}=typescript-module-anomaly-detection-directed" "${PROJECTION_NODE}=Module" "${PROJECTION_WEIGHT}=lowCouplingElement25PercentWeight"
    anomaly_detection_csv_reports "${ALGORITHM_PROJECTION}=typescript-module-anomaly-detection" "${ALGORITHM_NODE}=Module" "${ALGORITHM_WEIGHT}=lowCouplingElement25PercentWeight" "${COMMUNITY_PROPERTY}" "${EMBEDDING_PROPERTY}"
fi

# ---------------------------------------------------------------

# Clean-up after report generation. Empty reports will be deleted.
source "${SCRIPTS_DIR}/cleanupAfterReportGeneration.sh" "${FULL_REPORT_DIRECTORY}"

echo "anomalyDetectionCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Successfully finished."