You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
if createUndirectedDependencyProjection "${PROJECTION_NAME}=package-anomaly-detection" "${PROJECTION_NODE}=Package" "${PROJECTION_WEIGHT}=weight25PercentInterfaces"; then
# if createUndirectedDependencyProjection "${PROJECTION_NAME}=typescript-module-embedding" "${PROJECTION_NODE}=Module" "${PROJECTION_WEIGHT}=lowCouplingElement25PercentWeight"; then
Copy file name to clipboardExpand all lines: domains/anomaly-detection/tunedNodeEmbeddingClustering.py
+28-66Lines changed: 28 additions & 66 deletions
Original file line number
Diff line number
Diff line change
@@ -4,7 +4,6 @@
4
4
# This is useful for understanding code structure, detecting modular boundaries, and identifying anomalies or outliers in large software systems without requiring manual labeling.
5
5
# It takes the code structure as a graph in Neo4j and generates node embeddings using Fast Random Projection (FastRP).
6
6
# These embeddings capture structural similarity and are clustered using HDBSCAN to assign labels or detect noise.
7
-
# For visualization, the embeddings are reduced to 2D using t-SNE.
8
7
# All results — including embeddings, cluster labels, and 2D coordinates — are written back to Neo4j for further use.
9
8
10
9
# Prerequisite:
@@ -25,9 +24,7 @@
25
24
26
25
from neo4j import GraphDatabase, Driver
27
26
28
-
from openTSNE.sklearn import TSNE
29
-
30
-
from sklearn.base import BaseEstimator
27
+
# from sklearn.base import BaseEstimator # Extend from sklearn BaseEstimator to use e.g. GridSearchCV for hyperparameter tuning.
print("HDBSCAN clustered labels by their size descending (top 10):", clustering_results.clustering_results_distribution.head(10))
673
-
print("HDBSCAN clustered labels by their probability descending (top 10):", clustering_results.clustering_results_distribution.sort_values(by='probability', ascending=False).head(10))
print("HDBSCAN clustered labels by their size descending (top 10):", clustering_results.clustering_results_distribution.head(10))
648
+
print("HDBSCAN clustered labels by their probability descending (top 10):", clustering_results.clustering_results_distribution.sort_values(by='probability', ascending=False).head(10))
0 commit comments