|
556 | 556 | " AND codeUnit.clusteringHDBSCANLabel IS NOT NULL\n",
|
557 | 557 | " AND codeUnit.clusteringHDBSCANProbability IS NOT NULL\n",
|
558 | 558 | " AND codeUnit.clusteringHDBSCANNoise IS NOT NULL\n",
|
| 559 | + " AND codeUnit.clusteringHDBSCANMedoid IS NOT NULL\n", |
559 | 560 | " AND codeUnit.embeddingFastRandomProjectionVisualizationX IS NOT NULL\n",
|
560 | 561 | " AND codeUnit.embeddingFastRandomProjectionVisualizationY IS NOT NULL\n",
|
561 | 562 | " RETURN DISTINCT \n",
|
|
568 | 569 | " ,codeUnit.clusteringHDBSCANLabel AS clusteringHDBSCANLabel\n",
|
569 | 570 | " ,codeUnit.clusteringHDBSCANProbability AS clusteringHDBSCANProbability\n",
|
570 | 571 | " ,codeUnit.clusteringHDBSCANNoise AS clusteringHDBSCANNoise\n",
|
| 572 | + " ,codeUnit.clusteringHDBSCANMedoid AS clusteringHDBSCANMedoid\n", |
571 | 573 | " ,codeUnit.embeddingFastRandomProjectionVisualizationX AS embeddingVisualizationX\n",
|
572 | 574 | " ,codeUnit.embeddingFastRandomProjectionVisualizationY AS embeddingVisualizationY\n",
|
573 | 575 | "\"\"\"\n",
|
|
605 | 607 | " title: str,\n",
|
606 | 608 | " main_color_map: str = \"tab20\",\n",
|
607 | 609 | " cluster_label_column_name: str = \"clusteringHDBSCANLabel\",\n",
|
| 610 | + " cluster_medoid_column_name: str = \"clusteringHDBSCANMedoid\",\n", |
608 | 611 | " centrality_column_name: str = \"pageRank\",\n",
|
609 | 612 | " x_position_column = 'embeddingVisualizationX',\n",
|
610 | 613 | " y_position_column = 'embeddingVisualizationY'\n",
|
|
668 | 671 | " label=f\"Cluster {cluster_label}\"\n",
|
669 | 672 | " )\n",
|
670 | 673 | "\n",
|
| 674 | + " # Annotate medoids of the cluster\n", |
| 675 | + " medoids = cluster_nodes[cluster_nodes[cluster_medoid_column_name] == 1]\n", |
| 676 | + " for index, row in medoids.iterrows():\n", |
| 677 | + " plot.annotate(\n", |
| 678 | + " text=f\"{row['shortCodeUnitName']} ({row[cluster_label_column_name]})\",\n", |
| 679 | + " xy=(row[x_position_column], row[y_position_column]),\n", |
| 680 | + " xytext=(5, 5), # Offset the label in x and y for better visibility\n", |
| 681 | + " **plot_annotation_style\n", |
| 682 | + " )\n", |
| 683 | + "\n", |
671 | 684 | " # Plot noise points in gray\n",
|
672 | 685 | " plot.scatter(\n",
|
673 | 686 | " x=node_embeddings_noise_only[x_position_column],\n",
|
|
697 | 710 | " title: str,\n",
|
698 | 711 | " main_color_map: str = \"tab20\",\n",
|
699 | 712 | " cluster_label_column_name: str = \"clusteringHDBSCANLabel\",\n",
|
| 713 | + " cluster_medoid_column_name: str = \"clusteringHDBSCANMedoid\",\n", |
700 | 714 | " centrality_column_name: str = \"pageRank\",\n",
|
701 | 715 | " x_position_column = 'embeddingVisualizationX',\n",
|
702 | 716 | " y_position_column = 'embeddingVisualizationY'\n",
|
|
764 | 778 | " label=f\"Cluster {cluster_label}\"\n",
|
765 | 779 | " )\n",
|
766 | 780 | "\n",
|
767 |
| - " # Plot noise points in gray\n", |
| 781 | + " # Annotate medoids of the cluster\n", |
| 782 | + " medoids = cluster_nodes[cluster_nodes[cluster_medoid_column_name] == 1]\n", |
| 783 | + " for index, row in medoids.iterrows():\n", |
| 784 | + " axis.annotate(\n", |
| 785 | + " text=f\"{row['shortCodeUnitName']} ({row[cluster_label_column_name]})\",\n", |
| 786 | + " xy=(row[x_position_column], row[y_position_column]),\n", |
| 787 | + " xytext=(5, 5), # Offset the label in x and y for better visibility\n", |
| 788 | + " **plot_annotation_style\n", |
| 789 | + " )\n", |
| 790 | + "\n", |
| 791 | + " # Plot noise points in gray\n", |
768 | 792 | " axis.scatter(\n",
|
769 | 793 | " x=node_embeddings_noise_only[x_position_column],\n",
|
770 | 794 | " y=node_embeddings_noise_only[y_position_column],\n",
|
|
986 | 1010 | " AND codeUnit.clusteringHDBSCANLabel IS NOT NULL\n",
|
987 | 1011 | " AND codeUnit.clusteringHDBSCANProbability IS NOT NULL\n",
|
988 | 1012 | " AND codeUnit.clusteringHDBSCANNoise IS NOT NULL\n",
|
| 1013 | + " AND codeUnit.clusteringHDBSCANMedoid IS NOT NULL\n", |
989 | 1014 | " AND codeUnit.embeddingFastRandomProjectionVisualizationX IS NOT NULL\n",
|
990 | 1015 | " AND codeUnit.embeddingFastRandomProjectionVisualizationY IS NOT NULL\n",
|
991 | 1016 | " RETURN DISTINCT \n",
|
|
998 | 1023 | " ,codeUnit.clusteringHDBSCANLabel AS clusteringHDBSCANLabel\n",
|
999 | 1024 | " ,codeUnit.clusteringHDBSCANProbability AS clusteringHDBSCANProbability\n",
|
1000 | 1025 | " ,codeUnit.clusteringHDBSCANNoise AS clusteringHDBSCANNoise\n",
|
| 1026 | + " ,codeUnit.clusteringHDBSCANMedoid AS clusteringHDBSCANMedoid\n", |
1001 | 1027 | " ,codeUnit.embeddingFastRandomProjectionVisualizationX AS embeddingVisualizationX\n",
|
1002 | 1028 | " ,codeUnit.embeddingFastRandomProjectionVisualizationY AS embeddingVisualizationY\n",
|
1003 | 1029 | "\"\"\"\n",
|
|
0 commit comments