From 1d5d66a53278e07be7956b74583c866d6dab3680 Mon Sep 17 00:00:00 2001
From: markus583
Date: Tue, 14 May 2024 12:23:31 +0000
Subject: [PATCH] add avg

---
 .../evaluation/evaluate_sepp_nlg_2021_subtask1.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/wtpsplit/evaluation/evaluate_sepp_nlg_2021_subtask1.py b/wtpsplit/evaluation/evaluate_sepp_nlg_2021_subtask1.py
index 192e2f4e..fb73bd94 100644
--- a/wtpsplit/evaluation/evaluate_sepp_nlg_2021_subtask1.py
+++ b/wtpsplit/evaluation/evaluate_sepp_nlg_2021_subtask1.py
@@ -10,6 +10,9 @@
 
 def evaluate_subtask1(splits, langs, prediction_dir: str, supervisions, include_n_documents) -> None:
     results = {}
+    avg_holder = {}
+    for supervision in supervisions:
+        avg_holder[supervision] = 0
     for lang_code in langs:
         results[lang_code] = {}
         for split in splits:
@@ -53,8 +56,15 @@ def evaluate_subtask1(splits, langs, prediction_dir: str, supervisions, include_
 
                 all_predicted_labels.extend(pred_labels)
             eval_result = classification_report(all_gt_labels, all_predicted_labels, output_dict=True)
-            pprint(eval_result, indent=4)
+            # pprint(eval_result, indent=4)
+            print(eval_result["1"]["f1-score"])
+            avg_holder[supervision] += eval_result["1"]["f1-score"]
             results[lang_code][split][supervision] = eval_result
+    results["avg"] = {}
+    for supervision in supervisions:
+        avg_holder[supervision] /= len(langs)
+        results["avg"][supervision] = avg_holder[supervision]
+    print(avg_holder)
     json.dump(
         results,
         open(
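
Note: the patch computes, per supervision mode, the average over languages of the positive-class ("1") F1 score taken from sklearn's classification_report(..., output_dict=True), and stores it under results["avg"]. The following is a minimal, self-contained sketch of that averaging logic only; the per-language label data and the supervision key "t" are hypothetical stand-ins, whereas the real script aggregates token-level labels read from the SEPP-NLG 2021 TSV files.

    from sklearn.metrics import classification_report

    # Hypothetical stand-in data: (gold, predicted) token labels per language.
    fake_data = {
        "en": (["0", "1", "0", "1"], ["0", "1", "0", "0"]),
        "de": (["1", "0", "0", "1"], ["1", "0", "1", "1"]),
    }
    supervisions = ["t"]  # hypothetical supervision key

    # Mirror the patch: accumulate the F1 of class "1" per supervision,
    # then divide by the number of languages to obtain the average.
    avg_holder = {supervision: 0 for supervision in supervisions}
    for lang_code, (gt_labels, pred_labels) in fake_data.items():
        for supervision in supervisions:
            eval_result = classification_report(gt_labels, pred_labels, output_dict=True)
            avg_holder[supervision] += eval_result["1"]["f1-score"]
    for supervision in supervisions:
        avg_holder[supervision] /= len(fake_data)  # len(langs) in the patch
    print(avg_holder)

As written, the accumulator is divided by len(langs) only, so the reported average corresponds to a run over a single split.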