From 74f860298c96b1fff6cb56383a75a1f349bd4233 Mon Sep 17 00:00:00 2001 From: Stefan Schweter Date: Thu, 13 Jul 2023 23:42:46 +0200 Subject: [PATCH] tests: sync MasakhaPOS tests with upstream repo --- tests/test_datasets.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/tests/test_datasets.py b/tests/test_datasets.py index ce2224aa0e..56d524d041 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -822,13 +822,11 @@ def test_masakha_pos_corpus(tasks_base_path): "ibo", "kin", "lug", - #"luo", "mos", "pcm", "nya", "sna", "swa", - #"tsn", "twi", "wol", "xho", @@ -839,25 +837,23 @@ def test_masakha_pos_corpus(tasks_base_path): africa_pos_stats = { "v1": { - "bam": {"train": 793, "dev": 158, "test": 634}, + "bam": {"train": 775, "dev": 154, "test": 619}, "bbj": {"train": 750, "dev": 149, "test": 599}, "ewe": {"train": 728, "dev": 145, "test": 582}, - "fon": {"train": 798, "dev": 159, "test": 637}, + "fon": {"train": 810, "dev": 161, "test": 646}, "hau": {"train": 753, "dev": 150, "test": 601}, "ibo": {"train": 803, "dev": 160, "test": 642}, "kin": {"train": 757, "dev": 151, "test": 604}, "lug": {"train": 733, "dev": 146, "test": 586}, - #"luo": {"train": 757, "dev": 151, "test": 604}, "mos": {"train": 757, "dev": 151, "test": 604}, "pcm": {"train": 752, "dev": 150, "test": 600}, "nya": {"train": 728, "dev": 145, "test": 582}, "sna": {"train": 747, "dev": 149, "test": 596}, - "swa": {"train": 675, "dev": 134, "test": 539}, - #"tsn": {"train": 753, "dev": 150, "test": 602}, - "twi": {"train": 775, "dev": 154, "test": 618}, - "wol": {"train": 770, "dev": 154, "test": 616}, + "swa": {"train": 693, "dev": 138, "test": 553}, + "twi": {"train": 785, "dev": 157, "test": 628}, + "wol": {"train": 782, "dev": 156, "test": 625}, "xho": {"train": 752, "dev": 150, "test": 601}, - "yor": {"train": 875, "dev": 174, "test": 698}, + "yor": {"train": 893, "dev": 178, "test": 713}, "zul": {"train": 753, "dev": 150, "test": 601}, }, } @@ -873,7 +869,7 @@ def check_number_sentences(reference: int, actual: int, split_name: str, languag check_number_sentences(len(corpus.train), gold_stats["train"], "train", language, version) check_number_sentences(len(corpus.dev), gold_stats["dev"], "dev", language, version) - #check_number_sentences(len(corpus.test), gold_stats["test"], "test", language, version) + check_number_sentences(len(corpus.test), gold_stats["test"], "test", language, version) def test_multi_file_jsonl_corpus_should_use_label_type(tasks_base_path):