Skip to content

Commit

Permalink
tests: sync MasakhaPOS tests with upstream repo
Browse files Browse the repository at this point in the history
  • Loading branch information
stefan-it committed Aug 8, 2023
1 parent 849309e commit 74f8602
Showing 1 changed file with 7 additions and 11 deletions.
18 changes: 7 additions & 11 deletions tests/test_datasets.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -822,13 +822,11 @@ def test_masakha_pos_corpus(tasks_base_path):
 "ibo",
 "kin",
 "lug",
-#"luo",
 "mos",
 "pcm",
 "nya",
 "sna",
 "swa",
-#"tsn",
 "twi",
 "wol",
 "xho",
Expand All @@ -839,25 +837,23 @@ def test_masakha_pos_corpus(tasks_base_path):

 africa_pos_stats = {
 "v1": {
-"bam": {"train": 793, "dev": 158, "test": 634},
+"bam": {"train": 775, "dev": 154, "test": 619},
 "bbj": {"train": 750, "dev": 149, "test": 599},
 "ewe": {"train": 728, "dev": 145, "test": 582},
-"fon": {"train": 798, "dev": 159, "test": 637},
+"fon": {"train": 810, "dev": 161, "test": 646},
 "hau": {"train": 753, "dev": 150, "test": 601},
 "ibo": {"train": 803, "dev": 160, "test": 642},
 "kin": {"train": 757, "dev": 151, "test": 604},
 "lug": {"train": 733, "dev": 146, "test": 586},
-#"luo": {"train": 757, "dev": 151, "test": 604},
 "mos": {"train": 757, "dev": 151, "test": 604},
 "pcm": {"train": 752, "dev": 150, "test": 600},
 "nya": {"train": 728, "dev": 145, "test": 582},
 "sna": {"train": 747, "dev": 149, "test": 596},
-"swa": {"train": 675, "dev": 134, "test": 539},
-#"tsn": {"train": 753, "dev": 150, "test": 602},
-"twi": {"train": 775, "dev": 154, "test": 618},
-"wol": {"train": 770, "dev": 154, "test": 616},
+"swa": {"train": 693, "dev": 138, "test": 553},
+"twi": {"train": 785, "dev": 157, "test": 628},
+"wol": {"train": 782, "dev": 156, "test": 625},
 "xho": {"train": 752, "dev": 150, "test": 601},
-"yor": {"train": 875, "dev": 174, "test": 698},
+"yor": {"train": 893, "dev": 178, "test": 713},
 "zul": {"train": 753, "dev": 150, "test": 601},
 },
 }
Expand All @@ -873,7 +869,7 @@ def check_number_sentences(reference: int, actual: int, split_name: str, languag

 check_number_sentences(len(corpus.train), gold_stats["train"], "train", language, version)
 check_number_sentences(len(corpus.dev), gold_stats["dev"], "dev", language, version)
-#check_number_sentences(len(corpus.test), gold_stats["test"], "test", language, version)
+check_number_sentences(len(corpus.test), gold_stats["test"], "test", language, version)


def test_multi_file_jsonl_corpus_should_use_label_type(tasks_base_path):
Expand Down

0 comments on commit 74f8602

Please sign in to comment.