From b119b5ef904c54221f3966da471b011489e2d50f Mon Sep 17 00:00:00 2001 From: markus583 Date: Sat, 11 May 2024 06:41:01 +0000 Subject: [PATCH] skip en legal laws --- wtpsplit/train/train_adapter.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/wtpsplit/train/train_adapter.py b/wtpsplit/train/train_adapter.py index 5022fe9f..3d499f02 100644 --- a/wtpsplit/train/train_adapter.py +++ b/wtpsplit/train/train_adapter.py @@ -392,9 +392,13 @@ def maybe_pad(text): if "legal" in dataset_name and not ("laws" in dataset_name or "judgements" in dataset_name): print("SKIP: ", lang, dataset_name) continue + if lang == "en" and dataset_name == "legal-all-laws": + # not available. + print("SKIP: ", lang, dataset_name) + continue print("RUNNING:", dataset_name, lang) # skip langs starting with a, b, ..., k - # if not lang.startswith(tuple("k")) and not "en-de" in lang: + # if lang.startswith(tuple("abcd")): # print(f"Skipping {lang} {dataset_name}") # continue # do model stuff here; otherwise, head params would be overwritten every time