Skip to content

Commit

Permalink
Skip empty and nested (list-of-lists) datasets
Browse files (browse the repository at this point in the history)
  • Loading branch information
markus583 committed May 14, 2024
1 parent 1d5d66a commit 34de88a
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 0 deletions.
4 changes: 4 additions & 0 deletions wtpsplit/evaluation/intrinsic_pairwise.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,8 @@ def load_or_compute_logits(args, model, eval_data, valid_data=None, save_str: st

if "test_logits" not in dset_group:
test_sentences = dataset["data"][: args.max_n_test_sentences]
if not test_sentences:
continue
if isinstance(test_sentences[0], list):
continue
all_pairs_test = generate_k_mers(
Expand Down Expand Up @@ -428,6 +430,8 @@ def main(args):

for dataset_name, dataset in dsets["sentence"].items():
sentences = dataset["data"][: args.max_n_test_sentences]
if not sentences:
continue
if isinstance(sentences[0], list):
continue
sent_k_mers = generate_k_mers(
Expand Down
6 changes: 6 additions & 0 deletions wtpsplit/train/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ class Args:
lookahead_split_layers: Optional[int] = None
sample_non_whitespace: int = 1


def collate_fn(batch, args, label_args, label_dict, tokenizer, add_lang_ids: bool = False):
all_input_ids = []
all_labels = []
Expand Down Expand Up @@ -585,6 +586,11 @@ def compute_metrics(trainer):
for dataset_name, dataset in lang_data["sentence"].items():
# if "corrupt" in dataset_name:
# continue
if not dataset["data"][0]:
continue

if isinstance(dataset["data"][0], list):
continue
score, info = evaluate_sentence(
lang_code,
dataset["data"],
Expand Down

0 comments on commit 34de88a

Please sign in to comment.