-
Notifications
You must be signed in to change notification settings - Fork 0
/
report-mt-new.sh
executable file
·95 lines (83 loc) · 2.21 KB
/
report-mt-new.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/usr/bin/env bash
set -euo pipefail
# pip install sacrebleu-macrof # for macrof1 support
# <script.sh> exp1 exp2 exp3 ...
# Without at least one experiment directory there is nothing to report:
# print the usage summary and stop with a non-zero status.
if (( $# == 0 )); then
  printf '%s\n' \
    "Usage:: $ ./report-mt.sh <exp1> <exp2> <exp3>" \
    " <exp1> ... positional args are path to experiment dirs"
  exit 1
fi
#######################################
# Print the BLEU score of a hypothesis file against a reference file.
# Arguments:
#   $1 - hypothesis file; only its first tab-separated column is scored,
#        after every literal "<unk>" has been removed
#   $2 - reference file, handed to sacrebleu as its positional argument
# Outputs:
#   "NA-Hyp" when $1 is not a regular file, otherwise "NA-Ref" when $2 is
#   not a regular file, otherwise the output of
#   `python -m sacrebleu -m bleu -b`.
#######################################
function sacre_bleu {
  local hyp=$1
  local ref=$2
  local score
  if [[ ! -f $hyp ]]; then
    echo "NA-Hyp"
  elif [[ ! -f $ref ]]; then
    echo "NA-Ref"
  else
    # A scorer failure is deliberately tolerated: the cell is left with
    # whatever was printed (usually nothing) instead of aborting the caller,
    # which runs under `set -e`.
    score=$(cut -f1 -- "$hyp" | sed 's/<unk>//g' \
      | python -m sacrebleu -m bleu -b "$ref") || true
    printf '%s\n' "$score"
  fi
}
#######################################
# Print the MacroF1 score of a hypothesis file against a reference file.
# Needs the sacrebleu build that provides the `macrof` metric.
# Arguments:
#   $1 - hypothesis file; only its first tab-separated column is scored,
#        after every literal "<unk>" has been removed
#   $2 - reference file, handed to sacrebleu as its positional argument
# Outputs:
#   "NA-Hyp" when $1 is not a regular file, otherwise "NA-Ref" when $2 is
#   not a regular file, otherwise the output of
#   `python -m sacrebleu -m macrof -b`.
#######################################
function macro_f1 {
  local hyp=$1
  local ref=$2
  local score
  if [[ ! -f $hyp ]]; then
    echo "NA-Hyp"
  elif [[ ! -f $ref ]]; then
    echo "NA-Ref"
  else
    # A scorer failure is deliberately tolerated: the cell is left with
    # whatever was printed (usually nothing) instead of aborting the caller,
    # which runs under `set -e`.
    score=$(cut -f1 -- "$hyp" | sed 's/<unk>//g' \
      | python -m sacrebleu -m macrof -b "$ref") || true
    printf '%s\n' "$score"
  fi
}
#######################################
# Run the detokenizer that matches a language code. The selected tool is
# started without redirections, so it inherits this function's stdin/stdout.
# Arguments:
#   $1 - language code (optional): "hin" selects the indicnlp detokenizer,
#        "lit" and "deu" select sacremoses with -l lt / -l de; anything
#        else, including a missing or empty argument, selects plain
#        sacremoses.
# Returns:
#   The exit status of the detokenizer that was run.
#######################################
function detokenize {
  # ${1:-} keeps `set -u` from aborting when the caller passes no language.
  local lang=${1:-}
  case $lang in
    hin) python -m indicnlp.tokenize.indic_detokenize ;;
    lit) sacremoses -l lt detokenize ;;
    deu) sacremoses -l de detokenize ;;
    *) sacremoses detokenize ;;
  esac
}
echo "Reporting BLEU and MacroF1 "
# Column separator; may be overridden through the environment.
delim=${delim:-','}
#delim='\t'

# The data rows use $delim as part of a printf format, so escapes such as \t
# are rendered there. Render it once the same way for the header.
# shellcheck disable=SC2059
printf -v sep -- "$delim"

# extract test names automatically: every <exp>/test_*/<name>.ref contributes
# <name>; the result is a sorted, de-duplicated, newline-separated list.
names=$(
  for exp_dir in "$@"; do
    for ref_file in "$exp_dir"/test_*/*.ref; do
      # An unmatched glob stays literal; skip it, but keep dangling symlinks
      # so that their column still shows up (as NA-Ref) in the report.
      [[ -e $ref_file || -L $ref_file ]] || continue
      basename -- "$ref_file" .ref
    done
  done | sort -u
)

# Join the names with the separator (word splitting of $names is intended).
names_str=''
for name in $names; do
  names_str+="${names_str:+$sep}$name"
done

# One header cell per data column: experiment, BLEU per test, MacroF1 per test.
printf '%s\n' "Experiment${sep}BLEU: ${names_str}${sep}MacroF1: ${names_str}"
# Emit one row per <experiment>/test_* directory: the directory path, then a
# BLEU cell for every name in $names, then a MacroF1 cell for every name.
for d in "$@"; do
  for td in "$d"/test_*; do
    # Pass 1: create the detokenized hypothesis for every test that has a
    # reference and a raw hypothesis but no detokenized file yet.
    for t in $names; do
      hyp=${td}/$t.out.tsv
      hyp_detok=${td}/$t.out.detok
      ref=${td}/$t.ref
      [[ -f $ref ]] || continue
      if [[ -f $hyp && ! -f $hyp_detok ]]; then
        # The language code is the extension of the file the .ref symlink
        # points to. readlink exits non-zero for a non-symlink, which would
        # abort the report under `set -e`; fall back to the .ref path itself
        # so that the default detokenizer gets selected.
        p=$(readlink -- "$ref") || p=$ref
        ext=${p##*.} # longest match from beginning until .
        # Write to a temporary name first so that an interrupted or failed
        # run never leaves a truncated .detok file behind.
        if cut -f1 -- "$hyp" | detokenize "$ext" > "$hyp_detok.tmp"; then
          mv -- "$hyp_detok.tmp" "$hyp_detok"
        else
          rm -f -- "$hyp_detok.tmp"
          printf 'warning: detokenization failed for %s\n' "$hyp" >&2
        fi
      fi
    done

    # Pass 2: print the row. $delim stays in the format string on purpose so
    # that escapes such as \t are rendered; paths and scores are passed as
    # data so that a stray % or backslash in them is printed literally.
    printf '%s' "$td"
    for t in $names; do
      hyp_detok=${td}/$t.out.detok
      ref=${td}/$t.ref
      score=$(sacre_bleu "$hyp_detok" "$ref")
      # shellcheck disable=SC2059
      printf -- "${delim}%s" "$score"
    done
    for t in $names; do
      hyp_detok=${td}/$t.out.detok
      ref=${td}/$t.ref
      score=$(macro_f1 "$hyp_detok" "$ref")
      # shellcheck disable=SC2059
      printf -- "${delim}%s" "$score"
    done
    printf '\n'
  done
done