# batch_runner.py
import argparse
import shlex
import subprocess
import time

from persona.dataset.mmlu import MMLU
# Datasets used when --use_predef_datasets is passed.
# MMLU subjects are listed by their bare name; the "mmlu-" prefix is added
# by the comprehension so each subject only has to be spelled once.
predef_datasets = [
    f"mmlu-{subject}"
    for subject in (
        "college_mathematics", "abstract_algebra",  # Maths
        "college_physics", "conceptual_physics",  # Physics
        "college_chemistry", "high_school_chemistry",  # Chemistry
        "college_computer_science", "computer_security", "machine_learning",  # Computer Science
        "moral_scenarios", "human_sexuality", "professional_medicine",
        "professional_law", "professional_accounting", "professional_psychology",
        "high_school_government_and_politics", "high_school_world_history",
        "college_biology", "anatomy",
        "sociology", "logical_fallacies", "management",
    )
]
# Non-MMLU datasets keep their names unchanged.
predef_datasets += ["mbpp", "bbh-sports_understanding"]
# Personas used when --use_predef_personas is passed.
# The "shared" list is applied to every dataset; any other key is looked up by
# dataset name and its personas are added on top of the shared ones.
predef_persona_map = {
    "shared": [
        # Disability
        "a physically-disabled person",
        "an able-bodied person",
        # Religion
        "an Atheist person",
        "a Religious person",
        "a Jewish person",
        "a Christian person",
        # Race
        "an Asian person",
        "a Hispanic person",
        "an African person",
        "a Caucasian person",
        # Politics
        "a lifelong Democrat",
        "a lifelong Republican",
        "a Donald Trump supporter",
        "a Barack Obama supporter",
        # Gender
        "a man",
        "a woman",
        # Generic baselines
        "an average human",
        "a human",
    ],
    # Expertise-mitigation personas (disabled). Uncomment to add dataset-specific
    # personas. NOTE: "mmlu-college_physics" appears twice below; if both lines
    # are enabled the second entry overrides the first, so merge them first.
    # "mmlu-high_school_world_history": ["a physically-disabled historian", "an able-bodied historian"],
    # "mmlu-professional_law": ["a physically-disabled lawyer", "an able-bodied lawyer"],
    # "mmlu-college_mathematics": ["a physically-disabled mathematician", "an able-bodied mathematician"],
    # "mmlu-college_physics": ["a physically-disabled physicist", "an able-bodied physicist"],
    # "mmlu-college_physics": ["an Atheist physicist", "a Religious physicist"],
    # "mmlu-high_school_chemistry": ["an Atheist chemist", "a Religious chemist"],
    # "mmlu-machine_learning": ["an Atheist machine learning researcher", "a Religious machine learning researcher"],
    # "mmlu-college_computer_science": ["an Atheist computer scientist", "a Religious computer scientist"],
}
# Per-dataset instance caps, passed as end_idx when --use_predef_max_size is set.
# Datasets that are not listed here are looked up with a fallback of -1.
predef_max_size_map = dict.fromkeys(
    (
        "mmlu-moral_scenarios",
        "mmlu-professional_medicine",
        "gsm8k",
        "mmlu-professional_law",
        "mmlu-professional_accounting",
        "mmlu-professional_psychology",
    ),
    250,
)
# Command-line interface of the batch runner.
parser = argparse.ArgumentParser()

# Credentials
parser.add_argument(
    "--org_id",
    default="",
    help="The OpenAI org id to use.",
)

# What to run: explicit comma-separated lists ...
parser.add_argument(
    "--datasets",
    help="Comma-separated list of datasets to use.",
)
parser.add_argument(
    "--personas",
    help="Comma-separated list of personas to use.",
)

# ... or the predefined tables from this file (store_true flags default to False).
parser.add_argument(
    "--use_predef_personas",
    action="store_true",
    help="Whether to use predefined personas or not (if set, the commandline personas are ignored)",
)
parser.add_argument(
    "--use_predef_datasets",
    action="store_true",
    help="Whether to use predefined datasets or not (if set, the commandline datasets are ignored)",
)
parser.add_argument(
    "--use_predef_max_size",
    action="store_true",
    help="Whether to use predefined max dataset sizes or not (if set, the commandline end_idx is ignored)",
)
parser.add_argument(
    "--run_no_persona",
    action="store_true",
    help="Whether to run with no_persona or not.",
)

# Prompting and instance range
parser.add_argument(
    "--prompt_type",
    default='no_persona',
    help="The prompt type to use for the persona experiments",
)
parser.add_argument(
    "--start_idx",
    type=int,
    help="The index of the first instance to use",
)
parser.add_argument(
    "--end_idx",
    type=int,
    help="The index of the last instance to use (-1 indicates dataset length)",
)

# Output naming
parser.add_argument(
    "--experiment_prefix",
    default="",
    help="The unique prefix for the output files",
)
parser.add_argument(
    "--out_file_prefix",
    default='',
    help="Any prefix for the output results file.",
)

# Repetition and model selection
parser.add_argument(
    "--repeat",
    type=int,
    default=1,
    help="The number of times to repeat the experiment",
)
parser.add_argument(
    "--model_name",
    default="gpt-3.5-turbo-0613",
    help="The model name to use",
)
def _split_csv(text):
    """Split a comma-separated string into stripped, non-empty items."""
    return [item for item in (part.strip() for part in text.split(',')) if item]


def resolve_datasets(args):
    """Return the dataset names to run.

    Args:
        args: Parsed command-line namespace; reads ``use_predef_datasets``
            and ``datasets``.

    Returns:
        ``predef_datasets`` when ``--use_predef_datasets`` is set, otherwise
        the names parsed from ``--datasets``.

    Raises:
        ValueError: If no predefined datasets were requested and
            ``--datasets`` is missing or contains no names.
    """
    if args.use_predef_datasets:
        return predef_datasets
    datasets = _split_csv(args.datasets) if args.datasets is not None else []
    if not datasets:
        # Fail with a clear message instead of an AttributeError on None.split().
        raise ValueError("No dataset has been specified")
    return datasets


def build_dataset_persona_map(args, datasets):
    """Map every dataset to the list of personas to run on it.

    Args:
        args: Parsed command-line namespace; reads ``use_predef_personas``,
            ``personas`` and ``run_no_persona``.
        datasets: Dataset names to create entries for.

    Returns:
        A dict from dataset name to its own (unshared) list of personas.
        ``"no_persona"`` is appended to every list when ``--run_no_persona``
        is set.

    Raises:
        ValueError: If predefined personas are not used, ``--personas``
            yields no persona and ``--run_no_persona`` is not set.
    """
    if args.use_predef_personas:
        shared = predef_persona_map.get("shared", [])
        persona_map = {
            dataset: [*shared, *predef_persona_map.get(dataset, [])]
            for dataset in datasets
        }
    else:
        personas = _split_csv(args.personas) if args.personas is not None else []
        if not personas and not args.run_no_persona:
            raise ValueError("No persona has been specified")
        # Each dataset gets its own copy so appending below never aliases.
        persona_map = {dataset: list(personas) for dataset in datasets}

    if args.run_no_persona:
        for dataset_personas in persona_map.values():
            dataset_personas.append("no_persona")
    return persona_map


def build_launch_commands(args, dataset, persona):
    """Build the shell commands that launch persona/run.py for one persona.

    Args:
        args: Parsed command-line namespace.
        dataset: Dataset name passed as ``--dataset_name``.
        persona: Persona text passed as ``--persona``; ``"no_persona"``
            forces the ``no_persona`` prompt type.

    Returns:
        A list with one background (``nohup ... &``) shell command per repeat.
        With more than one repeat the output files get a ``_r<i>`` suffix.
    """
    prompt_type = "no_persona" if persona == "no_persona" else args.prompt_type

    kwargs = {
        'dataset_name': dataset,
        'prompt_type': prompt_type,
        'persona': persona,
        'model_name': args.model_name,
    }
    if args.org_id:
        kwargs['org_id'] = args.org_id
    if args.start_idx is not None:
        kwargs['start_idx'] = args.start_idx
    if args.use_predef_max_size:
        kwargs['end_idx'] = predef_max_size_map.get(dataset, -1)
        print(f"Using max size of {kwargs['end_idx']} for dataset: {dataset}")
    elif args.end_idx is not None:
        kwargs['end_idx'] = args.end_idx
    if args.experiment_prefix:
        kwargs['experiment_prefix'] = args.experiment_prefix

    # The file name records the size only when an explicit end index is in effect.
    # It deliberately keeps using args.prompt_type (also for no_persona runs) so
    # that output file names stay the same as before.
    size_tag = f"_size{kwargs['end_idx']}" if kwargs.get('end_idx', -1) != -1 else ""
    output_stem = (
        f"{args.prompt_type}_{dataset}{size_tag}_"
        f"{persona.replace(' ', '_')}_{args.model_name}_output"
    )
    if args.out_file_prefix:
        output_stem = f"{args.out_file_prefix}_{output_stem}"

    # shlex.quote keeps personas with spaces, quotes or shell metacharacters
    # as a single, inert argument of the shell command.
    args_string = " ".join(
        f"--{key} {shlex.quote(str(value))}" for key, value in kwargs.items()
    )
    args_string += " --eval"

    commands = []
    for i in range(args.repeat):
        # Append the repeat tag to the stem instead of str.replace(".txt", ...),
        # which would also rewrite a ".txt" occurring earlier in the name.
        out_file = f"{output_stem}_r{i}.txt" if args.repeat > 1 else f"{output_stem}.txt"
        commands.append(
            f"nohup python -u persona/run.py {args_string} > {shlex.quote(out_file)} 2>&1 &"
        )
    return commands


def launch_runs(args, dataset_persona_map, parallelization_factor=2,
                sleep_time=15 * 60, run_command=None, sleep=time.sleep):
    """Launch all runs in batches, pausing between batches.

    ``parallelization_factor`` personas are launched at a time, followed by a
    pause of ``sleep_time`` seconds -- this is needed to avoid the OpenAI API
    rate limit (the pause might need to grow with the parallelization factor).
    A pause is only taken when more work follows: the per-batch pause is
    skipped after the last persona of a dataset (the between-dataset pause
    covers it) and no pause is taken after the last dataset, because the
    launched jobs already run in the background.

    Args:
        args: Parsed command-line namespace.
        dataset_persona_map: Dict from dataset name to its personas.
        parallelization_factor: Number of personas launched between pauses.
        sleep_time: Pause length in seconds.
        run_command: Callable taking a shell command string; defaults to
            starting it with ``subprocess.Popen(..., shell=True)``.
        sleep: Callable taking a number of seconds; defaults to ``time.sleep``.

    Raises:
        ValueError: If ``parallelization_factor`` is smaller than 1.
    """
    if parallelization_factor < 1:
        raise ValueError("parallelization_factor must be at least 1")
    if run_command is None:
        def run_command(command):
            subprocess.Popen(command, shell=True)

    n_datasets = len(dataset_persona_map)
    for dataset_no, (dataset, personas) in enumerate(dataset_persona_map.items(), start=1):
        print(f"\n\nLaunching Python scripts for dataset: {dataset}")
        print("----------------------------------------------")
        print(f"Personas: {personas}")
        for done_personas, persona in enumerate(personas, start=1):
            print(f"\nLaunching Python script for dataset: {dataset}, persona: {persona}")
            commands = build_launch_commands(args, dataset, persona)
            for run_no, command in enumerate(commands, start=1):
                print(f"Run number: {run_no}")
                print(f"Command: {command}")
                run_command(command)
                sleep(3)  # stagger the individual launches

            is_last_persona = done_personas == len(personas)
            if done_personas % parallelization_factor == 0 and not is_last_persona:
                print(f"Done with {done_personas} personas for the dataset: {dataset}")
                print(f"Sleeping for {sleep_time / 60} mins. before starting the next persona for the dataset")
                sleep(sleep_time)

        if dataset_no < n_datasets:
            print(f"Sleeping for {sleep_time / 60} mins. before starting the next dataset")
            sleep(sleep_time)


def main():
    """Parse the command line and launch every (dataset, persona) run."""
    args = parser.parse_args()

    # Print the arguments
    print("Arguments:")
    for arg_name, value in vars(args).items():
        print(f"{arg_name}: {value}")

    # Datasets
    datasets = resolve_datasets(args)
    print(f"Using datasets: {datasets}")

    # Personas
    dataset_persona_map = build_dataset_persona_map(args, datasets)

    # Print dataset_persona_map neatly
    print("Dataset-Persona map:")
    for dataset, personas in dataset_persona_map.items():
        print(f"{dataset}: {personas}")

    launch_runs(args, dataset_persona_map)


if __name__ == "__main__":
    main()