From b984a02932a3a39699f69239187c1e1307478e93 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Thu, 26 Oct 2023 16:13:54 +0200
Subject: [PATCH 01/69] first commit

---
 .../text_generation/gpt/run.py          | 44 +++++++++++++++++++
 .../text_generation/gpt/run_example3.py | 23 ++++++++++
 2 files changed, 67 insertions(+)
 create mode 100644 natural_language_processing/text_generation/gpt/run.py
 create mode 100644 natural_language_processing/text_generation/gpt/run_example3.py

diff --git a/natural_language_processing/text_generation/gpt/run.py b/natural_language_processing/text_generation/gpt/run.py
new file mode 100644
index 00000000..c2d7d4a6
--- /dev/null
+++ b/natural_language_processing/text_generation/gpt/run.py
@@ -0,0 +1,44 @@
+import torch
+from transformers import GPT2Tokenizer, GPT2LMHeadModel
+
+from utils.benchmark import run_model
+from utils.nlp.lambada import Lambada
+
+
+def run_pytorch_fp32(model_name, batch_size, num_runs, timeout, lambada_path, **kwargs):
+    from utils.pytorch import PyTorchRunner
+
+    def run_single_pass(pytorch_runner, lambada):
+        start_ids = lambada.get_input_array()[0]
+        output = pytorch_runner.run(batch_size, start_ids, num_beams=2, no_repeat_ngram_size=2,
+                                    early_stopping=True, max_new_tokens=5)
+        output = detokenize(output[0])
+
+        for i in range(batch_size):
+            first_new_word = output.replace(detokenize(start_ids[0]), '').split()[0]
+            lambada.submit_prediction(i, first_new_word)
+
+    tokenizer = GPT2Tokenizer.from_pretrained(model_name)
+
+    def detokenize(answer):
+        return tokenizer.decode(answer, skip_special_tokens=True)
+
+    def tokenize(text):
+        return tokenizer.encode(text, return_tensors='pt')
+
+    model = GPT2LMHeadModel.from_pretrained(model_name, pad_token_id=tokenizer.eos_token_id)
+    dataset = Lambada(batch_size, tokenize, detokenize, lambada_path)
+    runner = PyTorchRunner(model, disable_jit_freeze=True, func="generate")
+
+    return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout)
+
+
+if __name__ == "__main__":
+    from utils.helpers import DefaultArgParser
+
+    gpt_variants = ["gpt2"]
+    parser = DefaultArgParser(["pytorch"])
+    parser.require_model_name(gpt_variants)
+    parser.ask_for_batch_size()
+    parser.add_argument('--lambada_path', type=str, required=True, help="Path to Lambada dataset")
+    run_pytorch_fp32(**vars(parser.parse()))

diff --git a/natural_language_processing/text_generation/gpt/run_example3.py b/natural_language_processing/text_generation/gpt/run_example3.py
new file mode 100644
index 00000000..62de48c0
--- /dev/null
+++ b/natural_language_processing/text_generation/gpt/run_example3.py
@@ -0,0 +1,23 @@
+from transformers import GPT2LMHeadModel, GPT2Tokenizer
+
+tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
+model = GPT2LMHeadModel.from_pretrained('gpt2', pad_token_id=tokenizer.eos_token_id)
+
+print(tokenizer.decode(tokenizer.eos_token_id))
+
+sentence = "hey, how are you?"
+input_ids = tokenizer.encode(sentence, return_tensors='pt')
+print(tokenizer.decode(input_ids[0], skip_special_tokens=True))
+quit()
+#
+# print(input_ids)
+# print(type(input_ids))
+# quit()
+
+print('1')
+output = model.generate(input_ids, max_length=30, num_beams=2, no_repeat_ngram_size=2, early_stopping=True)
+
+print(output)
+print(type(output))
+
+print(tokenizer.decode(output[0], skip_special_tokens=True))
From c78ce12344d871e62214b8d266a6f5e2b48e1e50 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Mon, 30 Oct 2023 14:20:19 +0100
Subject: [PATCH 02/69] wip

---
 natural_language_processing/text_generation/gpt/run.py | 4 +++-
 utils/benchmark.py                                      | 4 ++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/natural_language_processing/text_generation/gpt/run.py b/natural_language_processing/text_generation/gpt/run.py
index c2d7d4a6..9cec89ad 100644
--- a/natural_language_processing/text_generation/gpt/run.py
+++ b/natural_language_processing/text_generation/gpt/run.py
@@ -1,4 +1,3 @@
-import torch
 from transformers import GPT2Tokenizer, GPT2LMHeadModel
 
 from utils.benchmark import run_model
@@ -12,6 +11,9 @@ def run_single_pass(pytorch_runner, lambada):
         start_ids = lambada.get_input_array()[0]
         output = pytorch_runner.run(batch_size, start_ids, num_beams=2, no_repeat_ngram_size=2,
                                     early_stopping=True, max_new_tokens=5)
+        print(output.shape[1])
+        print(start_ids.shape[1])
+        pytorch_runner.set_task_size(output.shape[1] - start_ids.shape[1])
         output = detokenize(output[0])
 
         for i in range(batch_size):

diff --git a/utils/benchmark.py b/utils/benchmark.py
index b6f61f71..280ba8b6 100644
--- a/utils/benchmark.py
+++ b/utils/benchmark.py
@@ -124,6 +124,10 @@ def set_task_size(self, new_task_size):
         """
         if new_task_size is None:
             return
+
+        print(len(self._finish_times))
+        print(len(self._workload_size))
+        print(len(self._finish_times) - len(self._workload_size))
         assert len(self._finish_times) - len(self._workload_size) in [1, 0]
         self._workload_size.append(new_task_size)

From 065d7de97bb47fe76fbff0f879b7d1d8968e15c5 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Mon, 30 Oct 2023 15:00:40 +0100
Subject: [PATCH 03/69] wip

---
 natural_language_processing/text_generation/gpt/run.py | 4 +---
 utils/benchmark.py                                      | 3 ---
 utils/pytorch.py                                        | 2 +-
 3 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/natural_language_processing/text_generation/gpt/run.py b/natural_language_processing/text_generation/gpt/run.py
index 9cec89ad..17c51964 100644
--- a/natural_language_processing/text_generation/gpt/run.py
+++ b/natural_language_processing/text_generation/gpt/run.py
@@ -9,10 +9,8 @@ def run_pytorch_fp32(model_name, batch_size, num_runs, timeout, lambada_path, **
 
     def run_single_pass(pytorch_runner, lambada):
         start_ids = lambada.get_input_array()[0]
-        output = pytorch_runner.run(batch_size, start_ids, num_beams=2, no_repeat_ngram_size=2,
+        output = pytorch_runner.run(None, start_ids, num_beams=2, no_repeat_ngram_size=2,
                                     early_stopping=True, max_new_tokens=5)
-        print(output.shape[1])
-        print(start_ids.shape[1])
         pytorch_runner.set_task_size(output.shape[1] - start_ids.shape[1])
         output = detokenize(output[0])

diff --git a/utils/benchmark.py b/utils/benchmark.py
index 280ba8b6..1895cc03 100644
--- a/utils/benchmark.py
+++ b/utils/benchmark.py
@@ -125,9 +125,6 @@ def set_task_size(self, new_task_size):
         if new_task_size is None:
             return
 
-        print(len(self._finish_times))
-        print(len(self._workload_size))
-        print(len(self._finish_times) - len(self._workload_size))
         assert len(self._finish_times) - len(self._workload_size) in [1, 0]
         self._workload_size.append(new_task_size)

diff --git a/utils/pytorch.py b/utils/pytorch.py
index 8ed76cdc..aee09e5f 100644
--- a/utils/pytorch.py
+++ b/utils/pytorch.py
@@ -92,7 +92,7 @@ def runner_func():
 
             self._start_times.append(start)
             self._finish_times.append(finish)
-            self._workload_size.append(task_size)
+            self.set_task_size(task_size)
             self._times_invoked += 1
 
             return output
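A note on the task-size bookkeeping wired up in the two patches above: for text generation the per-pass workload is the number of newly generated tokens, i.e. the difference between output length and prompt length along the sequence dimension. A minimal standalone sketch of that arithmetic (the tensors below are stand-ins, not the repository's Lambada inputs):

import torch

prompt = torch.randint(0, 50257, (1, 12))   # stand-in for tokenized start_ids: 12 tokens
output = torch.randint(0, 50257, (1, 17))   # stand-in for a generate() result: 17 tokens

new_tokens = output.shape[1] - prompt.shape[1]
assert new_tokens == 5                      # 5 freshly generated tokens this pass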
From 66ec296ac36a7c95ffa61783baff4785dc3dfce1 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Mon, 6 Nov 2023 13:37:46 +0100
Subject: [PATCH 04/69] first commit

---
 .../text_generation/gpt-j/run.py | 25 +++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 natural_language_processing/text_generation/gpt-j/run.py

diff --git a/natural_language_processing/text_generation/gpt-j/run.py b/natural_language_processing/text_generation/gpt-j/run.py
new file mode 100644
index 00000000..4069f4e8
--- /dev/null
+++ b/natural_language_processing/text_generation/gpt-j/run.py
@@ -0,0 +1,25 @@
+from transformers import AutoTokenizer, AutoModelForCausalLM
+from transformers import GPT2LMHeadModel, GPT2Tokenizer
+
+tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")
+model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6B")
+# model = GPT2LMHeadModel.from_pretrained('gpt2', pad_token_id=tokenizer.eos_token_id)
+
+print(tokenizer.decode(tokenizer.eos_token_id))
+
+sentence = "hey, how are you?"
+input_ids = tokenizer.encode(sentence, return_tensors='pt')
+print(tokenizer.decode(input_ids[0], skip_special_tokens=True))
+quit()
+#
+# print(input_ids)
+# print(type(input_ids))
+# quit()
+
+print('1')
+output = model.generate(input_ids, max_length=30, num_beams=2, no_repeat_ngram_size=2, early_stopping=True)
+
+print(output)
+print(type(output))
+
+print(tokenizer.decode(output[0], skip_special_tokens=True))
\ No newline at end of file
From 53500702fc7e867d9f5019647ab17dab9050a871 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Mon, 6 Nov 2023 14:18:01 +0100
Subject: [PATCH 05/69] wip

---
 .../text_generation/gpt-j/run.py         | 57 +++++++++++++------
 .../text_generation/gpt-j/run_example.py | 24 ++++++++
 2 files changed, 63 insertions(+), 18 deletions(-)
 create mode 100644 natural_language_processing/text_generation/gpt-j/run_example.py

diff --git a/natural_language_processing/text_generation/gpt-j/run.py b/natural_language_processing/text_generation/gpt-j/run.py
index 4069f4e8..2eba88a8 100644
--- a/natural_language_processing/text_generation/gpt-j/run.py
+++ b/natural_language_processing/text_generation/gpt-j/run.py
@@ -1,25 +1,46 @@
 from transformers import AutoTokenizer, AutoModelForCausalLM
-from transformers import GPT2LMHeadModel, GPT2Tokenizer
 
-tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")
-model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6B")
-# model = GPT2LMHeadModel.from_pretrained('gpt2', pad_token_id=tokenizer.eos_token_id)
 
-print(tokenizer.decode(tokenizer.eos_token_id))
 
-sentence = "hey, how are you?"
-input_ids = tokenizer.encode(sentence, return_tensors='pt')
-print(tokenizer.decode(input_ids[0], skip_special_tokens=True))
-quit()
-#
-# print(input_ids)
-# print(type(input_ids))
-# quit()
 
-print('1')
-output = model.generate(input_ids, max_length=30, num_beams=2, no_repeat_ngram_size=2, early_stopping=True)
 
-print(output)
-print(type(output))
 
-print(tokenizer.decode(output[0], skip_special_tokens=True))
\ No newline at end of file
+from utils.benchmark import run_model
+from utils.nlp.lambada import Lambada
+
+
+def run_pytorch_fp32(model_name, batch_size, num_runs, timeout, lambada_path, **kwargs):
+    from utils.pytorch import PyTorchRunner
+
+    def run_single_pass(pytorch_runner, lambada):
+        start_ids = lambada.get_input_array()[0]
+        output = pytorch_runner.run(None, start_ids, num_beams=2, no_repeat_ngram_size=2,
+                                    early_stopping=True, max_new_tokens=5)
+        pytorch_runner.set_task_size(output.shape[1] - start_ids.shape[1])
+        output = detokenize(output[0])
+
+        for i in range(batch_size):
+            first_new_word = output.replace(detokenize(start_ids[0]), '').split()[0]
+            lambada.submit_prediction(i, first_new_word)
+
+    # tokenizer = GPT2Tokenizer.from_pretrained(model_name)
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+    def detokenize(answer):
+        return tokenizer.decode(answer, skip_special_tokens=True)
+
+    def tokenize(text):
+        return tokenizer.encode(text, return_tensors='pt')
+
+    # model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6B")
+    model = AutoModelForCausalLM.from_pretrained(model_name, pad_token_id=tokenizer.eos_token_id)
+    dataset = Lambada(batch_size, tokenize, detokenize, lambada_path)
+    runner = PyTorchRunner(model, disable_jit_freeze=True, func="generate")
+
+    return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout)
+
+
+if __name__ == "__main__":
+    from utils.helpers import DefaultArgParser
+
+    gpt_variants = ["EleutherAI/gpt-j-6B"]
+    parser = DefaultArgParser(["pytorch"])
+    parser.require_model_name(gpt_variants)
+    parser.ask_for_batch_size()
+    parser.add_argument('--lambada_path', type=str, required=True, help="Path to Lambada dataset")
+    run_pytorch_fp32(**vars(parser.parse()))

diff --git a/natural_language_processing/text_generation/gpt-j/run_example.py b/natural_language_processing/text_generation/gpt-j/run_example.py
new file mode 100644
index 00000000..5dd046f8
--- /dev/null
+++ b/natural_language_processing/text_generation/gpt-j/run_example.py
@@ -0,0 +1,24 @@
+from transformers import AutoTokenizer, AutoModelForCausalLM
+from transformers import GPT2LMHeadModel, GPT2Tokenizer
+
+tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")
+model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6B")
+# model = GPT2LMHeadModel.from_pretrained('gpt2', pad_token_id=tokenizer.eos_token_id)
+
+print(tokenizer.decode(tokenizer.eos_token_id))
+
+sentence = "hey, how are you?"
+input_ids = tokenizer.encode(sentence, return_tensors='pt')
+print(tokenizer.decode(input_ids[0], skip_special_tokens=True))
+#
+# print(input_ids)
+# print(type(input_ids))
+# quit()
+
+print('1')
+output = model.generate(input_ids, max_length=30, num_beams=2, no_repeat_ngram_size=2, early_stopping=True)
+
+print(output)
+print(type(output))
+
+print(tokenizer.decode(output[0], skip_special_tokens=True))
\ No newline at end of file
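The prediction bookkeeping in run_single_pass above reduces to plain string handling: decode the output, strip the decoded prompt, and keep the first remaining word. A small illustration with hard-coded strings (hypothetical values, not actual Lambada data):

prompt_text = "in a hole in the ground there lived a"
output_text = "in a hole in the ground there lived a hobbit and he"

first_new_word = output_text.replace(prompt_text, '').split()[0]
assert first_new_word == "hobbit"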
From 840b7af42cbad12d96b51a259022886005a64ba4 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Tue, 7 Nov 2023 14:02:40 +0100
Subject: [PATCH 06/69] wip

---
 natural_language_processing/text_generation/gpt-j/run.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/natural_language_processing/text_generation/gpt-j/run.py b/natural_language_processing/text_generation/gpt-j/run.py
index 2eba88a8..093ef5cf 100644
--- a/natural_language_processing/text_generation/gpt-j/run.py
+++ b/natural_language_processing/text_generation/gpt-j/run.py
@@ -18,7 +18,6 @@ def run_single_pass(pytorch_runner, lambada):
             first_new_word = output.replace(detokenize(start_ids[0]), '').split()[0]
             lambada.submit_prediction(i, first_new_word)
 
-    # tokenizer = GPT2Tokenizer.from_pretrained(model_name)
     tokenizer = AutoTokenizer.from_pretrained(model_name)
 
     def detokenize(answer):
@@ -27,7 +26,6 @@ def detokenize(answer):
     def tokenize(text):
         return tokenizer.encode(text, return_tensors='pt')
 
-    # model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6B")
     model = AutoModelForCausalLM.from_pretrained(model_name, pad_token_id=tokenizer.eos_token_id)
     dataset = Lambada(batch_size, tokenize, detokenize, lambada_path)
     runner = PyTorchRunner(model, disable_jit_freeze=True, func="generate")

From d5cbb4a5afadb2d0746e64ac196ec39b1211f1dd Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Tue, 7 Nov 2023 14:52:14 +0100
Subject: [PATCH 07/69] wip

---
 natural_language_processing/text_generation/gpt-j/run.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/natural_language_processing/text_generation/gpt-j/run.py b/natural_language_processing/text_generation/gpt-j/run.py
index 093ef5cf..92260ac9 100644
--- a/natural_language_processing/text_generation/gpt-j/run.py
+++ b/natural_language_processing/text_generation/gpt-j/run.py
@@ -5,7 +5,7 @@
 
 
 def run_pytorch_fp32(model_name, batch_size, num_runs, timeout, lambada_path, **kwargs):
-    from utils.pytorch import PyTorchRunner
+    from utils.pytorch import PyTorchRunner, PyTorchRunnerV2
 
     def run_single_pass(pytorch_runner, lambada):
         start_ids = lambada.get_input_array()[0]
@@ -28,7 +28,9 @@ def tokenize(text):
 
     model = AutoModelForCausalLM.from_pretrained(model_name, pad_token_id=tokenizer.eos_token_id)
     dataset = Lambada(batch_size, tokenize, detokenize, lambada_path)
-    runner = PyTorchRunner(model, disable_jit_freeze=True, func="generate")
+    # runner = PyTorchRunner(model, disable_jit_freeze=True, example_inputs=None, func="generate")
+    # runner = PyTorchRunner(model, disable_jit_freeze=True, func="generate")
+    runner = PyTorchRunnerV2(model)
 
     return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout)
From 947af870f31678ee870c3dec412902b561bcf50d Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Tue, 7 Nov 2023 14:58:57 +0100
Subject: [PATCH 08/69] wip

---
 natural_language_processing/text_generation/gpt-j/run.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/natural_language_processing/text_generation/gpt-j/run.py b/natural_language_processing/text_generation/gpt-j/run.py
index 92260ac9..3aa64aa9 100644
--- a/natural_language_processing/text_generation/gpt-j/run.py
+++ b/natural_language_processing/text_generation/gpt-j/run.py
@@ -28,9 +28,10 @@ def tokenize(text):
 
     model = AutoModelForCausalLM.from_pretrained(model_name, pad_token_id=tokenizer.eos_token_id)
     dataset = Lambada(batch_size, tokenize, detokenize, lambada_path)
-    # runner = PyTorchRunner(model, disable_jit_freeze=True, example_inputs=None, func="generate")
-    # runner = PyTorchRunner(model, disable_jit_freeze=True, func="generate")
-    runner = PyTorchRunnerV2(model)
+    runner = PyTorchRunner(model, disable_jit_freeze=True,
+                           example_inputs=dataset.get_input_array()[0], func="generate")
+    # runner = PyTorchRunner(model, disable_jit_freeze=False, func="generate")
+    # runner = PyTorchRunnerV2(model)
 
     return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout)

From 79920f8292a76b634aaba648538ddaefc7eaedc0 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Tue, 7 Nov 2023 15:03:29 +0100
Subject: [PATCH 09/69] wip

---
 natural_language_processing/text_generation/gpt-j/run.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/natural_language_processing/text_generation/gpt-j/run.py b/natural_language_processing/text_generation/gpt-j/run.py
index 3aa64aa9..f8f0a4ce 100644
--- a/natural_language_processing/text_generation/gpt-j/run.py
+++ b/natural_language_processing/text_generation/gpt-j/run.py
@@ -28,7 +28,7 @@ def tokenize(text):
 
     model = AutoModelForCausalLM.from_pretrained(model_name, pad_token_id=tokenizer.eos_token_id)
     dataset = Lambada(batch_size, tokenize, detokenize, lambada_path)
-    runner = PyTorchRunner(model, disable_jit_freeze=True,
+    runner = PyTorchRunner(model, disable_jit_freeze=False,
                            example_inputs=dataset.get_input_array()[0], func="generate")
     # runner = PyTorchRunner(model, disable_jit_freeze=False, func="generate")
     # runner = PyTorchRunnerV2(model)

From 0acdbba858a0e96dea15657ad03779fc3d5f2970 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Tue, 7 Nov 2023 15:49:40 +0100
Subject: [PATCH 10/69] wip

---
 natural_language_processing/text_generation/gpt-j/run.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/natural_language_processing/text_generation/gpt-j/run.py b/natural_language_processing/text_generation/gpt-j/run.py
index f8f0a4ce..017ed512 100644
--- a/natural_language_processing/text_generation/gpt-j/run.py
+++ b/natural_language_processing/text_generation/gpt-j/run.py
@@ -28,8 +28,9 @@ def tokenize(text):
 
     model = AutoModelForCausalLM.from_pretrained(model_name, pad_token_id=tokenizer.eos_token_id)
     dataset = Lambada(batch_size, tokenize, detokenize, lambada_path)
-    runner = PyTorchRunner(model, disable_jit_freeze=False,
-                           example_inputs=dataset.get_input_array()[0], func="generate")
+    runner = PyTorchRunner(model, skip_script=True, disable_jit_freeze=False,
+                           example_inputs=(dataset.get_input_array()[0],), func="generate")
+
     # runner = PyTorchRunner(model, disable_jit_freeze=False, func="generate")
     # runner = PyTorchRunnerV2(model)
From 5dfaac524788da0dc89f60f58ab2271618367e3f Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Tue, 7 Nov 2023 16:03:31 +0100
Subject: [PATCH 11/69] wip

---
 .../text_generation/gpt-j/run1.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
 create mode 100644 natural_language_processing/text_generation/gpt-j/run1.py

diff --git a/natural_language_processing/text_generation/gpt-j/run1.py b/natural_language_processing/text_generation/gpt-j/run1.py
new file mode 100644
index 00000000..9b24754e
--- /dev/null
+++ b/natural_language_processing/text_generation/gpt-j/run1.py
@@ -0,0 +1,19 @@
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+
+tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")
+# model = GPT2Model.from_pretrained(model_name, torchscript=True)
+
+model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6B", pad_token_id=tokenizer.eos_token_id, torchscript=True)
+text = "Hi, how are you?"
+encoded_input = tokenizer(text, return_tensors='pt')
+input_dict = {key: value for key, value in encoded_input.items()}
+
+
+traced_model = torch.jit.trace(model, (input_dict['input_ids'],))
+#traced_model = torch.jit.trace(model, (encoded_input,))
+frozen_model = torch.jit.freeze(traced_model)
+
+#output = frozen_model(**encoded_input)
+output = frozen_model(input_dict['input_ids'])
From 3cc3c7b8c9f22530084aa1931c6bfacfa7f1ddc5 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Tue, 7 Nov 2023 16:24:34 +0100
Subject: [PATCH 12/69] wip

---
 natural_language_processing/text_generation/gpt-j/run.py  | 5 +++++
 natural_language_processing/text_generation/gpt-j/run1.py | 4 +++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/natural_language_processing/text_generation/gpt-j/run.py b/natural_language_processing/text_generation/gpt-j/run.py
index 017ed512..2b4a282c 100644
--- a/natural_language_processing/text_generation/gpt-j/run.py
+++ b/natural_language_processing/text_generation/gpt-j/run.py
@@ -28,6 +28,11 @@ def tokenize(text):
 
     model = AutoModelForCausalLM.from_pretrained(model_name, pad_token_id=tokenizer.eos_token_id)
     dataset = Lambada(batch_size, tokenize, detokenize, lambada_path)
+    encoded_in = dataset.get_input_array()[0]
+    print(encoded_in)
+    input_dict = {key: value for key, value in encoded_in.items()}
+    print(input_dict)
+    quit()
     runner = PyTorchRunner(model, skip_script=True, disable_jit_freeze=False,
                            example_inputs=(dataset.get_input_array()[0],), func="generate")

diff --git a/natural_language_processing/text_generation/gpt-j/run1.py b/natural_language_processing/text_generation/gpt-j/run1.py
index 9b24754e..ee3c5bf8 100644
--- a/natural_language_processing/text_generation/gpt-j/run1.py
+++ b/natural_language_processing/text_generation/gpt-j/run1.py
@@ -8,8 +8,10 @@
 model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6B", pad_token_id=tokenizer.eos_token_id, torchscript=True)
 text = "Hi, how are you?"
 encoded_input = tokenizer(text, return_tensors='pt')
+print(encoded_input)
 input_dict = {key: value for key, value in encoded_input.items()}
-
+print(input_dict)
+quit()
 
 traced_model = torch.jit.trace(model, (input_dict['input_ids'],))
 #traced_model = torch.jit.trace(model, (encoded_input,))

From 93b256e38e925b995a4daf3f1c9d13c0b74d9410 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Tue, 7 Nov 2023 18:26:31 +0100
Subject: [PATCH 13/69] wip

---
 .../text_generation/gpt-j/run.py  | 14 ++++----------
 .../text_generation/gpt-j/run1.py |  2 ++
 2 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/natural_language_processing/text_generation/gpt-j/run.py b/natural_language_processing/text_generation/gpt-j/run.py
index 2b4a282c..dbd89ea9 100644
--- a/natural_language_processing/text_generation/gpt-j/run.py
+++ b/natural_language_processing/text_generation/gpt-j/run.py
@@ -9,9 +9,8 @@ def run_pytorch_fp32(model_name, batch_size, num_runs, timeout, lambada_path, **
 
     def run_single_pass(pytorch_runner, lambada):
         start_ids = lambada.get_input_array()[0]
-        output = pytorch_runner.run(None, start_ids, num_beams=2, no_repeat_ngram_size=2,
-                                    early_stopping=True, max_new_tokens=5)
-        pytorch_runner.set_task_size(output.shape[1] - start_ids.shape[1])
+        output = pytorch_runner.run(None, start_ids)
+        # pytorch_runner.set_task_size(output.shape[1] - start_ids.shape[1])
         output = detokenize(output[0])
 
         for i in range(batch_size):
@@ -26,15 +25,10 @@ def tokenize(text):
     def tokenize(text):
         return tokenizer.encode(text, return_tensors='pt')
 
-    model = AutoModelForCausalLM.from_pretrained(model_name, pad_token_id=tokenizer.eos_token_id)
+    model = AutoModelForCausalLM.from_pretrained(model_name, pad_token_id=tokenizer.eos_token_id, torchscript=True)
     dataset = Lambada(batch_size, tokenize, detokenize, lambada_path)
-    encoded_in = dataset.get_input_array()[0]
-    print(encoded_in)
-    input_dict = {key: value for key, value in encoded_in.items()}
-    print(input_dict)
-    quit()
     runner = PyTorchRunner(model, skip_script=True, disable_jit_freeze=False,
-                           example_inputs=(dataset.get_input_array()[0],), func="generate")
+                           example_inputs=(dataset.get_input_array()[0],))
 
     # runner = PyTorchRunner(model, disable_jit_freeze=False, func="generate")
     # runner = PyTorchRunnerV2(model)

diff --git a/natural_language_processing/text_generation/gpt-j/run1.py b/natural_language_processing/text_generation/gpt-j/run1.py
index ee3c5bf8..daff52e5 100644
--- a/natural_language_processing/text_generation/gpt-j/run1.py
+++ b/natural_language_processing/text_generation/gpt-j/run1.py
@@ -19,3 +19,5 @@
 
 #output = frozen_model(**encoded_input)
 output = frozen_model(input_dict['input_ids'])
+
+tokenizer.decode(output, skip_special_tokens=True)
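The two patches above move from tracing generate() toward tracing the model's plain forward pass. A minimal sketch of that pattern, shown with the small gpt2 checkpoint rather than GPT-J so it runs quickly; torchscript=True makes forward return plain tensors, which torch.jit.trace requires:

import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2", torchscript=True).eval()

input_ids = tokenizer.encode("Hi, how are you?", return_tensors='pt')
traced_model = torch.jit.trace(model, (input_ids,))   # trace forward, not generate()
frozen_model = torch.jit.freeze(traced_model)

with torch.no_grad():
    logits = frozen_model(input_ids)[0]               # (batch, seq_len, vocab_size)
next_token = logits[:, -1, :].argmax(dim=-1)          # greedy choice for the next token
print(tokenizer.decode(next_token))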
From ec39eb589e35680e8f9aac0cb254980e17ac76ef Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Wed, 8 Nov 2023 10:33:02 +0100
Subject: [PATCH 14/69] wip

---
 .../text_generation/gpt-j/run2.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)
 create mode 100644 natural_language_processing/text_generation/gpt-j/run2.py

diff --git a/natural_language_processing/text_generation/gpt-j/run2.py b/natural_language_processing/text_generation/gpt-j/run2.py
new file mode 100644
index 00000000..8ba59211
--- /dev/null
+++ b/natural_language_processing/text_generation/gpt-j/run2.py
@@ -0,0 +1,16 @@
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+
+tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")
+# model = GPT2Model.from_pretrained(model_name, torchscript=True)
+
+model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6B", pad_token_id=tokenizer.eos_token_id)
+text = "Hi, how are you?"
+encoded_input = tokenizer.encode(text, return_tensors='pt')
+output = model.generate(encoded_input, max_length=30, num_beams=2, no_repeat_ngram_size=2, early_stopping=True)
+
+print(output)
+print(type(output))
+
+print(tokenizer.decode(output[0], skip_special_tokens=True))
\ No newline at end of file

From d4da8db9034c375617c94370cc3c91fde89d1452 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Wed, 8 Nov 2023 11:21:34 +0100
Subject: [PATCH 15/69] wip

---
 natural_language_processing/text_generation/gpt-j/run2.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/natural_language_processing/text_generation/gpt-j/run2.py b/natural_language_processing/text_generation/gpt-j/run2.py
index 8ba59211..3be15636 100644
--- a/natural_language_processing/text_generation/gpt-j/run2.py
+++ b/natural_language_processing/text_generation/gpt-j/run2.py
@@ -8,6 +8,8 @@
 model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6B", pad_token_id=tokenizer.eos_token_id)
 text = "Hi, how are you?"
 encoded_input = tokenizer.encode(text, return_tensors='pt')
+model.eval()
+torch.jit.script(model)
 output = model.generate(encoded_input, max_length=30, num_beams=2, no_repeat_ngram_size=2, early_stopping=True)
 
 print(output)

From 2bb78f1955fc1a153a63c5260458f124849b525e Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Wed, 8 Nov 2023 11:22:21 +0100
Subject: [PATCH 16/69] wip

---
 .../text_generation/gpt-j/run-v2.py | 51 +++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100644 natural_language_processing/text_generation/gpt-j/run-v2.py

diff --git a/natural_language_processing/text_generation/gpt-j/run-v2.py b/natural_language_processing/text_generation/gpt-j/run-v2.py
new file mode 100644
index 00000000..d7d95310
--- /dev/null
+++ b/natural_language_processing/text_generation/gpt-j/run-v2.py
@@ -0,0 +1,51 @@
+from transformers import AutoTokenizer, AutoModelForCausalLM
+
+from utils.benchmark import run_model
+from utils.nlp.lambada import Lambada
+
+
+def run_pytorch_fp32(model_name, batch_size, num_runs, timeout, lambada_path, **kwargs):
+    from utils.pytorch import PyTorchRunner, PyTorchRunnerV2, apply_jit_script
+
+    def run_single_pass(pytorch_runner, lambada):
+        start_ids = lambada.get_input_array()[0]
+        output = pytorch_runner.run(None, start_ids)
+        # pytorch_runner.set_task_size(output.shape[1] - start_ids.shape[1])
+        quit()
+        output = detokenize(output[0])
+
+        for i in range(batch_size):
+            first_new_word = output.replace(detokenize(start_ids[0]), '').split()[0]
+            lambada.submit_prediction(i, first_new_word)
+
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+    def detokenize(answer):
+        return tokenizer.decode(answer, skip_special_tokens=True)
+
+    def tokenize(text):
+        return tokenizer.encode(text, return_tensors='pt')
+
+    model = AutoModelForCausalLM.from_pretrained(model_name, pad_token_id=tokenizer.eos_token_id, torchscript=True)
+    dataset = Lambada(batch_size, tokenize, detokenize, lambada_path)
+    model.eval()
+    model = apply_jit_script(model)
+
+    runner = PyTorchRunnerV2(model.generate)
+
+
+    # runner = PyTorchRunner(model, disable_jit_freeze=False, func="generate")
+    # runner = PyTorchRunnerV2(model)
+
+    return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout)
+
+
+if __name__ == "__main__":
+    from utils.helpers import DefaultArgParser
+
+    gpt_variants = ["EleutherAI/gpt-j-6B"]
+    parser = DefaultArgParser(["pytorch"])
+    parser.require_model_name(gpt_variants)
+    parser.ask_for_batch_size()
+    parser.add_argument('--lambada_path', type=str, required=True, help="Path to Lambada dataset")
+    run_pytorch_fp32(**vars(parser.parse()))
From 3cee9a4ac11f0b9dba5494d7025d35905735cd4f Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Wed, 8 Nov 2023 11:26:11 +0100
Subject: [PATCH 17/69] wip

---
 natural_language_processing/text_generation/gpt-j/run2.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/natural_language_processing/text_generation/gpt-j/run2.py b/natural_language_processing/text_generation/gpt-j/run2.py
index 3be15636..d96b9440 100644
--- a/natural_language_processing/text_generation/gpt-j/run2.py
+++ b/natural_language_processing/text_generation/gpt-j/run2.py
@@ -5,7 +5,7 @@
 tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")
 # model = GPT2Model.from_pretrained(model_name, torchscript=True)
 
-model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6B", pad_token_id=tokenizer.eos_token_id)
+model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6B", pad_token_id=tokenizer.eos_token_id, torchscript=True)
 text = "Hi, how are you?"
 encoded_input = tokenizer.encode(text, return_tensors='pt')
 model.eval()

From bd4eb26f09cb17366c0cc571be86c01d71c51c54 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Wed, 8 Nov 2023 11:30:49 +0100
Subject: [PATCH 18/69] wip

---
 natural_language_processing/text_generation/gpt-j/run-v2.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/natural_language_processing/text_generation/gpt-j/run-v2.py b/natural_language_processing/text_generation/gpt-j/run-v2.py
index d7d95310..8859e894 100644
--- a/natural_language_processing/text_generation/gpt-j/run-v2.py
+++ b/natural_language_processing/text_generation/gpt-j/run-v2.py
@@ -5,7 +5,7 @@
 
 
 def run_pytorch_fp32(model_name, batch_size, num_runs, timeout, lambada_path, **kwargs):
-    from utils.pytorch import PyTorchRunner, PyTorchRunnerV2, apply_jit_script
+    from utils.pytorch import PyTorchRunner, PyTorchRunnerV2, apply_jit_script, apply_jit_trace
 
     def run_single_pass(pytorch_runner, lambada):
         start_ids = lambada.get_input_array()[0]
@@ -29,7 +29,7 @@ def tokenize(text):
     model = AutoModelForCausalLM.from_pretrained(model_name, pad_token_id=tokenizer.eos_token_id, torchscript=True)
     dataset = Lambada(batch_size, tokenize, detokenize, lambada_path)
     model.eval()
-    model = apply_jit_script(model)
+    model = apply_jit_trace(model, (dataset.get_input_array()[0],))
 
     runner = PyTorchRunnerV2(model.generate)

From 29e607f5124d963515d434b7321d5be8e490c22f Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Wed, 8 Nov 2023 11:42:53 +0100
Subject: [PATCH 19/69] wip

---
 .../text_generation/gpt-j/run-v2.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/natural_language_processing/text_generation/gpt-j/run-v2.py b/natural_language_processing/text_generation/gpt-j/run-v2.py
index 8859e894..2f44456f 100644
--- a/natural_language_processing/text_generation/gpt-j/run-v2.py
+++ b/natural_language_processing/text_generation/gpt-j/run-v2.py
@@ -1,3 +1,4 @@
+import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 
 from utils.benchmark import run_model
@@ -29,9 +30,11 @@ def tokenize(text):
     model = AutoModelForCausalLM.from_pretrained(model_name, pad_token_id=tokenizer.eos_token_id, torchscript=True)
     dataset = Lambada(batch_size, tokenize, detokenize, lambada_path)
     model.eval()
-    model = apply_jit_trace(model, (dataset.get_input_array()[0],))
+    # model = apply_jit_trace(model, (dataset.get_input_array()[0],))
+    model = apply_jit_trace(model, torch.randint(10000, (5,)))
+    model = apply_jit_script(model)
 
-    runner = PyTorchRunnerV2(model.generate)
+    runner = PyTorchRunnerV2(model)
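The back-and-forth above between torch.randint dummies and dataset samples as example inputs touches a known torch.jit.trace caveat: a trace is recorded against the concrete example it is given, and the Hugging Face TorchScript docs warn that a traced model is tied to the example's dimensions, so feeding other sequence lengths is not guaranteed to work. A sketch of tracing against the same shape that will be fed at run time (gpt2 again as a quick stand-in):

import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2", torchscript=True).eval()

sample = tokenizer.encode("Hi, how are you?", return_tensors='pt')  # shape (1, 6)

traced_model = torch.jit.trace(model, (sample,))  # graph recorded for this input size
with torch.no_grad():
    logits = traced_model(sample)[0]
print(logits.shape)                               # torch.Size([1, 6, 50257])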
From 391b7055a1b27f0df2452ed1d8304cafa1657e00 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Wed, 8 Nov 2023 11:50:02 +0100
Subject: [PATCH 20/69] wip

---
 .../text_generation/gpt-j/run2.py | 21 ++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/natural_language_processing/text_generation/gpt-j/run2.py b/natural_language_processing/text_generation/gpt-j/run2.py
index d96b9440..22e57782 100644
--- a/natural_language_processing/text_generation/gpt-j/run2.py
+++ b/natural_language_processing/text_generation/gpt-j/run2.py
@@ -5,14 +5,21 @@
 tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")
 # model = GPT2Model.from_pretrained(model_name, torchscript=True)
 
-model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6B", pad_token_id=tokenizer.eos_token_id, torchscript=True)
+model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6B", torchscript=True).eval()
 text = "Hi, how are you?"
 encoded_input = tokenizer.encode(text, return_tensors='pt')
-model.eval()
-torch.jit.script(model)
-output = model.generate(encoded_input, max_length=30, num_beams=2, no_repeat_ngram_size=2, early_stopping=True)
+
+model = torch.jit.trace(model, torch.randint(10000, (5,)))
+scripted_model = torch.jit.script(model)
+# output = model.generate(encoded_input, max_length=30, num_beams=2, no_repeat_ngram_size=2, early_stopping=True)
+output = scripted_model(encoded_input)
+# torch_out = scripted_model(context)
+generated_text_torch = tokenizer.decode(output)
 
-print(output)
-print(type(output))
+# print("Fragment: {}".format(sentence_fragment))
+print("Completed: {}".format(generated_text_torch))
+
+# print(output)
+# print(type(output))
+#
 
-print(tokenizer.decode(output[0], skip_special_tokens=True))
\ No newline at end of file
+# print(tokenizer.decode(output[0], skip_special_tokens=True))
\ No newline at end of file

From 2c7f8c962703cf88b41fe04fd73f54c6174e21ae Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Wed, 8 Nov 2023 11:53:32 +0100
Subject: [PATCH 21/69] wip

---
 natural_language_processing/text_generation/gpt-j/run2.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/natural_language_processing/text_generation/gpt-j/run2.py b/natural_language_processing/text_generation/gpt-j/run2.py
index 22e57782..df6730cd 100644
--- a/natural_language_processing/text_generation/gpt-j/run2.py
+++ b/natural_language_processing/text_generation/gpt-j/run2.py
@@ -14,6 +14,10 @@
 # output = model.generate(encoded_input, max_length=30, num_beams=2, no_repeat_ngram_size=2, early_stopping=True)
 output = scripted_model(encoded_input)
 # torch_out = scripted_model(context)
+
+print(output)
+print(type(output))
+print(len(output))
 generated_text_torch = tokenizer.decode(output)
 
 # print("Fragment: {}".format(sentence_fragment))
From da03dd802016efad8a7e2a1bc93ca9d9f3806cbb Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Wed, 8 Nov 2023 12:04:32 +0100
Subject: [PATCH 22/69] wip

---
 .../text_generation/gpt/run-v2.py | 48 +++++++++++++++++++
 1 file changed, 48 insertions(+)
 create mode 100644 natural_language_processing/text_generation/gpt/run-v2.py

diff --git a/natural_language_processing/text_generation/gpt/run-v2.py b/natural_language_processing/text_generation/gpt/run-v2.py
new file mode 100644
index 00000000..f2457bbc
--- /dev/null
+++ b/natural_language_processing/text_generation/gpt/run-v2.py
@@ -0,0 +1,48 @@
+import torch
+from transformers import GPT2Tokenizer, GPT2LMHeadModel
+
+from utils.benchmark import run_model
+from utils.nlp.lambada import Lambada
+
+
+def run_pytorch_fp32(model_name, batch_size, num_runs, timeout, lambada_path, **kwargs):
+    from utils.pytorch import PyTorchRunner, PyTorchRunnerV2, apply_jit_trace, apply_jit_script
+
+    def run_single_pass(pytorch_runner, lambada):
+        start_ids = lambada.get_input_array()[0]
+        output = pytorch_runner.run(None, start_ids, num_beams=2, no_repeat_ngram_size=2,
+                                    early_stopping=True, max_new_tokens=5)
+        pytorch_runner.set_task_size(output.shape[1] - start_ids.shape[1])
+        output = detokenize(output[0])
+
+        for i in range(batch_size):
+            first_new_word = output.replace(detokenize(start_ids[0]), '').split()[0]
+            lambada.submit_prediction(i, first_new_word)
+
+    tokenizer = GPT2Tokenizer.from_pretrained(model_name)
+
+    def detokenize(answer):
+        return tokenizer.decode(answer, skip_special_tokens=True)
+
+    def tokenize(text):
+        return tokenizer.encode(text, return_tensors='pt')
+
+    model = GPT2LMHeadModel.from_pretrained(model_name, torchscript=True).eval()
+    dataset = Lambada(batch_size, tokenize, detokenize, lambada_path)
+    model = apply_jit_trace(model, torch.randint(10000, (5,)))
+    model = apply_jit_script(model)
+    runner = PyTorchRunnerV2(model)
+    # runner = PyTorchRunner(model, disable_jit_freeze=True, func="generate")
+
+    return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout)
+
+
+if __name__ == "__main__":
+    from utils.helpers import DefaultArgParser
+
+    gpt_variants = ["gpt2"]
+    parser = DefaultArgParser(["pytorch"])
+    parser.require_model_name(gpt_variants)
+    parser.ask_for_batch_size()
+    parser.add_argument('--lambada_path', type=str, required=True, help="Path to Lambada dataset")
+    run_pytorch_fp32(**vars(parser.parse()))

From e8f4a940f6d68a370adfabd0db9b990a5c85665c Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Wed, 8 Nov 2023 12:11:18 +0100
Subject: [PATCH 23/69] wip

---
 natural_language_processing/text_generation/gpt/run-v2.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/natural_language_processing/text_generation/gpt/run-v2.py b/natural_language_processing/text_generation/gpt/run-v2.py
index f2457bbc..cebb75a6 100644
--- a/natural_language_processing/text_generation/gpt/run-v2.py
+++ b/natural_language_processing/text_generation/gpt/run-v2.py
@@ -27,7 +27,8 @@ def detokenize(answer):
     def tokenize(text):
         return tokenizer.encode(text, return_tensors='pt')
 
-    model = GPT2LMHeadModel.from_pretrained(model_name, torchscript=True).eval()
+    model = GPT2LMHeadModel.from_pretrained(model_name, torchscript=True)
+    model.eval()
     dataset = Lambada(batch_size, tokenize, detokenize, lambada_path)
     model = apply_jit_trace(model, torch.randint(10000, (5,)))
     model = apply_jit_script(model)
From 59f653aebb6c326d55be6f6bde87857283d92aa2 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Wed, 8 Nov 2023 12:16:30 +0100
Subject: [PATCH 24/69] wip

---
 .../text_generation/gpt/run1.py | 29 +++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 natural_language_processing/text_generation/gpt/run1.py

diff --git a/natural_language_processing/text_generation/gpt/run1.py b/natural_language_processing/text_generation/gpt/run1.py
new file mode 100644
index 00000000..586823cc
--- /dev/null
+++ b/natural_language_processing/text_generation/gpt/run1.py
@@ -0,0 +1,29 @@
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, GPT2LMHeadModel, GPT2Tokenizer
+
+
+tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
+# model = GPT2Model.from_pretrained(model_name, torchscript=True)
+
+model = GPT2LMHeadModel.from_pretrained("gpt2", torchscript=True).eval()
+text = "Hi, how are you?"
+encoded_input = tokenizer.encode(text, return_tensors='pt')
+
+model = torch.jit.trace(model, torch.randint(10000, (5,)))
+scripted_model = torch.jit.script(model)
+# output = model.generate(encoded_input, max_length=30, num_beams=2, no_repeat_ngram_size=2, early_stopping=True)
+output = scripted_model(encoded_input)
+# torch_out = scripted_model(context)
+
+print(output)
+print(type(output))
+print(len(output))
+generated_text_torch = tokenizer.decode(output)
+
+# print("Fragment: {}".format(sentence_fragment))
+print("Completed: {}".format(generated_text_torch))
+
+# print(output)
+# print(type(output))
+#
+# print(tokenizer.decode(output[0], skip_special_tokens=True))
\ No newline at end of file

From 5ecb46459b69f1ffcbaa22d6c9c355aea3557de0 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Wed, 8 Nov 2023 12:22:09 +0100
Subject: [PATCH 25/69] wip

---
 natural_language_processing/text_generation/gpt-j/run-v2.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/natural_language_processing/text_generation/gpt-j/run-v2.py b/natural_language_processing/text_generation/gpt-j/run-v2.py
index 2f44456f..4872bfcd 100644
--- a/natural_language_processing/text_generation/gpt-j/run-v2.py
+++ b/natural_language_processing/text_generation/gpt-j/run-v2.py
@@ -32,11 +32,11 @@ def tokenize(text):
     model.eval()
     # model = apply_jit_trace(model, (dataset.get_input_array()[0],))
     model = apply_jit_trace(model, torch.randint(10000, (5,)))
-    model = apply_jit_script(model)
+    with torch.no_grad:
+        model = apply_jit_script(model)
 
     runner = PyTorchRunnerV2(model)
 
-    # runner = PyTorchRunner(model, disable_jit_freeze=False, func="generate")
     # runner = PyTorchRunnerV2(model)

From 4fb01401a5c3118722fc09e9b1341e946bbc56d4 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Wed, 8 Nov 2023 15:30:48 +0100
Subject: [PATCH 26/69] wip

---
 natural_language_processing/text_generation/gpt-j/run-v2.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/natural_language_processing/text_generation/gpt-j/run-v2.py b/natural_language_processing/text_generation/gpt-j/run-v2.py
index 4872bfcd..ad994842 100644
--- a/natural_language_processing/text_generation/gpt-j/run-v2.py
+++ b/natural_language_processing/text_generation/gpt-j/run-v2.py
@@ -32,7 +32,7 @@ def tokenize(text):
     model.eval()
     # model = apply_jit_trace(model, (dataset.get_input_array()[0],))
     model = apply_jit_trace(model, torch.randint(10000, (5,)))
-    with torch.no_grad:
+    with torch.no_grad():
        model = apply_jit_script(model)
 
     runner = PyTorchRunnerV2(model)
From 89ac8eab7f170dee76e947de048cffe25a35e382 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Wed, 8 Nov 2023 15:35:05 +0100
Subject: [PATCH 27/69] wip

---
 natural_language_processing/text_generation/gpt-j/run-v2.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/natural_language_processing/text_generation/gpt-j/run-v2.py b/natural_language_processing/text_generation/gpt-j/run-v2.py
index ad994842..2203b62d 100644
--- a/natural_language_processing/text_generation/gpt-j/run-v2.py
+++ b/natural_language_processing/text_generation/gpt-j/run-v2.py
@@ -31,8 +31,8 @@ def tokenize(text):
     dataset = Lambada(batch_size, tokenize, detokenize, lambada_path)
     model.eval()
     # model = apply_jit_trace(model, (dataset.get_input_array()[0],))
-    model = apply_jit_trace(model, torch.randint(10000, (5,)))
     with torch.no_grad():
+        model = apply_jit_trace(model, torch.randint(10000, (5,)))
         model = apply_jit_script(model)
 
     runner = PyTorchRunnerV2(model)

From 70226015e2b9f3675611f53e28c72b5dd6da0dca Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Wed, 8 Nov 2023 15:42:19 +0100
Subject: [PATCH 28/69] wip

---
 natural_language_processing/text_generation/gpt-j/run-v2.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/natural_language_processing/text_generation/gpt-j/run-v2.py b/natural_language_processing/text_generation/gpt-j/run-v2.py
index 2203b62d..06adf9d7 100644
--- a/natural_language_processing/text_generation/gpt-j/run-v2.py
+++ b/natural_language_processing/text_generation/gpt-j/run-v2.py
@@ -32,7 +32,8 @@ def tokenize(text):
     model.eval()
     # model = apply_jit_trace(model, (dataset.get_input_array()[0],))
     with torch.no_grad():
-        model = apply_jit_trace(model, torch.randint(10000, (5,)))
+        # model = apply_jit_trace(model, torch.randint(10000, (5,)))
+        model = apply_jit_trace(model, (dataset.get_input_array()[0],))
         model = apply_jit_script(model)
 
     runner = PyTorchRunnerV2(model)

From e891622d2dc673ac6b13594000a12605694861c3 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Wed, 8 Nov 2023 16:01:07 +0100
Subject: [PATCH 29/69] wip

---
 natural_language_processing/text_generation/gpt-j/run-v2.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/natural_language_processing/text_generation/gpt-j/run-v2.py b/natural_language_processing/text_generation/gpt-j/run-v2.py
index 06adf9d7..9442fa9d 100644
--- a/natural_language_processing/text_generation/gpt-j/run-v2.py
+++ b/natural_language_processing/text_generation/gpt-j/run-v2.py
@@ -27,14 +27,13 @@ def detokenize(answer):
     def tokenize(text):
        return tokenizer.encode(text, return_tensors='pt')
 
-    model = AutoModelForCausalLM.from_pretrained(model_name, pad_token_id=tokenizer.eos_token_id, torchscript=True)
+    model = AutoModelForCausalLM.from_pretrained(model_name, pad_token_id=tokenizer.eos_token_id, torchscript=True).eval()
     dataset = Lambada(batch_size, tokenize, detokenize, lambada_path)
-    model.eval()
     # model = apply_jit_trace(model, (dataset.get_input_array()[0],))
     with torch.no_grad():
         # model = apply_jit_trace(model, torch.randint(10000, (5,)))
         model = apply_jit_trace(model, (dataset.get_input_array()[0],))
-        model = apply_jit_script(model)
+        # model = apply_jit_script(model)
 
     runner = PyTorchRunnerV2(model)
From cf09b1853749353afe7f36fffff72de8841c95de Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Wed, 8 Nov 2023 16:30:30 +0100
Subject: [PATCH 30/69] wip

---
 natural_language_processing/text_generation/gpt-j/run2.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/natural_language_processing/text_generation/gpt-j/run2.py b/natural_language_processing/text_generation/gpt-j/run2.py
index df6730cd..b2b5b09e 100644
--- a/natural_language_processing/text_generation/gpt-j/run2.py
+++ b/natural_language_processing/text_generation/gpt-j/run2.py
@@ -9,7 +9,7 @@
 text = "Hi, how are you?"
 encoded_input = tokenizer.encode(text, return_tensors='pt')
 
-model = torch.jit.trace(model, torch.randint(10000, (5,)))
+model = torch.jit.trace(model.generate, torch.randint(10000, (5,)))
 scripted_model = torch.jit.script(model)
 # output = model.generate(encoded_input, max_length=30, num_beams=2, no_repeat_ngram_size=2, early_stopping=True)
 output = scripted_model(encoded_input)

From 5aecdc1c08dde354af432cc417a19a8235d76450 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Mon, 27 Nov 2023 12:38:20 +0100
Subject: [PATCH 31/69] wip

---
 natural_language_processing/text_generation/gpt-j/run2.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/natural_language_processing/text_generation/gpt-j/run2.py b/natural_language_processing/text_generation/gpt-j/run2.py
index b2b5b09e..42c10261 100644
--- a/natural_language_processing/text_generation/gpt-j/run2.py
+++ b/natural_language_processing/text_generation/gpt-j/run2.py
@@ -26,4 +26,4 @@
 # print(output)
 # print(type(output))
 #
-# print(tokenizer.decode(output[0], skip_special_tokens=True))
\ No newline at end of file
+# print(tokenizer.decode(output[0], skip_special_tokens=True))

From a9a90642db476fa0571440d35f7b97960ed133f7 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Mon, 27 Nov 2023 13:21:04 +0100
Subject: [PATCH 32/69] wip

---
 natural_language_processing/text_generation/gpt-j/run-v2.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/natural_language_processing/text_generation/gpt-j/run-v2.py b/natural_language_processing/text_generation/gpt-j/run-v2.py
index 9442fa9d..eb59a8a9 100644
--- a/natural_language_processing/text_generation/gpt-j/run-v2.py
+++ b/natural_language_processing/text_generation/gpt-j/run-v2.py
@@ -11,8 +11,7 @@ def run_pytorch_fp32(model_name, batch_size, num_runs, timeout, lambada_path, **
     def run_single_pass(pytorch_runner, lambada):
         start_ids = lambada.get_input_array()[0]
         output = pytorch_runner.run(None, start_ids)
-        # pytorch_runner.set_task_size(output.shape[1] - start_ids.shape[1])
-        quit()
+        pytorch_runner.set_task_size(output.shape[1] - start_ids.shape[1])
         output = detokenize(output[0])
 
         for i in range(batch_size):

From ddfcc4e187a10ed26eef7c36ec921a9609b5c931 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Mon, 27 Nov 2023 13:49:44 +0100
Subject: [PATCH 33/69] wip

---
 natural_language_processing/text_generation/gpt/run-v2.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/natural_language_processing/text_generation/gpt/run-v2.py b/natural_language_processing/text_generation/gpt/run-v2.py
index cebb75a6..b72fee3c 100644
--- a/natural_language_processing/text_generation/gpt/run-v2.py
+++ b/natural_language_processing/text_generation/gpt/run-v2.py
@@ -31,7 +31,6 @@ def tokenize(text):
     model.eval()
     dataset = Lambada(batch_size, tokenize, detokenize, lambada_path)
     model = apply_jit_trace(model, torch.randint(10000, (5,)))
-    model = apply_jit_script(model)
     runner = PyTorchRunnerV2(model)
     # runner = PyTorchRunner(model, disable_jit_freeze=True, func="generate")
From 483b8ab9e7854aee9fa0ecd8b99d7787b546ea01 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Mon, 27 Nov 2023 15:00:23 +0100
Subject: [PATCH 34/69] wip

---
 natural_language_processing/text_generation/gpt/run-v2.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/natural_language_processing/text_generation/gpt/run-v2.py b/natural_language_processing/text_generation/gpt/run-v2.py
index b72fee3c..a4bb26ff 100644
--- a/natural_language_processing/text_generation/gpt/run-v2.py
+++ b/natural_language_processing/text_generation/gpt/run-v2.py
@@ -10,8 +10,9 @@ def run_pytorch_fp32(model_name, batch_size, num_runs, timeout, lambada_path, **
 
     def run_single_pass(pytorch_runner, lambada):
         start_ids = lambada.get_input_array()[0]
-        output = pytorch_runner.run(None, start_ids, num_beams=2, no_repeat_ngram_size=2,
-                                    early_stopping=True, max_new_tokens=5)
+        # output = pytorch_runner.run(None, start_ids, num_beams=2, no_repeat_ngram_size=2,
+        #                             early_stopping=True, max_new_tokens=5)
+        output = pytorch_runner.run(None, start_ids)
         pytorch_runner.set_task_size(output.shape[1] - start_ids.shape[1])
         output = detokenize(output[0])

From 65b9c935a0fb931b37606836005224fb9be5bc8f Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Mon, 27 Nov 2023 15:18:37 +0100
Subject: [PATCH 35/69] wip

---
 natural_language_processing/text_generation/gpt/run-v2.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/natural_language_processing/text_generation/gpt/run-v2.py b/natural_language_processing/text_generation/gpt/run-v2.py
index a4bb26ff..2c7dd3f4 100644
--- a/natural_language_processing/text_generation/gpt/run-v2.py
+++ b/natural_language_processing/text_generation/gpt/run-v2.py
@@ -13,7 +13,7 @@ def run_single_pass(pytorch_runner, lambada):
         # output = pytorch_runner.run(None, start_ids, num_beams=2, no_repeat_ngram_size=2,
         #                             early_stopping=True, max_new_tokens=5)
         output = pytorch_runner.run(None, start_ids)
-        pytorch_runner.set_task_size(output.shape[1] - start_ids.shape[1])
+        pytorch_runner.set_task_size(output[1] - start_ids.shape[1])
         output = detokenize(output[0])
 
         for i in range(batch_size):
From 59d5a14c9eafa663dbff87f01a90b9e9802101e1 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Mon, 27 Nov 2023 16:29:09 +0100
Subject: [PATCH 36/69] wip

---
 .../text_generation/gpt/run-v2.py       |  3 ++-
 .../text_generation/gpt/run_example3.py | 26 ++++++++++---------
 2 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/natural_language_processing/text_generation/gpt/run-v2.py b/natural_language_processing/text_generation/gpt/run-v2.py
index 2c7dd3f4..0c2746f9 100644
--- a/natural_language_processing/text_generation/gpt/run-v2.py
+++ b/natural_language_processing/text_generation/gpt/run-v2.py
@@ -31,7 +31,8 @@ def tokenize(text):
     model = GPT2LMHeadModel.from_pretrained(model_name, torchscript=True)
     model.eval()
     dataset = Lambada(batch_size, tokenize, detokenize, lambada_path)
-    model = apply_jit_trace(model, torch.randint(10000, (5,)))
+    # model = apply_jit_trace(model, torch.randint(10000, (5,)))
+    model = apply_jit_trace(model, (dataset.get_input_array()[0],))
     runner = PyTorchRunnerV2(model)
     # runner = PyTorchRunner(model, disable_jit_freeze=True, func="generate")

diff --git a/natural_language_processing/text_generation/gpt/run_example3.py b/natural_language_processing/text_generation/gpt/run_example3.py
index 62de48c0..2228cf0f 100644
--- a/natural_language_processing/text_generation/gpt/run_example3.py
+++ b/natural_language_processing/text_generation/gpt/run_example3.py
@@ -1,23 +1,25 @@
+import torch
 from transformers import GPT2LMHeadModel, GPT2Tokenizer
 
 tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
 model = GPT2LMHeadModel.from_pretrained('gpt2', pad_token_id=tokenizer.eos_token_id)
 
-print(tokenizer.decode(tokenizer.eos_token_id))
+# print(tokenizer.decode(tokenizer.eos_token_id))
 
 sentence = "hey, how are you?"
 input_ids = tokenizer.encode(sentence, return_tensors='pt')
-print(tokenizer.decode(input_ids[0], skip_special_tokens=True))
-quit()
-#
-# print(input_ids)
-# print(type(input_ids))
-# quit()
 
-print('1')
-output = model.generate(input_ids, max_length=30, num_beams=2, no_repeat_ngram_size=2, early_stopping=True)
+# Trace the model
+traced_model = torch.jit.trace(model, input_ids)
+
+# Freeze the model
+frozen_model = torch.jit.freeze(traced_model)
+
+with torch.no_grad():
+    outputs = frozen_model(input_ids)
+
+# Decode the output
+output_ids = outputs.logits.argmax(dim=-1)
+decoded_output = tokenizer.decode(output_ids[0])
+
+print(decoded_output)
 
-print(output)
-print(type(output))
-
-print(tokenizer.decode(output[0], skip_special_tokens=True))

From 00f4f0402f0618b5b635804a96ba7a849831b61e Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Mon, 27 Nov 2023 16:34:31 +0100
Subject: [PATCH 37/69] wip

---
 natural_language_processing/text_generation/gpt/run_example3.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/natural_language_processing/text_generation/gpt/run_example3.py b/natural_language_processing/text_generation/gpt/run_example3.py
index 2228cf0f..eda12312 100644
--- a/natural_language_processing/text_generation/gpt/run_example3.py
+++ b/natural_language_processing/text_generation/gpt/run_example3.py
@@ -10,7 +10,7 @@
 input_ids = tokenizer.encode(sentence, return_tensors='pt')
 
 # Trace the model
-traced_model = torch.jit.trace(model, input_ids)
+traced_model = torch.jit.trace(model, torch.randint(10000, (5,)))
 
 # Freeze the model
 frozen_model = torch.jit.freeze(traced_model)
From e844c18ee900d341ed5d82bfa55acac2e7172693 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Mon, 27 Nov 2023 16:41:21 +0100
Subject: [PATCH 38/69] wip

---
 .../text_generation/gpt-j/run1.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/natural_language_processing/text_generation/gpt-j/run1.py b/natural_language_processing/text_generation/gpt-j/run1.py
index daff52e5..8ab90c74 100644
--- a/natural_language_processing/text_generation/gpt-j/run1.py
+++ b/natural_language_processing/text_generation/gpt-j/run1.py
@@ -1,17 +1,18 @@
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import GPT2LMHeadModel, GPT2Tokenizer
 
+# tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")
+tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
+model = GPT2LMHeadModel.from_pretrained("gpt2", torchscript=True)
 
-tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")
-# model = GPT2Model.from_pretrained(model_name, torchscript=True)
-
-model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6B", pad_token_id=tokenizer.eos_token_id, torchscript=True)
+# model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6B", pad_token_id=tokenizer.eos_token_id, torchscript=True)
 text = "Hi, how are you?"
 encoded_input = tokenizer(text, return_tensors='pt')
-print(encoded_input)
+# print(encoded_input)
 input_dict = {key: value for key, value in encoded_input.items()}
-print(input_dict)
-quit()
+# print(input_dict)
+# quit()
 
 traced_model = torch.jit.trace(model, (input_dict['input_ids'],))
 #traced_model = torch.jit.trace(model, (encoded_input,))

From f2f680f1535e450ffccb193d2c861963338f9453 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Mon, 27 Nov 2023 16:42:54 +0100
Subject: [PATCH 39/69] wip

---
 natural_language_processing/text_generation/gpt-j/run1.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/natural_language_processing/text_generation/gpt-j/run1.py b/natural_language_processing/text_generation/gpt-j/run1.py
index 8ab90c74..6eb035d8 100644
--- a/natural_language_processing/text_generation/gpt-j/run1.py
+++ b/natural_language_processing/text_generation/gpt-j/run1.py
@@ -21,4 +21,8 @@
 
 #output = frozen_model(**encoded_input)
 output = frozen_model(input_dict['input_ids'])
+
+output_ids = output.logits.argmax(dim=-1)
+decoded_output = tokenizer.decode(output_ids[0])
+
 tokenizer.decode(output, skip_special_tokens=True)

From 0ac0aa449d44878518368819fc242c469556bc57 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Mon, 27 Nov 2023 16:55:00 +0100
Subject: [PATCH 40/69] wip

---
 .../text_generation/gpt-j/run1.py | 21 ++++++++++++-------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/natural_language_processing/text_generation/gpt-j/run1.py b/natural_language_processing/text_generation/gpt-j/run1.py
index 6eb035d8..e2bae906 100644
--- a/natural_language_processing/text_generation/gpt-j/run1.py
+++ b/natural_language_processing/text_generation/gpt-j/run1.py
@@ -11,18 +11,23 @@
 encoded_input = tokenizer(text, return_tensors='pt')
 # print(encoded_input)
 input_dict = {key: value for key, value in encoded_input.items()}
-# print(input_dict)
-# quit()
 
-traced_model = torch.jit.trace(model, (input_dict['input_ids'],))
-#traced_model = torch.jit.trace(model, (encoded_input,))
+def model_wrapper(input_ids):
+    return model(input_ids)[0]
+
+# traced_model = torch.jit.trace(model, (input_dict['input_ids'],))
+traced_model = torch.jit.trace(model_wrapper, encoded_input)
+
 frozen_model = torch.jit.freeze(traced_model)
 
 #output = frozen_model(**encoded_input)
-output = frozen_model(input_dict['input_ids'])
+with torch.no_grad():
+    output = frozen_model(encoded_input)
 
-output_ids = output.logits.argmax(dim=-1)
-decoded_output = tokenizer.decode(output_ids[0])
+decoded_output = tokenizer.decode(output.argmax(dim=-1)[0])
 
-tokenizer.decode(output, skip_special_tokens=True)
+# output_ids = output.logits.argmax(dim=-1)
+# decoded_output = tokenizer.decode(output_ids[0])
+#
+# tokenizer.decode(output, skip_special_tokens=True)
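One thing the wrapper-plus-argmax pattern above does not give by itself is a continuation: argmax over every position yields the model's next-token guess at each prompt position, and only the last position's guess extends the text. Rebuilding generate()-style decoding therefore means a loop that feeds the growing sequence back in. A hedged sketch of that greedy loop with gpt2; the eager forward call here is exactly the computation a traced or frozen wrapper would replace:

import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2", torchscript=True).eval()

ids = tokenizer.encode("Hello, how are you?", return_tensors='pt')

with torch.no_grad():
    for _ in range(5):                                       # generate 5 tokens greedily
        logits = model(ids)[0]                               # same tensor the traced wrapper returns
        next_id = logits[:, -1, :].argmax(dim=-1, keepdim=True)
        ids = torch.cat([ids, next_id], dim=-1)              # append and feed back in

print(tokenizer.decode(ids[0], skip_special_tokens=True))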
GPT2LMHeadModel.from_pretrained("gpt2") +tokenizer = GPT2Tokenizer.from_pretrained("gpt2") -tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B") -# model = GPT2Model.from_pretrained(model_name, torchscript=True) +# Prepare the input +sentence = "Hello, how are you?" +inputs = tokenizer.encode(sentence, return_tensors="pt") -model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6B", torchscript=True).eval() -text = "Hi, how are you?" -encoded_input = tokenizer.encode(text, return_tensors='pt') +# Define a wrapper function for the model to return only logits (tensors) +def model_wrapper(input_ids): + return model(input_ids, return_dict=False)[0] -model = torch.jit.trace(model.generate, torch.randint(10000, (5,))) -scripted_model = torch.jit.script(model) -# output = model.generate(encoded_input, max_length=30, num_beams=2, no_repeat_ngram_size=2, early_stopping=True) -output = scripted_model(encoded_input) -# torch_out = scripted_model(context) +# Trace the model with the wrapper function +traced_model = torch.jit.trace(model_wrapper, inputs) -print(output) -print(type(output)) -print(len(output)) -generated_text_torch = tokenizer.decode(output) +# Freeze the model +frozen_model = torch.jit.freeze(traced_model) -# print("Fragment: {}".format(sentence_fragment)) -print("Completed: {}".format(generated_text_torch)) +# Generate output +with torch.no_grad(): + outputs = frozen_model(inputs) -# print(output) -# print(type(output)) -# -# print(tokenizer.decode(output[0], skip_special_tokens=True)) +# Decode the output +decoded_output = tokenizer.decode(outputs.argmax(dim=-1)[0]) + +print(decoded_output) \ No newline at end of file From 04097619b9957db56efbec7fd2865f7ba828b3f0 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Mon, 27 Nov 2023 17:03:13 +0100 Subject: [PATCH 42/69] wip --- natural_language_processing/text_generation/gpt-j/run2.py | 1 + 1 file changed, 1 insertion(+) diff --git a/natural_language_processing/text_generation/gpt-j/run2.py b/natural_language_processing/text_generation/gpt-j/run2.py index a378982c..26f9ceaa 100644 --- a/natural_language_processing/text_generation/gpt-j/run2.py +++ b/natural_language_processing/text_generation/gpt-j/run2.py @@ -5,6 +5,7 @@ model = GPT2LMHeadModel.from_pretrained("gpt2") tokenizer = GPT2Tokenizer.from_pretrained("gpt2") +model.eval() # Prepare the input sentence = "Hello, how are you?" 
inputs = tokenizer.encode(sentence, return_tensors="pt") From ebcea3ccbf4981b47f45b362e6afde58d51849f9 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Mon, 27 Nov 2023 17:13:46 +0100 Subject: [PATCH 43/69] wip --- natural_language_processing/text_generation/gpt/run-v2.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/natural_language_processing/text_generation/gpt/run-v2.py b/natural_language_processing/text_generation/gpt/run-v2.py index 0c2746f9..65178f8c 100644 --- a/natural_language_processing/text_generation/gpt/run-v2.py +++ b/natural_language_processing/text_generation/gpt/run-v2.py @@ -13,9 +13,16 @@ def run_single_pass(pytorch_runner, lambada): # output = pytorch_runner.run(None, start_ids, num_beams=2, no_repeat_ngram_size=2, # early_stopping=True, max_new_tokens=5) output = pytorch_runner.run(None, start_ids) + # outputs.logits.argmax(dim=-1) + print(output.argmax(dim=-1)[0]) + quit() + + pytorch_runner.set_task_size(output[1] - start_ids.shape[1]) output = detokenize(output[0]) + + for i in range(batch_size): first_new_word = output.replace(detokenize(start_ids[0]), '').split()[0] lambada.submit_prediction(i, first_new_word) From 7d20adfe0ddd7c6a16f379a1d5f271c28d50ca2e Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Mon, 27 Nov 2023 17:26:14 +0100 Subject: [PATCH 44/69] wip --- natural_language_processing/text_generation/gpt/run-v2.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/natural_language_processing/text_generation/gpt/run-v2.py b/natural_language_processing/text_generation/gpt/run-v2.py index 65178f8c..0c2746f9 100644 --- a/natural_language_processing/text_generation/gpt/run-v2.py +++ b/natural_language_processing/text_generation/gpt/run-v2.py @@ -13,16 +13,9 @@ def run_single_pass(pytorch_runner, lambada): # output = pytorch_runner.run(None, start_ids, num_beams=2, no_repeat_ngram_size=2, # early_stopping=True, max_new_tokens=5) output = pytorch_runner.run(None, start_ids) - # outputs.logits.argmax(dim=-1) - print(output.argmax(dim=-1)[0]) - quit() - - pytorch_runner.set_task_size(output[1] - start_ids.shape[1]) output = detokenize(output[0]) - - for i in range(batch_size): first_new_word = output.replace(detokenize(start_ids[0]), '').split()[0] lambada.submit_prediction(i, first_new_word) From 8c7f13cf072311fd55fc39bf1d140a0fc38cd41f Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Wed, 29 Nov 2023 16:22:46 +0100 Subject: [PATCH 45/69] wip --- .../text_generation/gpt/run-v2.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/natural_language_processing/text_generation/gpt/run-v2.py b/natural_language_processing/text_generation/gpt/run-v2.py index 0c2746f9..d04bf4cf 100644 --- a/natural_language_processing/text_generation/gpt/run-v2.py +++ b/natural_language_processing/text_generation/gpt/run-v2.py @@ -14,6 +14,15 @@ def run_single_pass(pytorch_runner, lambada): # early_stopping=True, max_new_tokens=5) output = pytorch_runner.run(None, start_ids) pytorch_runner.set_task_size(output[1] - start_ids.shape[1]) + logits = output[0] + token_ids = torch.argmax(logits, dim=-1) + + print(type(token_ids)) + print(token_ids) + text = tokenizer.decode(token_ids) + print(text) + quit() + output = detokenize(output[0]) for i in range(batch_size): From 867e062ed573ba87d8bf7a9a1c3ecc557b9c85ff Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Thu, 30 Nov 2023 13:01:11 +0100 Subject: [PATCH 46/69] wip --- .../text_generation/gpt-j/run-v2-compile.py | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 
natural_language_processing/text_generation/gpt-j/run-v2-compile.py diff --git a/natural_language_processing/text_generation/gpt-j/run-v2-compile.py b/natural_language_processing/text_generation/gpt-j/run-v2-compile.py new file mode 100644 index 00000000..eb59a8a9 --- /dev/null +++ b/natural_language_processing/text_generation/gpt-j/run-v2-compile.py @@ -0,0 +1,53 @@ +import torch +from transformers import AutoTokenizer, AutoModelForCausalLM + +from utils.benchmark import run_model +from utils.nlp.lambada import Lambada + + +def run_pytorch_fp32(model_name, batch_size, num_runs, timeout, lambada_path, **kwargs): + from utils.pytorch import PyTorchRunner, PyTorchRunnerV2, apply_jit_script, apply_jit_trace + + def run_single_pass(pytorch_runner, lambada): + start_ids = lambada.get_input_array()[0] + output = pytorch_runner.run(None, start_ids) + pytorch_runner.set_task_size(output.shape[1] - start_ids.shape[1]) + output = detokenize(output[0]) + + for i in range(batch_size): + first_new_word = output.replace(detokenize(start_ids[0]), '').split()[0] + lambada.submit_prediction(i, first_new_word) + + tokenizer = AutoTokenizer.from_pretrained(model_name) + + def detokenize(answer): + return tokenizer.decode(answer, skip_special_tokens=True) + + def tokenize(text): + return tokenizer.encode(text, return_tensors='pt') + + model = AutoModelForCausalLM.from_pretrained(model_name, pad_token_id=tokenizer.eos_token_id, torchscript=True).eval() + dataset = Lambada(batch_size, tokenize, detokenize, lambada_path) + # model = apply_jit_trace(model, (dataset.get_input_array()[0],)) + with torch.no_grad(): + # model = apply_jit_trace(model, torch.randint(10000, (5,))) + model = apply_jit_trace(model, (dataset.get_input_array()[0],)) + # model = apply_jit_script(model) + + runner = PyTorchRunnerV2(model) + + # runner = PyTorchRunner(model, disable_jit_freeze=False, func="generate") + # runner = PyTorchRunnerV2(model) + + return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout) + + +if __name__ == "__main__": + from utils.helpers import DefaultArgParser + + gpt_variants = ["EleutherAI/gpt-j-6B"] + parser = DefaultArgParser(["pytorch"]) + parser.require_model_name(gpt_variants) + parser.ask_for_batch_size() + parser.add_argument('--lambada_path', type=str, required=True, help="Path to Lambada dataset") + run_pytorch_fp32(**vars(parser.parse())) From 901684df846816fa51a0c4c771ae637d96513ad9 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Thu, 30 Nov 2023 13:13:21 +0100 Subject: [PATCH 47/69] wip --- .../text_generation/gpt-j/run-v2-compile.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/natural_language_processing/text_generation/gpt-j/run-v2-compile.py b/natural_language_processing/text_generation/gpt-j/run-v2-compile.py index eb59a8a9..978b8ad9 100644 --- a/natural_language_processing/text_generation/gpt-j/run-v2-compile.py +++ b/natural_language_processing/text_generation/gpt-j/run-v2-compile.py @@ -1,3 +1,4 @@ +import os import torch from transformers import AutoTokenizer, AutoModelForCausalLM @@ -6,11 +7,11 @@ def run_pytorch_fp32(model_name, batch_size, num_runs, timeout, lambada_path, **kwargs): - from utils.pytorch import PyTorchRunner, PyTorchRunnerV2, apply_jit_script, apply_jit_trace + from utils.pytorch import PyTorchRunner, PyTorchRunnerV2, apply_jit_script, apply_jit_trace, apply_compile_maybe def run_single_pass(pytorch_runner, lambada): start_ids = lambada.get_input_array()[0] - output = pytorch_runner.run(None, start_ids) + 
output = pytorch_runner.run(inputs=start_ids, max_new_tokens=10) pytorch_runner.set_task_size(output.shape[1] - start_ids.shape[1]) output = detokenize(output[0]) @@ -26,15 +27,14 @@ def detokenize(answer): def tokenize(text): return tokenizer.encode(text, return_tensors='pt') - model = AutoModelForCausalLM.from_pretrained(model_name, pad_token_id=tokenizer.eos_token_id, torchscript=True).eval() + model = AutoModelForCausalLM.from_pretrained(model_name) + model.eval() dataset = Lambada(batch_size, tokenize, detokenize, lambada_path) # model = apply_jit_trace(model, (dataset.get_input_array()[0],)) - with torch.no_grad(): - # model = apply_jit_trace(model, torch.randint(10000, (5,))) - model = apply_jit_trace(model, (dataset.get_input_array()[0],)) - # model = apply_jit_script(model) + aio = '_aio_profiler_print' in dir(torch._C) and os.environ.get("AIO_PROCESS_MODE") != "0" + model.greedy_search = apply_compile_maybe(model.greedy_search, aio) - runner = PyTorchRunnerV2(model) + runner = PyTorchRunnerV2(model.generate) # runner = PyTorchRunner(model, disable_jit_freeze=False, func="generate") # runner = PyTorchRunnerV2(model) From e0fdb5563ed9b03ece677bb987eb9fe9d3a55dcd Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Thu, 30 Nov 2023 13:26:53 +0100 Subject: [PATCH 48/69] wip --- .../text_generation/gpt/run-v2-compile.py | 66 +++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 natural_language_processing/text_generation/gpt/run-v2-compile.py diff --git a/natural_language_processing/text_generation/gpt/run-v2-compile.py b/natural_language_processing/text_generation/gpt/run-v2-compile.py new file mode 100644 index 00000000..0f4b568d --- /dev/null +++ b/natural_language_processing/text_generation/gpt/run-v2-compile.py @@ -0,0 +1,66 @@ +import os + +import torch +from transformers import GPT2Tokenizer, GPT2LMHeadModel + +from utils.benchmark import run_model +from utils.nlp.lambada import Lambada + + +def run_pytorch_fp32(model_name, batch_size, num_runs, timeout, lambada_path, **kwargs): + from utils.pytorch import PyTorchRunner, PyTorchRunnerV2, apply_jit_trace, apply_jit_script, apply_compile_maybe + + def run_single_pass(pytorch_runner, lambada): + start_ids = lambada.get_input_array()[0] + output = pytorch_runner.run(inputs=start_ids, max_new_tokens=10) + + print(output) + print(type(output)) + + quit() + # output = pytorch_runner.run(None, start_ids) + # pytorch_runner.set_task_size(output[1] - start_ids.shape[1]) + # logits = output[0] + # token_ids = torch.argmax(logits, dim=-1) + + # print(type(token_ids)) + # print(token_ids) + # text = tokenizer.decode(token_ids) + # print(text) + # quit() + + # output = detokenize(output[0]) + + # for i in range(batch_size): + # first_new_word = output.replace(detokenize(start_ids[0]), '').split()[0] + # lambada.submit_prediction(i, first_new_word) + + tokenizer = GPT2Tokenizer.from_pretrained(model_name) + + def detokenize(answer): + return tokenizer.decode(answer, skip_special_tokens=True) + + def tokenize(text): + return tokenizer.encode(text, return_tensors='pt') + + model = GPT2LMHeadModel.from_pretrained(model_name, torchscript=True) + model.eval() + dataset = Lambada(batch_size, tokenize, detokenize, lambada_path) + aio = '_aio_profiler_print' in dir(torch._C) and os.environ.get("AIO_PROCESS_MODE") != "0" + model.greedy_search = apply_compile_maybe(model.greedy_search, aio) + + runner = PyTorchRunnerV2(model.generate) + # runner = PyTorchRunner(model, disable_jit_freeze=True, func="generate") + + return 
run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout) + + +if __name__ == "__main__": + from utils.helpers import DefaultArgParser + + gpt_variants = ["gpt2"] + parser = DefaultArgParser(["pytorch"]) + parser.require_model_name(gpt_variants) + parser.ask_for_batch_size() + parser.add_argument('--lambada_path', type=str, required=True, help="Path to Lambada dataset") + run_pytorch_fp32(**vars(parser.parse())) From 4ee143fa6b9261ada940531365ad752dccf69181 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Tue, 12 Dec 2023 16:05:30 +0100 Subject: [PATCH 49/69] wip --- .../text_generation/gpt/run_example3.py | 43 +++++++++++-------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/natural_language_processing/text_generation/gpt/run_example3.py b/natural_language_processing/text_generation/gpt/run_example3.py index eda12312..c56106e1 100644 --- a/natural_language_processing/text_generation/gpt/run_example3.py +++ b/natural_language_processing/text_generation/gpt/run_example3.py @@ -1,25 +1,34 @@ +from transformers import AutoTokenizer, GPT2LMHeadModel, AutoModelForCausalLM import torch -from transformers import GPT2LMHeadModel, GPT2Tokenizer +import time +import os +torch.set_num_threads(int(os.environ["AIO_NUM_THREADS"])) -tokenizer = GPT2Tokenizer.from_pretrained('gpt2') -model = GPT2LMHeadModel.from_pretrained('gpt2', pad_token_id=tokenizer.eos_token_id) +tokenizer = AutoTokenizer.from_pretrained("gpt2") +model = GPT2LMHeadModel.from_pretrained("gpt2", torchscript=True) -# print(tokenizer.decode(tokenizer.eos_token_id)) +#tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6b") +#model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6b") -sentence = "hey, how are you?" -input_ids = tokenizer.encode(sentence, return_tensors='pt') +model.eval() +inputs = tokenizer.encode("Hello, I'm looking for an employment, ", return_tensors="pt") +print("\nNo tracing\n") +with torch.no_grad(): + for n in range(3): + break + x = time.time() + outputs = model.generate(inputs, do_sample=True, max_length=100, top_k=50, top_p=0.95, num_return_sequences=1) + print(f"Run: {n}, throughput: {round(outputs.shape[1] / (time.time() - x), 3)} tps") -# Trace the model -traced_model = torch.jit.trace(model, torch.randint(10000, (5,))) -# Freeze the model -frozen_model = torch.jit.freeze(traced_model) +#model.forward = torch.jit.freeze(torch.jit.trace_module(model, {"forward": inputs})) +model.generate = torch.jit.freeze(torch.jit.trace_module(model, {"generate": inputs})) +print("\nTracing engaged\n") with torch.no_grad(): - outputs = frozen_model(input_ids) - -# Decode the output -output_ids = outputs.logits.argmax(dim=-1) -decoded_output = tokenizer.decode(output_ids[0]) - -print(decoded_output) + for n in range(3): + x = time.time() + outputs = model.generate(inputs, do_sample=True, max_length=100, top_k=50, top_p=0.95, num_return_sequences=1) + print(f"Run: {n}, throughput: {round(outputs.shape[1] / (time.time() - x), 3)} tps") +text = tokenizer.decode(outputs[0], skip_special_tokens=True) +print(text) From 3f4cf944929c0e9ae9aa3b3fbde377de60de2381 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Tue, 12 Dec 2023 16:55:55 +0100 Subject: [PATCH 50/69] wip --- .../text_generation/gpt/run_example3.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/natural_language_processing/text_generation/gpt/run_example3.py b/natural_language_processing/text_generation/gpt/run_example3.py index c56106e1..0e1c349a 100644 --- 
a/natural_language_processing/text_generation/gpt/run_example3.py +++ b/natural_language_processing/text_generation/gpt/run_example3.py @@ -25,10 +25,18 @@ model.generate = torch.jit.freeze(torch.jit.trace_module(model, {"generate": inputs})) print("\nTracing engaged\n") +# with torch.no_grad(): +# for n in range(3): +# x = time.time() +# outputs = model.generate(inputs, do_sample=True, max_length=100, top_k=50, top_p=0.95, num_return_sequences=1) +# print(f"Run: {n}, throughput: {round(outputs.shape[1] / (time.time() - x), 3)} tps") +# text = tokenizer.decode(outputs[0], skip_special_tokens=True) +# print(text) + + with torch.no_grad(): - for n in range(3): - x = time.time() - outputs = model.generate(inputs, do_sample=True, max_length=100, top_k=50, top_p=0.95, num_return_sequences=1) - print(f"Run: {n}, throughput: {round(outputs.shape[1] / (time.time() - x), 3)} tps") + x = time.time() + outputs = model.generate(inputs, do_sample=True, max_length=20, top_p=0.95) + print(f"throughput: {round(outputs.shape[1] / (time.time() - x), 3)} tps") text = tokenizer.decode(outputs[0], skip_special_tokens=True) print(text) From e73173a5686c00b4b4a5c4d5c6b535904650b2f9 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Tue, 12 Dec 2023 17:03:50 +0100 Subject: [PATCH 51/69] wip --- natural_language_processing/text_generation/gpt/run_example3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/natural_language_processing/text_generation/gpt/run_example3.py b/natural_language_processing/text_generation/gpt/run_example3.py index 0e1c349a..c3512f76 100644 --- a/natural_language_processing/text_generation/gpt/run_example3.py +++ b/natural_language_processing/text_generation/gpt/run_example3.py @@ -36,7 +36,7 @@ with torch.no_grad(): x = time.time() - outputs = model.generate(inputs, do_sample=True, max_length=20, top_p=0.95) + outputs = model.generate(inputs, do_sample=True, max_length=50, top_p=0.95) print(f"throughput: {round(outputs.shape[1] / (time.time() - x), 3)} tps") text = tokenizer.decode(outputs[0], skip_special_tokens=True) print(text) From 006bacb4d1461991494a39fae25afa14c3f8c48d Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Wed, 13 Dec 2023 15:05:04 +0100 Subject: [PATCH 52/69] wip --- .../text_generation/gpt/run-v2.py | 35 +++++++++++-------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/natural_language_processing/text_generation/gpt/run-v2.py b/natural_language_processing/text_generation/gpt/run-v2.py index d04bf4cf..551dc782 100644 --- a/natural_language_processing/text_generation/gpt/run-v2.py +++ b/natural_language_processing/text_generation/gpt/run-v2.py @@ -1,27 +1,30 @@ import torch -from transformers import GPT2Tokenizer, GPT2LMHeadModel +from transformers import GPT2Tokenizer, GPT2LMHeadModel, AutoTokenizer from utils.benchmark import run_model from utils.nlp.lambada import Lambada def run_pytorch_fp32(model_name, batch_size, num_runs, timeout, lambada_path, **kwargs): - from utils.pytorch import PyTorchRunner, PyTorchRunnerV2, apply_jit_trace, apply_jit_script + from utils.pytorch import PyTorchRunner, PyTorchRunnerV2, apply_jit_trace, apply_jit_script, apply_jit_trace_module def run_single_pass(pytorch_runner, lambada): start_ids = lambada.get_input_array()[0] # output = pytorch_runner.run(None, start_ids, num_beams=2, no_repeat_ngram_size=2, # early_stopping=True, max_new_tokens=5) - output = pytorch_runner.run(None, start_ids) - pytorch_runner.set_task_size(output[1] - start_ids.shape[1]) - logits = output[0] - token_ids = 
torch.argmax(logits, dim=-1) - - print(type(token_ids)) - print(token_ids) - text = tokenizer.decode(token_ids) - print(text) + output = pytorch_runner.run(None, start_ids, do_sample=True, max_length=50, top_p=0.95) + quit() + # output = pytorch_runner.run(None, start_ids) + # pytorch_runner.set_task_size(output[1] - start_ids.shape[1]) + # logits = output[0] + # token_ids = torch.argmax(logits, dim=-1) + # + # print(type(token_ids)) + # print(token_ids) + # text = tokenizer.decode(token_ids) + # print(text) + # quit() output = detokenize(output[0]) @@ -29,7 +32,8 @@ def run_single_pass(pytorch_runner, lambada): first_new_word = output.replace(detokenize(start_ids[0]), '').split()[0] lambada.submit_prediction(i, first_new_word) - tokenizer = GPT2Tokenizer.from_pretrained(model_name) + # tokenizer = GPT2Tokenizer.from_pretrained(model_name) + tokenizer = AutoTokenizer.from_pretrained("gpt2") def detokenize(answer): return tokenizer.decode(answer, skip_special_tokens=True) @@ -41,8 +45,11 @@ def tokenize(text): model.eval() dataset = Lambada(batch_size, tokenize, detokenize, lambada_path) # model = apply_jit_trace(model, torch.randint(10000, (5,))) - model = apply_jit_trace(model, (dataset.get_input_array()[0],)) - runner = PyTorchRunnerV2(model) + inputs = tokenize("Hello, I'm looking for an employment, ") + model.generate = apply_jit_trace_module(model, {"generate": inputs}) + + # model = apply_jit_trace(model, (dataset.get_input_array()[0],)) + runner = PyTorchRunnerV2(model.generate) # runner = PyTorchRunner(model, disable_jit_freeze=True, func="generate") return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout) From 9766d1a8c0fd3451367e2ad35f557fd24a28e2a9 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Wed, 13 Dec 2023 15:05:16 +0100 Subject: [PATCH 53/69] wip --- utils/pytorch.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/utils/pytorch.py b/utils/pytorch.py index aee09e5f..9aff5f86 100644 --- a/utils/pytorch.py +++ b/utils/pytorch.py @@ -208,6 +208,9 @@ def apply_jit_script(model): def apply_jit_trace(model, example_inputs): return load_from_cache_or_apply(model, lambda: torch.jit.trace(model, example_inputs)) +def apply_jit_trace_module(model, example_inputs): + return load_from_cache_or_apply(model, lambda: torch.jit.trace_module(model, example_inputs)) + def apply_compile_maybe(model, aio): if os.environ.get("TORCH_COMPILE") != "1": From 38abcae5fe970c3488011c83e81886291972a7de Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Wed, 13 Dec 2023 15:10:27 +0100 Subject: [PATCH 54/69] wip --- natural_language_processing/text_generation/gpt/run-v2.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/natural_language_processing/text_generation/gpt/run-v2.py b/natural_language_processing/text_generation/gpt/run-v2.py index 551dc782..e44bf790 100644 --- a/natural_language_processing/text_generation/gpt/run-v2.py +++ b/natural_language_processing/text_generation/gpt/run-v2.py @@ -12,9 +12,13 @@ def run_single_pass(pytorch_runner, lambada): start_ids = lambada.get_input_array()[0] # output = pytorch_runner.run(None, start_ids, num_beams=2, no_repeat_ngram_size=2, # early_stopping=True, max_new_tokens=5) - output = pytorch_runner.run(None, start_ids, do_sample=True, max_length=50, top_p=0.95) + outputs = pytorch_runner.run(None, start_ids, do_sample=True, max_length=50, top_p=0.95) + text = detokenize(outputs[0]) + print(text) quit() + + # output = pytorch_runner.run(None, start_ids) # pytorch_runner.set_task_size(output[1] - 
start_ids.shape[1]) # logits = output[0] From a2968c69f24f81a0fbedd7ac68c3dfe3c84d8d47 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Wed, 13 Dec 2023 15:16:35 +0100 Subject: [PATCH 55/69] wip --- .../text_generation/gpt/run-v2.py | 26 ++++--------------- 1 file changed, 5 insertions(+), 21 deletions(-) diff --git a/natural_language_processing/text_generation/gpt/run-v2.py b/natural_language_processing/text_generation/gpt/run-v2.py index e44bf790..40879b26 100644 --- a/natural_language_processing/text_generation/gpt/run-v2.py +++ b/natural_language_processing/text_generation/gpt/run-v2.py @@ -13,31 +13,14 @@ def run_single_pass(pytorch_runner, lambada): # output = pytorch_runner.run(None, start_ids, num_beams=2, no_repeat_ngram_size=2, # early_stopping=True, max_new_tokens=5) outputs = pytorch_runner.run(None, start_ids, do_sample=True, max_length=50, top_p=0.95) - - text = detokenize(outputs[0]) - print(text) - quit() - - - # output = pytorch_runner.run(None, start_ids) - # pytorch_runner.set_task_size(output[1] - start_ids.shape[1]) - # logits = output[0] - # token_ids = torch.argmax(logits, dim=-1) - # - # print(type(token_ids)) - # print(token_ids) - # text = tokenizer.decode(token_ids) - # print(text) - # quit() - - output = detokenize(output[0]) + output = detokenize(outputs[0]) for i in range(batch_size): first_new_word = output.replace(detokenize(start_ids[0]), '').split()[0] lambada.submit_prediction(i, first_new_word) - # tokenizer = GPT2Tokenizer.from_pretrained(model_name) - tokenizer = AutoTokenizer.from_pretrained("gpt2") + tokenizer = GPT2Tokenizer.from_pretrained(model_name) + # tokenizer = AutoTokenizer.from_pretrained("gpt2") def detokenize(answer): return tokenizer.decode(answer, skip_special_tokens=True) @@ -50,7 +33,8 @@ def tokenize(text): dataset = Lambada(batch_size, tokenize, detokenize, lambada_path) # model = apply_jit_trace(model, torch.randint(10000, (5,))) inputs = tokenize("Hello, I'm looking for an employment, ") - model.generate = apply_jit_trace_module(model, {"generate": inputs}) + # model.generate = apply_jit_trace_module(model, {"generate": inputs}) + model.generate = apply_jit_trace_module(model, {"generate": dataset.get_input_array()[0]}) # model = apply_jit_trace(model, (dataset.get_input_array()[0],)) runner = PyTorchRunnerV2(model.generate) From f2dee8881a4765e8339ee8e59954fa9a6218c3f2 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Thu, 14 Dec 2023 12:29:48 +0100 Subject: [PATCH 56/69] wip --- natural_language_processing/text_generation/gpt/run-v2.py | 1 + 1 file changed, 1 insertion(+) diff --git a/natural_language_processing/text_generation/gpt/run-v2.py b/natural_language_processing/text_generation/gpt/run-v2.py index 40879b26..96bf5d3d 100644 --- a/natural_language_processing/text_generation/gpt/run-v2.py +++ b/natural_language_processing/text_generation/gpt/run-v2.py @@ -13,6 +13,7 @@ def run_single_pass(pytorch_runner, lambada): # output = pytorch_runner.run(None, start_ids, num_beams=2, no_repeat_ngram_size=2, # early_stopping=True, max_new_tokens=5) outputs = pytorch_runner.run(None, start_ids, do_sample=True, max_length=50, top_p=0.95) + pytorch_runner.set_task_size(outputs.shape[1] - inputs.input_ids.shape[1]) output = detokenize(outputs[0]) for i in range(batch_size): From 0b79157bb3724dbb7f0b29459ac50beb37639d72 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Thu, 14 Dec 2023 12:30:53 +0100 Subject: [PATCH 57/69] wip --- natural_language_processing/text_generation/gpt/run-v2.py | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/natural_language_processing/text_generation/gpt/run-v2.py b/natural_language_processing/text_generation/gpt/run-v2.py index 96bf5d3d..d788c677 100644 --- a/natural_language_processing/text_generation/gpt/run-v2.py +++ b/natural_language_processing/text_generation/gpt/run-v2.py @@ -13,7 +13,7 @@ def run_single_pass(pytorch_runner, lambada): # output = pytorch_runner.run(None, start_ids, num_beams=2, no_repeat_ngram_size=2, # early_stopping=True, max_new_tokens=5) outputs = pytorch_runner.run(None, start_ids, do_sample=True, max_length=50, top_p=0.95) - pytorch_runner.set_task_size(outputs.shape[1] - inputs.input_ids.shape[1]) + pytorch_runner.set_task_size(outputs.shape[1] - start_ids.shape[1]) output = detokenize(outputs[0]) for i in range(batch_size): From f4f2e8f090172d6033ba8ce73c04570f037d5981 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Thu, 14 Dec 2023 13:00:36 +0100 Subject: [PATCH 58/69] cleanup --- .../gpt/{run-v2-compile.py => run-compile.py} | 0 .../text_generation/gpt/run-v2.py | 55 ------------------- .../text_generation/gpt/run.py | 27 ++++++--- .../text_generation/gpt/run1.py | 29 ---------- .../text_generation/gpt/run_example3.py | 42 -------------- 5 files changed, 19 insertions(+), 134 deletions(-) rename natural_language_processing/text_generation/gpt/{run-v2-compile.py => run-compile.py} (100%) delete mode 100644 natural_language_processing/text_generation/gpt/run-v2.py delete mode 100644 natural_language_processing/text_generation/gpt/run1.py delete mode 100644 natural_language_processing/text_generation/gpt/run_example3.py diff --git a/natural_language_processing/text_generation/gpt/run-v2-compile.py b/natural_language_processing/text_generation/gpt/run-compile.py similarity index 100% rename from natural_language_processing/text_generation/gpt/run-v2-compile.py rename to natural_language_processing/text_generation/gpt/run-compile.py diff --git a/natural_language_processing/text_generation/gpt/run-v2.py b/natural_language_processing/text_generation/gpt/run-v2.py deleted file mode 100644 index d788c677..00000000 --- a/natural_language_processing/text_generation/gpt/run-v2.py +++ /dev/null @@ -1,55 +0,0 @@ -import torch -from transformers import GPT2Tokenizer, GPT2LMHeadModel, AutoTokenizer - -from utils.benchmark import run_model -from utils.nlp.lambada import Lambada - - -def run_pytorch_fp32(model_name, batch_size, num_runs, timeout, lambada_path, **kwargs): - from utils.pytorch import PyTorchRunner, PyTorchRunnerV2, apply_jit_trace, apply_jit_script, apply_jit_trace_module - - def run_single_pass(pytorch_runner, lambada): - start_ids = lambada.get_input_array()[0] - # output = pytorch_runner.run(None, start_ids, num_beams=2, no_repeat_ngram_size=2, - # early_stopping=True, max_new_tokens=5) - outputs = pytorch_runner.run(None, start_ids, do_sample=True, max_length=50, top_p=0.95) - pytorch_runner.set_task_size(outputs.shape[1] - start_ids.shape[1]) - output = detokenize(outputs[0]) - - for i in range(batch_size): - first_new_word = output.replace(detokenize(start_ids[0]), '').split()[0] - lambada.submit_prediction(i, first_new_word) - - tokenizer = GPT2Tokenizer.from_pretrained(model_name) - # tokenizer = AutoTokenizer.from_pretrained("gpt2") - - def detokenize(answer): - return tokenizer.decode(answer, skip_special_tokens=True) - - def tokenize(text): - return tokenizer.encode(text, return_tensors='pt') - - model = GPT2LMHeadModel.from_pretrained(model_name, torchscript=True) - model.eval() - dataset = Lambada(batch_size, tokenize, 
detokenize, lambada_path) - # model = apply_jit_trace(model, torch.randint(10000, (5,))) - inputs = tokenize("Hello, I'm looking for an employment, ") - # model.generate = apply_jit_trace_module(model, {"generate": inputs}) - model.generate = apply_jit_trace_module(model, {"generate": dataset.get_input_array()[0]}) - - # model = apply_jit_trace(model, (dataset.get_input_array()[0],)) - runner = PyTorchRunnerV2(model.generate) - # runner = PyTorchRunner(model, disable_jit_freeze=True, func="generate") - - return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout) - - -if __name__ == "__main__": - from utils.helpers import DefaultArgParser - - gpt_variants = ["gpt2"] - parser = DefaultArgParser(["pytorch"]) - parser.require_model_name(gpt_variants) - parser.ask_for_batch_size() - parser.add_argument('--lambada_path', type=str, required=True, help="Path to Lambada dataset") - run_pytorch_fp32(**vars(parser.parse())) diff --git a/natural_language_processing/text_generation/gpt/run.py b/natural_language_processing/text_generation/gpt/run.py index 17c51964..d788c677 100644 --- a/natural_language_processing/text_generation/gpt/run.py +++ b/natural_language_processing/text_generation/gpt/run.py @@ -1,24 +1,27 @@ -from transformers import GPT2Tokenizer, GPT2LMHeadModel +import torch +from transformers import GPT2Tokenizer, GPT2LMHeadModel, AutoTokenizer from utils.benchmark import run_model from utils.nlp.lambada import Lambada def run_pytorch_fp32(model_name, batch_size, num_runs, timeout, lambada_path, **kwargs): - from utils.pytorch import PyTorchRunner + from utils.pytorch import PyTorchRunner, PyTorchRunnerV2, apply_jit_trace, apply_jit_script, apply_jit_trace_module def run_single_pass(pytorch_runner, lambada): start_ids = lambada.get_input_array()[0] - output = pytorch_runner.run(None, start_ids, num_beams=2, no_repeat_ngram_size=2, - early_stopping=True, max_new_tokens=5) - pytorch_runner.set_task_size(output.shape[1] - start_ids.shape[1]) - output = detokenize(output[0]) + # output = pytorch_runner.run(None, start_ids, num_beams=2, no_repeat_ngram_size=2, + # early_stopping=True, max_new_tokens=5) + outputs = pytorch_runner.run(None, start_ids, do_sample=True, max_length=50, top_p=0.95) + pytorch_runner.set_task_size(outputs.shape[1] - start_ids.shape[1]) + output = detokenize(outputs[0]) for i in range(batch_size): first_new_word = output.replace(detokenize(start_ids[0]), '').split()[0] lambada.submit_prediction(i, first_new_word) tokenizer = GPT2Tokenizer.from_pretrained(model_name) + # tokenizer = AutoTokenizer.from_pretrained("gpt2") def detokenize(answer): return tokenizer.decode(answer, skip_special_tokens=True) @@ -26,9 +29,17 @@ def detokenize(answer): def tokenize(text): return tokenizer.encode(text, return_tensors='pt') - model = GPT2LMHeadModel.from_pretrained(model_name, pad_token_id=tokenizer.eos_token_id) + model = GPT2LMHeadModel.from_pretrained(model_name, torchscript=True) + model.eval() dataset = Lambada(batch_size, tokenize, detokenize, lambada_path) - runner = PyTorchRunner(model, disable_jit_freeze=True, func="generate") + # model = apply_jit_trace(model, torch.randint(10000, (5,))) + inputs = tokenize("Hello, I'm looking for an employment, ") + # model.generate = apply_jit_trace_module(model, {"generate": inputs}) + model.generate = apply_jit_trace_module(model, {"generate": dataset.get_input_array()[0]}) + + # model = apply_jit_trace(model, (dataset.get_input_array()[0],)) + runner = PyTorchRunnerV2(model.generate) + # runner = 
PyTorchRunner(model, disable_jit_freeze=True, func="generate") return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout) diff --git a/natural_language_processing/text_generation/gpt/run1.py b/natural_language_processing/text_generation/gpt/run1.py deleted file mode 100644 index 586823cc..00000000 --- a/natural_language_processing/text_generation/gpt/run1.py +++ /dev/null @@ -1,29 +0,0 @@ -import torch -from transformers import AutoModelForCausalLM, AutoTokenizer, GPT2LMHeadModel, GPT2Tokenizer - - -tokenizer = GPT2Tokenizer.from_pretrained("gpt2") -# model = GPT2Model.from_pretrained(model_name, torchscript=True) - -model = GPT2LMHeadModel.from_pretrained("gpt2", torchscript=True).eval() -text = "Hi, how are you?" -encoded_input = tokenizer.encode(text, return_tensors='pt') - -model = torch.jit.trace(model, torch.randint(10000, (5,))) -scripted_model = torch.jit.script(model) -# output = model.generate(encoded_input, max_length=30, num_beams=2, no_repeat_ngram_size=2, early_stopping=True) -output = scripted_model(encoded_input) -# torch_out = scripted_model(context) - -print(output) -print(type(output)) -print(len(output)) -generated_text_torch = tokenizer.decode(output) - -# print("Fragment: {}".format(sentence_fragment)) -print("Completed: {}".format(generated_text_torch)) - -# print(output) -# print(type(output)) -# -# print(tokenizer.decode(output[0], skip_special_tokens=True)) \ No newline at end of file diff --git a/natural_language_processing/text_generation/gpt/run_example3.py b/natural_language_processing/text_generation/gpt/run_example3.py deleted file mode 100644 index c3512f76..00000000 --- a/natural_language_processing/text_generation/gpt/run_example3.py +++ /dev/null @@ -1,42 +0,0 @@ -from transformers import AutoTokenizer, GPT2LMHeadModel, AutoModelForCausalLM -import torch -import time -import os -torch.set_num_threads(int(os.environ["AIO_NUM_THREADS"])) - -tokenizer = AutoTokenizer.from_pretrained("gpt2") -model = GPT2LMHeadModel.from_pretrained("gpt2", torchscript=True) - -#tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6b") -#model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6b") - -model.eval() -inputs = tokenizer.encode("Hello, I'm looking for an employment, ", return_tensors="pt") -print("\nNo tracing\n") -with torch.no_grad(): - for n in range(3): - break - x = time.time() - outputs = model.generate(inputs, do_sample=True, max_length=100, top_k=50, top_p=0.95, num_return_sequences=1) - print(f"Run: {n}, throughput: {round(outputs.shape[1] / (time.time() - x), 3)} tps") - - -#model.forward = torch.jit.freeze(torch.jit.trace_module(model, {"forward": inputs})) -model.generate = torch.jit.freeze(torch.jit.trace_module(model, {"generate": inputs})) - -print("\nTracing engaged\n") -# with torch.no_grad(): -# for n in range(3): -# x = time.time() -# outputs = model.generate(inputs, do_sample=True, max_length=100, top_k=50, top_p=0.95, num_return_sequences=1) -# print(f"Run: {n}, throughput: {round(outputs.shape[1] / (time.time() - x), 3)} tps") -# text = tokenizer.decode(outputs[0], skip_special_tokens=True) -# print(text) - - -with torch.no_grad(): - x = time.time() - outputs = model.generate(inputs, do_sample=True, max_length=50, top_p=0.95) - print(f"throughput: {round(outputs.shape[1] / (time.time() - x), 3)} tps") -text = tokenizer.decode(outputs[0], skip_special_tokens=True) -print(text) From d2b84b44d8926e31ecb94c60077b4d5aad503d45 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Thu, 14 Dec 2023 13:11:29 +0100 
Subject: [PATCH 59/69] wip --- .../text_generation/gpt/run.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/natural_language_processing/text_generation/gpt/run.py b/natural_language_processing/text_generation/gpt/run.py index d788c677..d0bd625a 100644 --- a/natural_language_processing/text_generation/gpt/run.py +++ b/natural_language_processing/text_generation/gpt/run.py @@ -1,17 +1,14 @@ -import torch -from transformers import GPT2Tokenizer, GPT2LMHeadModel, AutoTokenizer +from transformers import GPT2Tokenizer, GPT2LMHeadModel from utils.benchmark import run_model from utils.nlp.lambada import Lambada def run_pytorch_fp32(model_name, batch_size, num_runs, timeout, lambada_path, **kwargs): - from utils.pytorch import PyTorchRunner, PyTorchRunnerV2, apply_jit_trace, apply_jit_script, apply_jit_trace_module + from utils.pytorch import PyTorchRunnerV2, apply_jit_trace_module def run_single_pass(pytorch_runner, lambada): start_ids = lambada.get_input_array()[0] - # output = pytorch_runner.run(None, start_ids, num_beams=2, no_repeat_ngram_size=2, - # early_stopping=True, max_new_tokens=5) outputs = pytorch_runner.run(None, start_ids, do_sample=True, max_length=50, top_p=0.95) pytorch_runner.set_task_size(outputs.shape[1] - start_ids.shape[1]) output = detokenize(outputs[0]) @@ -21,7 +18,6 @@ def run_single_pass(pytorch_runner, lambada): lambada.submit_prediction(i, first_new_word) tokenizer = GPT2Tokenizer.from_pretrained(model_name) - # tokenizer = AutoTokenizer.from_pretrained("gpt2") def detokenize(answer): return tokenizer.decode(answer, skip_special_tokens=True) @@ -32,14 +28,8 @@ def tokenize(text): model = GPT2LMHeadModel.from_pretrained(model_name, torchscript=True) model.eval() dataset = Lambada(batch_size, tokenize, detokenize, lambada_path) - # model = apply_jit_trace(model, torch.randint(10000, (5,))) - inputs = tokenize("Hello, I'm looking for an employment, ") - # model.generate = apply_jit_trace_module(model, {"generate": inputs}) model.generate = apply_jit_trace_module(model, {"generate": dataset.get_input_array()[0]}) - - # model = apply_jit_trace(model, (dataset.get_input_array()[0],)) runner = PyTorchRunnerV2(model.generate) - # runner = PyTorchRunner(model, disable_jit_freeze=True, func="generate") return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout) From b0b358c473cc29ba846212ed9101272d63d8e349 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Thu, 14 Dec 2023 13:33:56 +0100 Subject: [PATCH 60/69] wip --- .../text_generation/gpt-j/run_example.py | 53 +++++++++++++------ 1 file changed, 36 insertions(+), 17 deletions(-) diff --git a/natural_language_processing/text_generation/gpt-j/run_example.py b/natural_language_processing/text_generation/gpt-j/run_example.py index 5dd046f8..3ffda850 100644 --- a/natural_language_processing/text_generation/gpt-j/run_example.py +++ b/natural_language_processing/text_generation/gpt-j/run_example.py @@ -1,24 +1,43 @@ -from transformers import AutoTokenizer, AutoModelForCausalLM -from transformers import GPT2LMHeadModel, GPT2Tokenizer +from transformers import AutoTokenizer, GPT2LMHeadModel, AutoModelForCausalLM +import torch +import time +import os +torch.set_num_threads(int(os.environ["AIO_NUM_THREADS"])) tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B") -model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6B") -# model = GPT2LMHeadModel.from_pretrained('gpt2', pad_token_id=tokenizer.eos_token_id) +# model = GPT2LMHeadModel.from_pretrained("gpt2", 
torchscript=True) +model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6B", pad_token_id=tokenizer.eos_token_id, torchscript=True).eval() -print(tokenizer.decode(tokenizer.eos_token_id)) +#tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6b") +#model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6b") -sentence = "hey, how are you?" -input_ids = tokenizer.encode(sentence, return_tensors='pt') -print(tokenizer.decode(input_ids[0], skip_special_tokens=True)) -# -# print(input_ids) -# print(type(input_ids)) -# quit() +model.eval() +inputs = tokenizer.encode("Hello, I'm looking for an employment, ", return_tensors="pt") +print("\nNo tracing\n") +with torch.no_grad(): + for n in range(3): + break + x = time.time() + outputs = model.generate(inputs, do_sample=True, max_length=100, top_k=50, top_p=0.95, num_return_sequences=1) + print(f"Run: {n}, throughput: {round(outputs.shape[1] / (time.time() - x), 3)} tps") -print('1') -output = model.generate(input_ids, max_length=30, num_beams=2, no_repeat_ngram_size=2, early_stopping=True) -print(output) -print(type(output)) +#model.forward = torch.jit.freeze(torch.jit.trace_module(model, {"forward": inputs})) +model.generate = torch.jit.freeze(torch.jit.trace_module(model, {"generate": inputs})) -print(tokenizer.decode(output[0], skip_special_tokens=True)) \ No newline at end of file +print("\nTracing engaged\n") +with torch.no_grad(): + for n in range(3): + x = time.time() + outputs = model.generate(inputs, do_sample=True, max_length=100, top_k=50, top_p=0.95, num_return_sequences=1) + print(f"Run: {n}, throughput: {round(outputs.shape[1] / (time.time() - x), 3)} tps") +text = tokenizer.decode(outputs[0], skip_special_tokens=True) +print(text) + + + + +# TORCH_COMPILE=1 AIO_NUM_THREADS=80 python natural_language_processing/text_generation/gpt/run-v2-compile.py -m gpt2 --lambada_path /ampere/aml/lambada_test_plain_text.txt --num_runs 4 + +# AIO_NUM_THREADS=80 python natural_language_processing/text_generation/gpt/run_example3.py +# AIO_NUM_THREADS=80 python natural_language_processing/text_generation/gpt/run-v2-compile.py -m gpt2 --lambada_path /ampere/aml/lambada_test_plain_text.txt --num_runs 4 \ No newline at end of file From e7882ddbf0b021be88df01b637bdb62426128e46 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Thu, 14 Dec 2023 14:04:11 +0100 Subject: [PATCH 61/69] wip --- .../text_generation/gpt-j/run_example.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/natural_language_processing/text_generation/gpt-j/run_example.py b/natural_language_processing/text_generation/gpt-j/run_example.py index 3ffda850..6dbb39eb 100644 --- a/natural_language_processing/text_generation/gpt-j/run_example.py +++ b/natural_language_processing/text_generation/gpt-j/run_example.py @@ -14,12 +14,12 @@ model.eval() inputs = tokenizer.encode("Hello, I'm looking for an employment, ", return_tensors="pt") print("\nNo tracing\n") -with torch.no_grad(): - for n in range(3): - break - x = time.time() - outputs = model.generate(inputs, do_sample=True, max_length=100, top_k=50, top_p=0.95, num_return_sequences=1) - print(f"Run: {n}, throughput: {round(outputs.shape[1] / (time.time() - x), 3)} tps") +# with torch.no_grad(): +# for n in range(1): +# break +# x = time.time() +# outputs = model.generate(inputs, do_sample=True, max_length=100, top_k=50, top_p=0.95, num_return_sequences=1) +# print(f"Run: {n}, throughput: {round(outputs.shape[1] / (time.time() - x), 3)} tps") #model.forward = 
torch.jit.freeze(torch.jit.trace_module(model, {"forward": inputs})) @@ -27,10 +27,9 @@ print("\nTracing engaged\n") with torch.no_grad(): - for n in range(3): - x = time.time() - outputs = model.generate(inputs, do_sample=True, max_length=100, top_k=50, top_p=0.95, num_return_sequences=1) - print(f"Run: {n}, throughput: {round(outputs.shape[1] / (time.time() - x), 3)} tps") + x = time.time() + outputs = model.generate(inputs, do_sample=True, max_length=100, top_k=50, top_p=0.95, num_return_sequences=1) + print(f"throughput: {round(outputs.shape[1] / (time.time() - x), 3)} tps") text = tokenizer.decode(outputs[0], skip_special_tokens=True) print(text) From 3a33cd99fc5c4029a1da81b0023642a3ca83b65e Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Thu, 14 Dec 2023 15:08:28 +0100 Subject: [PATCH 62/69] wip --- .../text_generation/gpt-j/run-v2.py | 16 +++++----------- .../text_generation/gpt/run.py | 3 +-- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/natural_language_processing/text_generation/gpt-j/run-v2.py b/natural_language_processing/text_generation/gpt-j/run-v2.py index eb59a8a9..f0132e2a 100644 --- a/natural_language_processing/text_generation/gpt-j/run-v2.py +++ b/natural_language_processing/text_generation/gpt-j/run-v2.py @@ -6,13 +6,14 @@ def run_pytorch_fp32(model_name, batch_size, num_runs, timeout, lambada_path, **kwargs): - from utils.pytorch import PyTorchRunner, PyTorchRunnerV2, apply_jit_script, apply_jit_trace + from utils.pytorch import PyTorchRunnerV2, apply_jit_trace_module def run_single_pass(pytorch_runner, lambada): start_ids = lambada.get_input_array()[0] - output = pytorch_runner.run(None, start_ids) + output = pytorch_runner.run(None, start_ids, do_sample=True, max_length=50, top_p=0.95) pytorch_runner.set_task_size(output.shape[1] - start_ids.shape[1]) output = detokenize(output[0]) + print(output) for i in range(batch_size): first_new_word = output.replace(detokenize(start_ids[0]), '').split()[0] @@ -28,16 +29,9 @@ def tokenize(text): model = AutoModelForCausalLM.from_pretrained(model_name, pad_token_id=tokenizer.eos_token_id, torchscript=True).eval() dataset = Lambada(batch_size, tokenize, detokenize, lambada_path) - # model = apply_jit_trace(model, (dataset.get_input_array()[0],)) - with torch.no_grad(): - # model = apply_jit_trace(model, torch.randint(10000, (5,))) - model = apply_jit_trace(model, (dataset.get_input_array()[0],)) - # model = apply_jit_script(model) + model.generate = apply_jit_trace_module(model, {"generate": dataset.get_input_array()[0]}) - runner = PyTorchRunnerV2(model) - - # runner = PyTorchRunner(model, disable_jit_freeze=False, func="generate") - # runner = PyTorchRunnerV2(model) + runner = PyTorchRunnerV2(model.generate) return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout) diff --git a/natural_language_processing/text_generation/gpt/run.py b/natural_language_processing/text_generation/gpt/run.py index d0bd625a..315ccd81 100644 --- a/natural_language_processing/text_generation/gpt/run.py +++ b/natural_language_processing/text_generation/gpt/run.py @@ -25,8 +25,7 @@ def detokenize(answer): def tokenize(text): return tokenizer.encode(text, return_tensors='pt') - model = GPT2LMHeadModel.from_pretrained(model_name, torchscript=True) - model.eval() + model = GPT2LMHeadModel.from_pretrained(model_name, torchscript=True).eval() dataset = Lambada(batch_size, tokenize, detokenize, lambada_path) model.generate = apply_jit_trace_module(model, {"generate": dataset.get_input_array()[0]}) runner = 
PyTorchRunnerV2(model.generate) From 61b904921f6f9f7876232af93940b0850a8b6744 Mon Sep 17 00:00:00 2001 From: Marcel Wilnicki Date: Thu, 14 Dec 2023 17:10:45 +0100 Subject: [PATCH 63/69] clean --- .../{run-v2-compile.py => run-compile.py} | 9 +--- .../text_generation/gpt-j/run-v2.py | 47 ------------------- .../text_generation/gpt-j/run.py | 15 +++--- .../text_generation/gpt-j/run1.py | 33 ------------- .../text_generation/gpt-j/run2.py | 30 ------------ .../text_generation/gpt-j/run_example.py | 42 ----------------- .../text_generation/gpt/run-compile.py | 24 +--------- 7 files changed, 11 insertions(+), 189 deletions(-) rename natural_language_processing/text_generation/gpt-j/{run-v2-compile.py => run-compile.py} (81%) delete mode 100644 natural_language_processing/text_generation/gpt-j/run-v2.py delete mode 100644 natural_language_processing/text_generation/gpt-j/run1.py delete mode 100644 natural_language_processing/text_generation/gpt-j/run2.py delete mode 100644 natural_language_processing/text_generation/gpt-j/run_example.py diff --git a/natural_language_processing/text_generation/gpt-j/run-v2-compile.py b/natural_language_processing/text_generation/gpt-j/run-compile.py similarity index 81% rename from natural_language_processing/text_generation/gpt-j/run-v2-compile.py rename to natural_language_processing/text_generation/gpt-j/run-compile.py index 978b8ad9..eb2819cc 100644 --- a/natural_language_processing/text_generation/gpt-j/run-v2-compile.py +++ b/natural_language_processing/text_generation/gpt-j/run-compile.py @@ -7,7 +7,7 @@ def run_pytorch_fp32(model_name, batch_size, num_runs, timeout, lambada_path, **kwargs): - from utils.pytorch import PyTorchRunner, PyTorchRunnerV2, apply_jit_script, apply_jit_trace, apply_compile_maybe + from utils.pytorch import PyTorchRunnerV2, apply_compile_maybe def run_single_pass(pytorch_runner, lambada): start_ids = lambada.get_input_array()[0] @@ -27,18 +27,13 @@ def detokenize(answer): def tokenize(text): return tokenizer.encode(text, return_tensors='pt') - model = AutoModelForCausalLM.from_pretrained(model_name) - model.eval() + model = AutoModelForCausalLM.from_pretrained(model_name).eval() dataset = Lambada(batch_size, tokenize, detokenize, lambada_path) - # model = apply_jit_trace(model, (dataset.get_input_array()[0],)) aio = '_aio_profiler_print' in dir(torch._C) and os.environ.get("AIO_PROCESS_MODE") != "0" model.greedy_search = apply_compile_maybe(model.greedy_search, aio) runner = PyTorchRunnerV2(model.generate) - # runner = PyTorchRunner(model, disable_jit_freeze=False, func="generate") - # runner = PyTorchRunnerV2(model) - return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout) diff --git a/natural_language_processing/text_generation/gpt-j/run-v2.py b/natural_language_processing/text_generation/gpt-j/run-v2.py deleted file mode 100644 index f0132e2a..00000000 --- a/natural_language_processing/text_generation/gpt-j/run-v2.py +++ /dev/null @@ -1,47 +0,0 @@ -import torch -from transformers import AutoTokenizer, AutoModelForCausalLM - -from utils.benchmark import run_model -from utils.nlp.lambada import Lambada - - -def run_pytorch_fp32(model_name, batch_size, num_runs, timeout, lambada_path, **kwargs): - from utils.pytorch import PyTorchRunnerV2, apply_jit_trace_module - - def run_single_pass(pytorch_runner, lambada): - start_ids = lambada.get_input_array()[0] - output = pytorch_runner.run(None, start_ids, do_sample=True, max_length=50, top_p=0.95) - pytorch_runner.set_task_size(output.shape[1] - 
start_ids.shape[1]) - output = detokenize(output[0]) - print(output) - - for i in range(batch_size): - first_new_word = output.replace(detokenize(start_ids[0]), '').split()[0] - lambada.submit_prediction(i, first_new_word) - - tokenizer = AutoTokenizer.from_pretrained(model_name) - - def detokenize(answer): - return tokenizer.decode(answer, skip_special_tokens=True) - - def tokenize(text): - return tokenizer.encode(text, return_tensors='pt') - - model = AutoModelForCausalLM.from_pretrained(model_name, pad_token_id=tokenizer.eos_token_id, torchscript=True).eval() - dataset = Lambada(batch_size, tokenize, detokenize, lambada_path) - model.generate = apply_jit_trace_module(model, {"generate": dataset.get_input_array()[0]}) - - runner = PyTorchRunnerV2(model.generate) - - return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout) - - -if __name__ == "__main__": - from utils.helpers import DefaultArgParser - - gpt_variants = ["EleutherAI/gpt-j-6B"] - parser = DefaultArgParser(["pytorch"]) - parser.require_model_name(gpt_variants) - parser.ask_for_batch_size() - parser.add_argument('--lambada_path', type=str, required=True, help="Path to Lambada dataset") - run_pytorch_fp32(**vars(parser.parse())) diff --git a/natural_language_processing/text_generation/gpt-j/run.py b/natural_language_processing/text_generation/gpt-j/run.py index dbd89ea9..a5a210cf 100644 --- a/natural_language_processing/text_generation/gpt-j/run.py +++ b/natural_language_processing/text_generation/gpt-j/run.py @@ -1,3 +1,4 @@ +import torch from transformers import AutoTokenizer, AutoModelForCausalLM from utils.benchmark import run_model @@ -5,12 +6,12 @@ def run_pytorch_fp32(model_name, batch_size, num_runs, timeout, lambada_path, **kwargs): - from utils.pytorch import PyTorchRunner, PyTorchRunnerV2 + from utils.pytorch import PyTorchRunnerV2, apply_jit_trace_module def run_single_pass(pytorch_runner, lambada): start_ids = lambada.get_input_array()[0] - output = pytorch_runner.run(None, start_ids) - # pytorch_runner.set_task_size(output.shape[1] - start_ids.shape[1]) + output = pytorch_runner.run(None, start_ids, do_sample=True, max_length=50, top_p=0.95) + pytorch_runner.set_task_size(output.shape[1] - start_ids.shape[1]) output = detokenize(output[0]) for i in range(batch_size): @@ -25,13 +26,11 @@ def detokenize(answer): def tokenize(text): return tokenizer.encode(text, return_tensors='pt') - model = AutoModelForCausalLM.from_pretrained(model_name, pad_token_id=tokenizer.eos_token_id, torchscript=True) + model = AutoModelForCausalLM.from_pretrained(model_name, pad_token_id=tokenizer.eos_token_id, torchscript=True).eval() dataset = Lambada(batch_size, tokenize, detokenize, lambada_path) - runner = PyTorchRunner(model, skip_script=True, disable_jit_freeze=False, - example_inputs=(dataset.get_input_array()[0],)) + model.generate = apply_jit_trace_module(model, {"generate": dataset.get_input_array()[0]}) - # runner = PyTorchRunner(model, disable_jit_freeze=False, func="generate") - # runner = PyTorchRunnerV2(model) + runner = PyTorchRunnerV2(model.generate) return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout) diff --git a/natural_language_processing/text_generation/gpt-j/run1.py b/natural_language_processing/text_generation/gpt-j/run1.py deleted file mode 100644 index e2bae906..00000000 --- a/natural_language_processing/text_generation/gpt-j/run1.py +++ /dev/null @@ -1,33 +0,0 @@ -import torch -from transformers import AutoModelForCausalLM, AutoTokenizer -from transformers 
import GPT2LMHeadModel, GPT2Tokenizer - -# tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B") -tokenizer = GPT2Tokenizer.from_pretrained("gpt2") -model = GPT2LMHeadModel.from_pretrained("gpt2", torchscript=True) - -# model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6B", pad_token_id=tokenizer.eos_token_id, torchscript=True) -text = "Hi, how are you?" -encoded_input = tokenizer(text, return_tensors='pt') -# print(encoded_input) -input_dict = {key: value for key, value in encoded_input.items()} - -def model_wrapper(input_ids): - return model(input_ids)[0] - -# traced_model = torch.jit.trace(model, (input_dict['input_ids'],)) -traced_model = torch.jit.trace(model_wrapper, encoded_input) - -frozen_model = torch.jit.freeze(traced_model) - -#output = frozen_model(**encoded_input) - -with torch.no_grad(): - output = frozen_model(encoded_input) - -decoded_output = tokenizer.decode(output.argmax(dim=-1)[0]) - -# output_ids = output.logits.argmax(dim=-1) -# decoded_output = tokenizer.decode(output_ids[0]) -# -# tokenizer.decode(output, skip_special_tokens=True) diff --git a/natural_language_processing/text_generation/gpt-j/run2.py b/natural_language_processing/text_generation/gpt-j/run2.py deleted file mode 100644 index 26f9ceaa..00000000 --- a/natural_language_processing/text_generation/gpt-j/run2.py +++ /dev/null @@ -1,30 +0,0 @@ -import torch -from transformers import GPT2LMHeadModel, GPT2Tokenizer - -# Load the model and tokenizer -model = GPT2LMHeadModel.from_pretrained("gpt2") -tokenizer = GPT2Tokenizer.from_pretrained("gpt2") - -model.eval() -# Prepare the input -sentence = "Hello, how are you?" -inputs = tokenizer.encode(sentence, return_tensors="pt") - -# Define a wrapper function for the model to return only logits (tensors) -def model_wrapper(input_ids): - return model(input_ids, return_dict=False)[0] - -# Trace the model with the wrapper function -traced_model = torch.jit.trace(model_wrapper, inputs) - -# Freeze the model -frozen_model = torch.jit.freeze(traced_model) - -# Generate output -with torch.no_grad(): - outputs = frozen_model(inputs) - -# Decode the output -decoded_output = tokenizer.decode(outputs.argmax(dim=-1)[0]) - -print(decoded_output) \ No newline at end of file diff --git a/natural_language_processing/text_generation/gpt-j/run_example.py b/natural_language_processing/text_generation/gpt-j/run_example.py deleted file mode 100644 index 6dbb39eb..00000000 --- a/natural_language_processing/text_generation/gpt-j/run_example.py +++ /dev/null @@ -1,42 +0,0 @@ -from transformers import AutoTokenizer, GPT2LMHeadModel, AutoModelForCausalLM -import torch -import time -import os -torch.set_num_threads(int(os.environ["AIO_NUM_THREADS"])) - -tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B") -# model = GPT2LMHeadModel.from_pretrained("gpt2", torchscript=True) -model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6B", pad_token_id=tokenizer.eos_token_id, torchscript=True).eval() - -#tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6b") -#model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6b") - -model.eval() -inputs = tokenizer.encode("Hello, I'm looking for an employment, ", return_tensors="pt") -print("\nNo tracing\n") -# with torch.no_grad(): -# for n in range(1): -# break -# x = time.time() -# outputs = model.generate(inputs, do_sample=True, max_length=100, top_k=50, top_p=0.95, num_return_sequences=1) -# print(f"Run: {n}, throughput: {round(outputs.shape[1] / (time.time() - x), 3)} tps") - - 
-#model.forward = torch.jit.freeze(torch.jit.trace_module(model, {"forward": inputs}))
-model.generate = torch.jit.freeze(torch.jit.trace_module(model, {"generate": inputs}))
-
-print("\nTracing engaged\n")
-with torch.no_grad():
-    x = time.time()
-    outputs = model.generate(inputs, do_sample=True, max_length=100, top_k=50, top_p=0.95, num_return_sequences=1)
-    print(f"throughput: {round(outputs.shape[1] / (time.time() - x), 3)} tps")
-text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-print(text)
-
-
-
-
-# TORCH_COMPILE=1 AIO_NUM_THREADS=80 python natural_language_processing/text_generation/gpt/run-v2-compile.py -m gpt2 --lambada_path /ampere/aml/lambada_test_plain_text.txt --num_runs 4
-
-# AIO_NUM_THREADS=80 python natural_language_processing/text_generation/gpt/run_example3.py
-# AIO_NUM_THREADS=80 python natural_language_processing/text_generation/gpt/run-v2-compile.py -m gpt2 --lambada_path /ampere/aml/lambada_test_plain_text.txt --num_runs 4
\ No newline at end of file
diff --git a/natural_language_processing/text_generation/gpt/run-compile.py b/natural_language_processing/text_generation/gpt/run-compile.py
index 0f4b568d..a0281259 100644
--- a/natural_language_processing/text_generation/gpt/run-compile.py
+++ b/natural_language_processing/text_generation/gpt/run-compile.py
@@ -13,27 +13,10 @@ def run_pytorch_fp32(model_name, batch_size, num_runs, timeout, lambada_path, **
     def run_single_pass(pytorch_runner, lambada):
         start_ids = lambada.get_input_array()[0]
         output = pytorch_runner.run(inputs=start_ids, max_new_tokens=10)
-
+        output = detokenize(output[0])
         print(output)
-        print(type(output))
         quit()
-        # output = pytorch_runner.run(None, start_ids)
-        # pytorch_runner.set_task_size(output[1] - start_ids.shape[1])
-        # logits = output[0]
-        # token_ids = torch.argmax(logits, dim=-1)
-
-        # print(type(token_ids))
-        # print(token_ids)
-        # text = tokenizer.decode(token_ids)
-        # print(text)
-        # quit()
-
-        # output = detokenize(output[0])
-
-        # for i in range(batch_size):
-        #     first_new_word = output.replace(detokenize(start_ids[0]), '').split()[0]
-        #     lambada.submit_prediction(i, first_new_word)
 
     tokenizer = GPT2Tokenizer.from_pretrained(model_name)
 
@@ -43,14 +26,11 @@ def detokenize(answer):
     def tokenize(text):
         return tokenizer.encode(text, return_tensors='pt')
 
-    model = GPT2LMHeadModel.from_pretrained(model_name, torchscript=True)
-    model.eval()
+    model = GPT2LMHeadModel.from_pretrained(model_name, torchscript=True).eval()
     dataset = Lambada(batch_size, tokenize, detokenize, lambada_path)
     aio = '_aio_profiler_print' in dir(torch._C) and os.environ.get("AIO_PROCESS_MODE") != "0"
     model.greedy_search = apply_compile_maybe(model.greedy_search, aio)
-
     runner = PyTorchRunnerV2(model.generate)
-    # runner = PyTorchRunner(model, disable_jit_freeze=True, func="generate")
 
     return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout)

From e300b428d0c1d6535bc1d36c74b364d476fef9ad Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Thu, 14 Dec 2023 17:48:07 +0100
Subject: [PATCH 64/69] wip

---
 .../text_generation/gpt/run-compile.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/natural_language_processing/text_generation/gpt/run-compile.py b/natural_language_processing/text_generation/gpt/run-compile.py
index a0281259..73d97f60 100644
--- a/natural_language_processing/text_generation/gpt/run-compile.py
+++ b/natural_language_processing/text_generation/gpt/run-compile.py
@@ -13,10 +13,12 @@ def run_pytorch_fp32(model_name, batch_size, num_runs, timeout, lambada_path, **
     def run_single_pass(pytorch_runner, lambada):
         start_ids = lambada.get_input_array()[0]
         output = pytorch_runner.run(inputs=start_ids, max_new_tokens=10)
+        pytorch_runner.set_task_size(output.shape[1] - start_ids.shape[1])
         output = detokenize(output[0])
-        print(output)
-        quit()
+        for i in range(batch_size):
+            first_new_word = output.replace(detokenize(start_ids[0]), '').split()[0]
+            lambada.submit_prediction(i, first_new_word)
 
     tokenizer = GPT2Tokenizer.from_pretrained(model_name)

From fa6bf5216e4473ba8dcb0f0638d8b4ccad6ec5ba Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Mon, 18 Dec 2023 15:24:42 +0100
Subject: [PATCH 65/69] wip

---
 .../text_generation/gpt-j/run-compile.py | 48 -------------------
 .../text_generation/gpt-j/run.py         | 46 ------------------
 .../text_generation/gpt/run-compile.py   | 48 -------------------
 .../text_generation/gpt/run.py           | 14 ++++--
 4 files changed, 9 insertions(+), 147 deletions(-)
 delete mode 100644 natural_language_processing/text_generation/gpt-j/run-compile.py
 delete mode 100644 natural_language_processing/text_generation/gpt-j/run.py
 delete mode 100644 natural_language_processing/text_generation/gpt/run-compile.py

diff --git a/natural_language_processing/text_generation/gpt-j/run-compile.py b/natural_language_processing/text_generation/gpt-j/run-compile.py
deleted file mode 100644
index eb2819cc..00000000
--- a/natural_language_processing/text_generation/gpt-j/run-compile.py
+++ /dev/null
@@ -1,48 +0,0 @@
-import os
-import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM
-
-from utils.benchmark import run_model
-from utils.nlp.lambada import Lambada
-
-
-def run_pytorch_fp32(model_name, batch_size, num_runs, timeout, lambada_path, **kwargs):
-    from utils.pytorch import PyTorchRunnerV2, apply_compile_maybe
-
-    def run_single_pass(pytorch_runner, lambada):
-        start_ids = lambada.get_input_array()[0]
-        output = pytorch_runner.run(inputs=start_ids, max_new_tokens=10)
-        pytorch_runner.set_task_size(output.shape[1] - start_ids.shape[1])
-        output = detokenize(output[0])
-
-        for i in range(batch_size):
-            first_new_word = output.replace(detokenize(start_ids[0]), '').split()[0]
-            lambada.submit_prediction(i, first_new_word)
-
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-
-    def detokenize(answer):
-        return tokenizer.decode(answer, skip_special_tokens=True)
-
-    def tokenize(text):
-        return tokenizer.encode(text, return_tensors='pt')
-
-    model = AutoModelForCausalLM.from_pretrained(model_name).eval()
-    dataset = Lambada(batch_size, tokenize, detokenize, lambada_path)
-    aio = '_aio_profiler_print' in dir(torch._C) and os.environ.get("AIO_PROCESS_MODE") != "0"
-    model.greedy_search = apply_compile_maybe(model.greedy_search, aio)
-
-    runner = PyTorchRunnerV2(model.generate)
-
-    return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout)
-
-
-if __name__ == "__main__":
-    from utils.helpers import DefaultArgParser
-
-    gpt_variants = ["EleutherAI/gpt-j-6B"]
-    parser = DefaultArgParser(["pytorch"])
-    parser.require_model_name(gpt_variants)
-    parser.ask_for_batch_size()
-    parser.add_argument('--lambada_path', type=str, required=True, help="Path to Lambada dataset")
-    run_pytorch_fp32(**vars(parser.parse()))
diff --git a/natural_language_processing/text_generation/gpt-j/run.py b/natural_language_processing/text_generation/gpt-j/run.py
deleted file mode 100644
index a5a210cf..00000000
--- a/natural_language_processing/text_generation/gpt-j/run.py
+++ /dev/null
@@ -1,46 +0,0 @@
-import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM
-
-from utils.benchmark import run_model
-from utils.nlp.lambada import Lambada
-
-
-def run_pytorch_fp32(model_name, batch_size, num_runs, timeout, lambada_path, **kwargs):
-    from utils.pytorch import PyTorchRunnerV2, apply_jit_trace_module
-
-    def run_single_pass(pytorch_runner, lambada):
-        start_ids = lambada.get_input_array()[0]
-        output = pytorch_runner.run(None, start_ids, do_sample=True, max_length=50, top_p=0.95)
-        pytorch_runner.set_task_size(output.shape[1] - start_ids.shape[1])
-        output = detokenize(output[0])
-
-        for i in range(batch_size):
-            first_new_word = output.replace(detokenize(start_ids[0]), '').split()[0]
-            lambada.submit_prediction(i, first_new_word)
-
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-
-    def detokenize(answer):
-        return tokenizer.decode(answer, skip_special_tokens=True)
-
-    def tokenize(text):
-        return tokenizer.encode(text, return_tensors='pt')
-
-    model = AutoModelForCausalLM.from_pretrained(model_name, pad_token_id=tokenizer.eos_token_id, torchscript=True).eval()
-    dataset = Lambada(batch_size, tokenize, detokenize, lambada_path)
-    model.generate = apply_jit_trace_module(model, {"generate": dataset.get_input_array()[0]})
-
-    runner = PyTorchRunnerV2(model.generate)
-
-    return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout)
-
-
-if __name__ == "__main__":
-    from utils.helpers import DefaultArgParser
-
-    gpt_variants = ["EleutherAI/gpt-j-6B"]
-    parser = DefaultArgParser(["pytorch"])
-    parser.require_model_name(gpt_variants)
-    parser.ask_for_batch_size()
-    parser.add_argument('--lambada_path', type=str, required=True, help="Path to Lambada dataset")
-    run_pytorch_fp32(**vars(parser.parse()))
diff --git a/natural_language_processing/text_generation/gpt/run-compile.py b/natural_language_processing/text_generation/gpt/run-compile.py
deleted file mode 100644
index 73d97f60..00000000
--- a/natural_language_processing/text_generation/gpt/run-compile.py
+++ /dev/null
@@ -1,48 +0,0 @@
-import os
-
-import torch
-from transformers import GPT2Tokenizer, GPT2LMHeadModel
-
-from utils.benchmark import run_model
-from utils.nlp.lambada import Lambada
-
-
-def run_pytorch_fp32(model_name, batch_size, num_runs, timeout, lambada_path, **kwargs):
-    from utils.pytorch import PyTorchRunner, PyTorchRunnerV2, apply_jit_trace, apply_jit_script, apply_compile_maybe
-
-    def run_single_pass(pytorch_runner, lambada):
-        start_ids = lambada.get_input_array()[0]
-        output = pytorch_runner.run(inputs=start_ids, max_new_tokens=10)
-        pytorch_runner.set_task_size(output.shape[1] - start_ids.shape[1])
-        output = detokenize(output[0])
-
-        for i in range(batch_size):
-            first_new_word = output.replace(detokenize(start_ids[0]), '').split()[0]
-            lambada.submit_prediction(i, first_new_word)
-
-    tokenizer = GPT2Tokenizer.from_pretrained(model_name)
-
-    def detokenize(answer):
-        return tokenizer.decode(answer, skip_special_tokens=True)
-
-    def tokenize(text):
-        return tokenizer.encode(text, return_tensors='pt')
-
-    model = GPT2LMHeadModel.from_pretrained(model_name, torchscript=True).eval()
-    dataset = Lambada(batch_size, tokenize, detokenize, lambada_path)
-    aio = '_aio_profiler_print' in dir(torch._C) and os.environ.get("AIO_PROCESS_MODE") != "0"
-    model.greedy_search = apply_compile_maybe(model.greedy_search, aio)
-    runner = PyTorchRunnerV2(model.generate)
-
-    return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout)
-
-
-if __name__ == "__main__":
-    from utils.helpers import DefaultArgParser
-
-    gpt_variants = ["gpt2"]
-    parser = DefaultArgParser(["pytorch"])
-    parser.require_model_name(gpt_variants)
-    parser.ask_for_batch_size()
-    parser.add_argument('--lambada_path', type=str, required=True, help="Path to Lambada dataset")
-    run_pytorch_fp32(**vars(parser.parse()))
diff --git a/natural_language_processing/text_generation/gpt/run.py b/natural_language_processing/text_generation/gpt/run.py
index 315ccd81..73d97f60 100644
--- a/natural_language_processing/text_generation/gpt/run.py
+++ b/natural_language_processing/text_generation/gpt/run.py
@@ -1,3 +1,6 @@
+import os
+
+import torch
 from transformers import GPT2Tokenizer, GPT2LMHeadModel
 
 from utils.benchmark import run_model
@@ -5,13 +8,13 @@
 
 
 def run_pytorch_fp32(model_name, batch_size, num_runs, timeout, lambada_path, **kwargs):
-    from utils.pytorch import PyTorchRunnerV2, apply_jit_trace_module
+    from utils.pytorch import PyTorchRunner, PyTorchRunnerV2, apply_jit_trace, apply_jit_script, apply_compile_maybe
 
     def run_single_pass(pytorch_runner, lambada):
         start_ids = lambada.get_input_array()[0]
-        outputs = pytorch_runner.run(None, start_ids, do_sample=True, max_length=50, top_p=0.95)
-        pytorch_runner.set_task_size(outputs.shape[1] - start_ids.shape[1])
-        output = detokenize(outputs[0])
+        output = pytorch_runner.run(inputs=start_ids, max_new_tokens=10)
+        pytorch_runner.set_task_size(output.shape[1] - start_ids.shape[1])
+        output = detokenize(output[0])
 
         for i in range(batch_size):
             first_new_word = output.replace(detokenize(start_ids[0]), '').split()[0]
@@ -27,7 +30,8 @@ def tokenize(text):
 
     model = GPT2LMHeadModel.from_pretrained(model_name, torchscript=True).eval()
     dataset = Lambada(batch_size, tokenize, detokenize, lambada_path)
-    model.generate = apply_jit_trace_module(model, {"generate": dataset.get_input_array()[0]})
+    aio = '_aio_profiler_print' in dir(torch._C) and os.environ.get("AIO_PROCESS_MODE") != "0"
+    model.greedy_search = apply_compile_maybe(model.greedy_search, aio)
     runner = PyTorchRunnerV2(model.generate)
 
     return run_model(run_single_pass, runner, dataset, batch_size, num_runs, timeout)

From 097f2b5796bca5034076b3f6453e84d12b0b1ad5 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Mon, 18 Dec 2023 15:32:28 +0100
Subject: [PATCH 66/69] wip

---
 natural_language_processing/text_generation/gpt/run.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/natural_language_processing/text_generation/gpt/run.py b/natural_language_processing/text_generation/gpt/run.py
index 73d97f60..94b80d7f 100644
--- a/natural_language_processing/text_generation/gpt/run.py
+++ b/natural_language_processing/text_generation/gpt/run.py
@@ -8,7 +8,7 @@
 
 
 def run_pytorch_fp32(model_name, batch_size, num_runs, timeout, lambada_path, **kwargs):
-    from utils.pytorch import PyTorchRunner, PyTorchRunnerV2, apply_jit_trace, apply_jit_script, apply_compile_maybe
+    from utils.pytorch import PyTorchRunnerV2, apply_compile_maybe
 
     def run_single_pass(pytorch_runner, lambada):
         start_ids = lambada.get_input_array()[0]

From ac835280836233b9772fe1d00a02de5c77607a14 Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Mon, 18 Dec 2023 16:07:56 +0100
Subject: [PATCH 67/69] wip

---
 natural_language_processing/text_generation/gpt/run.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/natural_language_processing/text_generation/gpt/run.py b/natural_language_processing/text_generation/gpt/run.py
index 94b80d7f..b8e9978e 100644
--- a/natural_language_processing/text_generation/gpt/run.py
+++ b/natural_language_processing/text_generation/gpt/run.py
@@ -12,7 +12,7 @@ def run_pytorch_fp32(model_name, batch_size, num_runs, timeout, lambada_path, **
     def run_single_pass(pytorch_runner, lambada):
         start_ids = lambada.get_input_array()[0]
-        output = pytorch_runner.run(inputs=start_ids, max_new_tokens=10)
+        output = pytorch_runner.run(inputs=start_ids, max_new_tokens=10, pad_token_id=tokenizer.pad_token_id)
         pytorch_runner.set_task_size(output.shape[1] - start_ids.shape[1])
         output = detokenize(output[0])
 
@@ -21,6 +21,8 @@ def run_single_pass(pytorch_runner, lambada):
         lambada.submit_prediction(i, first_new_word)
 
     tokenizer = GPT2Tokenizer.from_pretrained(model_name)
+    if tokenizer.pad_token is None:
+        tokenizer.pad_token = tokenizer.eos_token
 
     def detokenize(answer):
         return tokenizer.decode(answer, skip_special_tokens=True)

From 9d740281d554840136eb8b9ab26bb79db177bd8e Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Mon, 18 Dec 2023 16:13:45 +0100
Subject: [PATCH 68/69] wip

---
 .../text_generation/gpt/run.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/natural_language_processing/text_generation/gpt/run.py b/natural_language_processing/text_generation/gpt/run.py
index b8e9978e..63cd38ae 100644
--- a/natural_language_processing/text_generation/gpt/run.py
+++ b/natural_language_processing/text_generation/gpt/run.py
@@ -1,7 +1,7 @@
 import os
 
 import torch
-from transformers import GPT2Tokenizer, GPT2LMHeadModel
+from transformers import GPT2Tokenizer, GPT2LMHeadModel, AutoTokenizer
 
 from utils.benchmark import run_model
 from utils.nlp.lambada import Lambada
@@ -12,7 +12,8 @@ def run_pytorch_fp32(model_name, batch_size, num_runs, timeout, lambada_path, **
     def run_single_pass(pytorch_runner, lambada):
         start_ids = lambada.get_input_array()[0]
-        output = pytorch_runner.run(inputs=start_ids, max_new_tokens=10, pad_token_id=tokenizer.pad_token_id)
+        # output = pytorch_runner.run(inputs=start_ids, max_new_tokens=10, pad_token_id=tokenizer.pad_token_id)
+        output = pytorch_runner.run(inputs=start_ids, max_new_tokens=10)
         pytorch_runner.set_task_size(output.shape[1] - start_ids.shape[1])
         output = detokenize(output[0])
 
@@ -20,9 +21,10 @@ def run_single_pass(pytorch_runner, lambada):
         first_new_word = output.replace(detokenize(start_ids[0]), '').split()[0]
         lambada.submit_prediction(i, first_new_word)
 
-    tokenizer = GPT2Tokenizer.from_pretrained(model_name)
-    if tokenizer.pad_token is None:
-        tokenizer.pad_token = tokenizer.eos_token
+    # tokenizer = GPT2Tokenizer.from_pretrained(model_name)
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    # if tokenizer.pad_token is None:
+    #     tokenizer.pad_token = tokenizer.eos_token
 
     def detokenize(answer):
         return tokenizer.decode(answer, skip_special_tokens=True)

From 0944b67bc00d3b4b765d6bee3882a33fb106074c Mon Sep 17 00:00:00 2001
From: Marcel Wilnicki
Date: Mon, 18 Dec 2023 16:16:23 +0100
Subject: [PATCH 69/69] wip

---
 .../text_generation/gpt/run.py | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/natural_language_processing/text_generation/gpt/run.py b/natural_language_processing/text_generation/gpt/run.py
index 63cd38ae..b8e9978e 100644
--- a/natural_language_processing/text_generation/gpt/run.py
+++ b/natural_language_processing/text_generation/gpt/run.py
@@ -1,7 +1,7 @@
 import os
 
 import torch
-from transformers import GPT2Tokenizer, GPT2LMHeadModel, AutoTokenizer
+from transformers import GPT2Tokenizer, GPT2LMHeadModel
 
 from utils.benchmark import run_model
 from utils.nlp.lambada import Lambada
@@ -12,8 +12,7 @@ def run_pytorch_fp32(model_name, batch_size, num_runs, timeout, lambada_path, **
     def run_single_pass(pytorch_runner, lambada):
         start_ids = lambada.get_input_array()[0]
-        # output = pytorch_runner.run(inputs=start_ids, max_new_tokens=10, pad_token_id=tokenizer.pad_token_id)
-        output = pytorch_runner.run(inputs=start_ids, max_new_tokens=10)
+        output = pytorch_runner.run(inputs=start_ids, max_new_tokens=10, pad_token_id=tokenizer.pad_token_id)
         pytorch_runner.set_task_size(output.shape[1] - start_ids.shape[1])
         output = detokenize(output[0])
 
@@ -21,10 +20,9 @@ def run_single_pass(pytorch_runner, lambada):
         first_new_word = output.replace(detokenize(start_ids[0]), '').split()[0]
         lambada.submit_prediction(i, first_new_word)
 
-    # tokenizer = GPT2Tokenizer.from_pretrained(model_name)
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    # if tokenizer.pad_token is None:
-    #     tokenizer.pad_token = tokenizer.eos_token
+    tokenizer = GPT2Tokenizer.from_pretrained(model_name)
+    if tokenizer.pad_token is None:
+        tokenizer.pad_token = tokenizer.eos_token
 
     def detokenize(answer):
         return tokenizer.decode(answer, skip_special_tokens=True)
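
The set_task_size call threaded through these patches reports throughput in newly
generated tokens rather than in whole sequences: output.shape[1] - start_ids.shape[1]
is exactly the number of tokens generate() appended to the prompt. A self-contained
sketch of the same measurement outside the benchmark harness (the prompt, the token
budget, and the bare time.time() timing are illustrative stand-ins for the Lambada
dataset and PyTorchRunnerV2):

import time

import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2", pad_token_id=tokenizer.eos_token_id).eval()

start_ids = tokenizer.encode("Hello, I'm looking for an employment, ", return_tensors="pt")

with torch.no_grad():
    start = time.time()
    output = model.generate(start_ids, max_new_tokens=10)
    elapsed = time.time() - start

# The task size counts only the freshly generated tokens, mirroring
# pytorch_runner.set_task_size(output.shape[1] - start_ids.shape[1]) above.
new_tokens = output.shape[1] - start_ids.shape[1]
print(f"throughput: {round(new_tokens / elapsed, 3)} tps")
print(tokenizer.decode(output[0], skip_special_tokens=True))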
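PATCH 65 routes model.greedy_search through apply_compile_maybe from utils/pytorch.py,
whose implementation is not shown in this series. A minimal sketch of what such a
helper could look like, assuming it applies torch.compile when available and otherwise
returns the callable untouched; the handling of the aio flag below is an assumption,
not the repository's actual logic:

import torch

def apply_compile_maybe(func, aio):
    # Assumed behaviour: leave the eager callable alone when the AIO build is
    # driving execution, or when this PyTorch predates torch.compile (< 2.0).
    if aio or not hasattr(torch, "compile"):
        return func
    # torch.compile returns a wrapper that compiles on first invocation.
    return torch.compile(func)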
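PATCHES 67 through 69 circle the same GPT-2 quirk: the pretrained tokenizer ships
without a pad token, so generate() has nothing to pass as pad_token_id. The series
settles on aliasing the pad token to the end-of-sequence token; in isolation:

from transformers import GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

# GPT-2 defines bos/eos but no pad token; reuse EOS so generate() can pad.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

assert tokenizer.pad_token_id == tokenizer.eos_token_id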