From 0da6b17518210038ba1a2849c31c150f112317a6 Mon Sep 17 00:00:00 2001
From: Karol Kontny
Date: Fri, 10 May 2024 07:56:14 +0200
Subject: [PATCH 1/4] Changes to run transformers 4.40

---
 natural_language_processing/text_generation/alpaca/run.py | 8 +++++---
 natural_language_processing/text_generation/llama2/run.py | 6 ++++--
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/natural_language_processing/text_generation/alpaca/run.py b/natural_language_processing/text_generation/alpaca/run.py
index 0e095813..0ab37880 100644
--- a/natural_language_processing/text_generation/alpaca/run.py
+++ b/natural_language_processing/text_generation/alpaca/run.py
@@ -22,11 +22,13 @@ def run_pytorch(model_path, num_runs, timeout, dataset_path, use_torch_fp16=Fals
     from utils.nlp.alpaca_instruct import AlpacaInstruct
     from utils.pytorch import PyTorchRunnerV2, apply_compile
     from utils.benchmark import run_model
-    from transformers import AutoModelForCausalLM, AutoTokenizer
+    from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

     def run_single_pass(pytorch_runner, _dataset):
         inputs = encode(_dataset.get_input_string())
-        outputs = pytorch_runner.run(inputs=inputs.input_ids, max_new_tokens=100)
+        config = GenerationConfig()
+        config.max_new_tokens = 100
+        outputs = pytorch_runner.run(inputs=inputs.input_ids, generation_config=config)
         pytorch_runner.set_task_size(outputs.shape[1] - inputs.input_ids.shape[1])
         response = decode(outputs[:, inputs.input_ids.shape[1]:])
         _dataset.submit_prediction(response)
@@ -35,7 +37,7 @@ def run_single_pass(pytorch_runner, _dataset):
     if use_torch_fp16:
         model = model.half()
     model.eval()
-    model.greedy_search = apply_compile(model.greedy_search)
+    model.generate = apply_compile(model.generate)
     tokenizer = AutoTokenizer.from_pretrained(model_path)
     dataset = AlpacaInstruct(1, dataset_path=dataset_path)
diff --git a/natural_language_processing/text_generation/llama2/run.py b/natural_language_processing/text_generation/llama2/run.py
index 74d46d4e..fed29b86 100644
--- a/natural_language_processing/text_generation/llama2/run.py
+++ b/natural_language_processing/text_generation/llama2/run.py
@@ -22,12 +22,14 @@ def run_pytorch(model_name, batch_size, num_runs, timeout, dataset_path, use_tor
     from utils.pytorch import PyTorchRunnerV2, apply_compile
     from utils.benchmark import run_model
     from utils.nlp.alpaca_instruct import AlpacaInstruct
-    from transformers import LlamaForCausalLM, AutoTokenizer
+    from transformers import LlamaForCausalLM, AutoTokenizer, GenerationConfig

     def run_single_pass(pytorch_runner, _dataset):
         input_tensor = tokenizer.encode(_dataset.get_input_string(), return_tensors="pt")
         input_tensor = torch.cat([input_tensor for _ in range(batch_size)], 0)
-        output = pytorch_runner.run(inputs=input_tensor, max_length=400)
+        config = GenerationConfig()
+        config.max_length = 400
+        output = pytorch_runner.run(inputs=input_tensor, generation_config=config)
         pytorch_runner.set_task_size(sum([len(output[i]) - len(input_tensor[i]) for i in range(batch_size)]))
         for i in range(batch_size):
             _dataset.submit_prediction(tokenizer.decode(output[i][len(input_tensor[i]):], skip_special_tokens=True))

From 5ac446b11793ff24d286c25c39bbed311d0d1d3c Mon Sep 17 00:00:00 2001
From: Karol Kontny
Date: Fri, 10 May 2024 11:17:37 +0200
Subject: [PATCH 2/4] Try with forward

---
 natural_language_processing/text_generation/alpaca/run.py | 2 +-
 natural_language_processing/text_generation/llama2/run.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/natural_language_processing/text_generation/alpaca/run.py b/natural_language_processing/text_generation/alpaca/run.py
index 0ab37880..6c234f43 100644
--- a/natural_language_processing/text_generation/alpaca/run.py
+++ b/natural_language_processing/text_generation/alpaca/run.py
@@ -37,7 +37,7 @@ def run_single_pass(pytorch_runner, _dataset):
     if use_torch_fp16:
         model = model.half()
     model.eval()
-    model.generate = apply_compile(model.generate)
+    model.forward = apply_compile(model.forward)
     tokenizer = AutoTokenizer.from_pretrained(model_path)
     dataset = AlpacaInstruct(1, dataset_path=dataset_path)
diff --git a/natural_language_processing/text_generation/llama2/run.py b/natural_language_processing/text_generation/llama2/run.py
index fed29b86..182e1797 100644
--- a/natural_language_processing/text_generation/llama2/run.py
+++ b/natural_language_processing/text_generation/llama2/run.py
@@ -43,7 +43,7 @@ def run_single_pass(pytorch_runner, _dataset):
     model.eval()
     if use_torch_fp16:
         model = model.half()
-    model.generate = apply_compile(model.generate)
+    model.forward = apply_compile(model.forward)
     tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side='left')
     tokenizer.add_special_tokens({'pad_token': tokenizer.eos_token})

From 87658ed6da25194986538b3b9f1965bdccc98cbd Mon Sep 17 00:00:00 2001
From: Karol Kontny
Date: Mon, 13 May 2024 11:48:46 +0200
Subject: [PATCH 3/4] Update transformers and tokenizers

---
 setup_deb.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/setup_deb.sh b/setup_deb.sh
index 8342a83f..863d569e 100644
--- a/setup_deb.sh
+++ b/setup_deb.sh
@@ -93,7 +93,7 @@ pip3 install --no-deps --upgrade \
     cchardet==2.1.7

 pip3 install --no-build-isolation --upgrade \
-    git+https://github.com/AmpereComputingAI/transformers.git@ampere/v4.36
+    git+https://github.com/AmpereComputingAI/transformers.git@ampere/v4.40

 # dependencies of dependencies
 pip3 install --no-deps --upgrade \
@@ -116,7 +116,7 @@ pip3 install --no-deps --upgrade \
     scikit-image==0.19.2 \
     scikit-learn==1.0.2 \
     threadpoolctl==3.1.0 \
-    tokenizers==0.14.1 \
+    tokenizers==0.19.1 \
     tabulate==0.9.0 \
     regex==2022.3.15 \
     portalocker==2.6.0 \

From de23227e236cacd0ce45213d6e2dacce4042f80b Mon Sep 17 00:00:00 2001
From: Karol Kontny
Date: Thu, 9 May 2024 15:22:06 +0000
Subject: [PATCH 4/4] Compile only encoder and decoder instead of whole generate

---
 speech_recognition/whisper/run_hf.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/speech_recognition/whisper/run_hf.py b/speech_recognition/whisper/run_hf.py
index 9095ff2e..2950dc23 100644
--- a/speech_recognition/whisper/run_hf.py
+++ b/speech_recognition/whisper/run_hf.py
@@ -38,7 +38,8 @@ def run_pytorch_fp32(model_name, batch_size, num_runs, timeout, **kwargs):
         librispeech = LibriSpeech()  # reset
         model = model.generate
     else:
-        model = apply_compile(model.generate)
+        model.forward = apply_compile(model.forward)
+        model.model.encoder = apply_compile(model.model.encoder)

     def single_pass_pytorch(_runner, _librispeech):
         waveform = [_librispeech.get_input_array() for _ in range(batch_size)]
@@ -49,7 +50,7 @@
         for i in range(batch_size):
             _librispeech.submit_transcription(decoded_output[i].lstrip().replace(",", "").replace(".", "").upper())

-    runner = PyTorchRunnerV2(model, throughput_only=True)
+    runner = PyTorchRunnerV2(model.generate, throughput_only=True)
     print_warning_message("Sampling rate Whisper operates at is 16,000 Hz, therefore throughput values below can be "
                           "divided by 16,000 to derive 'seconds of processed audio per second'")
     return run_model(single_pass_pytorch, runner, librispeech, batch_size, num_runs, timeout)
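
Background on the GenerationConfig change (PATCH 1/4): transformers 4.40 still accepts
loose keyword arguments such as max_new_tokens, but the patched runners pass an explicit
GenerationConfig object, which PyTorchRunnerV2.run() presumably forwards to the wrapped
model.generate call (PATCH 4/4 shows the runner wrapping model.generate directly). A
minimal standalone sketch of the same pattern, assuming a generic causal LM checkpoint
("gpt2" is only a placeholder here, not a model used by these benchmarks):

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

    model = AutoModelForCausalLM.from_pretrained("gpt2")  # placeholder checkpoint
    model.eval()
    tokenizer = AutoTokenizer.from_pretrained("gpt2")

    config = GenerationConfig()
    config.max_new_tokens = 100  # same cap PATCH 1/4 sets for the Alpaca runner

    inputs = tokenizer("List three uses of a paperclip.", return_tensors="pt")
    with torch.no_grad():
        outputs = model.generate(inputs.input_ids, generation_config=config)
    # Decode only the newly generated tokens, mirroring the slicing done in run_single_pass.
    print(tokenizer.decode(outputs[0, inputs.input_ids.shape[1]:], skip_special_tokens=True))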
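Background on the torch.compile changes (PATCH 2/4 and PATCH 4/4): compiling the whole
generate() call means tracing its Python-level decoding loop, stopping criteria, and
sampling logic, which is fragile across transformers releases; compiling only forward()
(plus, for Whisper, the encoder) keeps that loop eager while the per-token compute still
runs compiled. A minimal sketch of the idea using plain torch.compile (apply_compile is
this repository's own wrapper and is assumed to behave similarly):

    import torch
    from transformers import AutoModelForCausalLM

    model = AutoModelForCausalLM.from_pretrained("gpt2")  # placeholder checkpoint
    model.eval()

    # Compile only the per-step forward pass; generate() itself stays eager Python,
    # so its dynamic control flow never has to be traced.
    model.forward = torch.compile(model.forward)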