diff --git a/natural_language_processing/text_generation/alpaca/run.py b/natural_language_processing/text_generation/alpaca/run.py
index 0e095813..6c234f43 100644
--- a/natural_language_processing/text_generation/alpaca/run.py
+++ b/natural_language_processing/text_generation/alpaca/run.py
@@ -22,11 +22,13 @@ def run_pytorch(model_path, num_runs, timeout, dataset_path, use_torch_fp16=Fals
     from utils.nlp.alpaca_instruct import AlpacaInstruct
     from utils.pytorch import PyTorchRunnerV2, apply_compile
     from utils.benchmark import run_model
-    from transformers import AutoModelForCausalLM, AutoTokenizer
+    from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
 
     def run_single_pass(pytorch_runner, _dataset):
         inputs = encode(_dataset.get_input_string())
-        outputs = pytorch_runner.run(inputs=inputs.input_ids, max_new_tokens=100)
+        config = GenerationConfig()
+        config.max_new_tokens = 100
+        outputs = pytorch_runner.run(inputs=inputs.input_ids, generation_config=config)
         pytorch_runner.set_task_size(outputs.shape[1] - inputs.input_ids.shape[1])
         response = decode(outputs[:, inputs.input_ids.shape[1]:])
         _dataset.submit_prediction(response)
@@ -35,7 +37,7 @@ def run_single_pass(pytorch_runner, _dataset):
     if use_torch_fp16:
         model = model.half()
     model.eval()
-    model.greedy_search = apply_compile(model.greedy_search)
+    model.forward = apply_compile(model.forward)
 
     tokenizer = AutoTokenizer.from_pretrained(model_path)
     dataset = AlpacaInstruct(1, dataset_path=dataset_path)
diff --git a/natural_language_processing/text_generation/llama2/run.py b/natural_language_processing/text_generation/llama2/run.py
index 74d46d4e..182e1797 100644
--- a/natural_language_processing/text_generation/llama2/run.py
+++ b/natural_language_processing/text_generation/llama2/run.py
@@ -22,12 +22,14 @@ def run_pytorch(model_name, batch_size, num_runs, timeout, dataset_path, use_tor
     from utils.pytorch import PyTorchRunnerV2, apply_compile
     from utils.benchmark import run_model
     from utils.nlp.alpaca_instruct import AlpacaInstruct
-    from transformers import LlamaForCausalLM, AutoTokenizer
+    from transformers import LlamaForCausalLM, AutoTokenizer, GenerationConfig
 
     def run_single_pass(pytorch_runner, _dataset):
         input_tensor = tokenizer.encode(_dataset.get_input_string(), return_tensors="pt")
         input_tensor = torch.cat([input_tensor for _ in range(batch_size)], 0)
-        output = pytorch_runner.run(inputs=input_tensor, max_length=400)
+        config = GenerationConfig()
+        config.max_length = 400
+        output = pytorch_runner.run(inputs=input_tensor, generation_config=config)
         pytorch_runner.set_task_size(sum([len(output[i]) - len(input_tensor[i]) for i in range(batch_size)]))
         for i in range(batch_size):
             _dataset.submit_prediction(tokenizer.decode(output[i][len(input_tensor[i]):], skip_special_tokens=True))
@@ -41,7 +43,7 @@ def run_single_pass(pytorch_runner, _dataset):
     model.eval()
     if use_torch_fp16:
         model = model.half()
-    model.generate = apply_compile(model.generate)
+    model.forward = apply_compile(model.forward)
 
     tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side='left')
     tokenizer.add_special_tokens({'pad_token': tokenizer.eos_token})
diff --git a/setup_deb.sh b/setup_deb.sh
index 8342a83f..863d569e 100644
--- a/setup_deb.sh
+++ b/setup_deb.sh
@@ -93,7 +93,7 @@ pip3 install --no-deps --upgrade \
     cchardet==2.1.7
 
 pip3 install --no-build-isolation --upgrade \
-    git+https://github.com/AmpereComputingAI/transformers.git@ampere/v4.36
+    git+https://github.com/AmpereComputingAI/transformers.git@ampere/v4.40
 
 # dependencies of dependencies
 pip3 install --no-deps --upgrade \
@@ -116,7 +116,7 @@ pip3 install --no-deps --upgrade \
     scikit-image==0.19.2 \
     scikit-learn==1.0.2 \
     threadpoolctl==3.1.0 \
-    tokenizers==0.14.1 \
+    tokenizers==0.19.1 \
     tabulate==0.9.0 \
     regex==2022.3.15 \
     portalocker==2.6.0 \
diff --git a/speech_recognition/whisper/run_hf.py b/speech_recognition/whisper/run_hf.py
index 9095ff2e..2950dc23 100644
--- a/speech_recognition/whisper/run_hf.py
+++ b/speech_recognition/whisper/run_hf.py
@@ -38,7 +38,8 @@ def run_pytorch_fp32(model_name, batch_size, num_runs, timeout, **kwargs):
         librispeech = LibriSpeech()  # reset
         model = model.generate
     else:
-        model = apply_compile(model.generate)
+        model.forward = apply_compile(model.forward)
+        model.model.encoder = apply_compile(model.model.encoder)
 
     def single_pass_pytorch(_runner, _librispeech):
         waveform = [_librispeech.get_input_array() for _ in range(batch_size)]
@@ -49,7 +50,7 @@ def single_pass_pytorch(_runner, _librispeech):
         for i in range(batch_size):
             _librispeech.submit_transcription(decoded_output[i].lstrip().replace(",", "").replace(".", "").upper())
 
-    runner = PyTorchRunnerV2(model, throughput_only=True)
+    runner = PyTorchRunnerV2(model.generate, throughput_only=True)
     print_warning_message("Sampling rate Whisper operates at is 16,000 Hz, therefore throughput values below can be "
                           "divided by 16,000 to derive 'seconds of processed audio per second'")
     return run_model(single_pass_pytorch, runner, librispeech, batch_size, num_runs, timeout)
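
For reference, below is a minimal self-contained sketch of the pattern these hunks converge on: compile model.forward (rather than generate/greedy_search) and pass generation limits through a GenerationConfig. It is illustrative only, not part of the patch; it uses plain torch.compile in place of the repository's apply_compile helper, and the checkpoint name and token budget are placeholders rather than values taken from the benchmarks above.

# Illustrative sketch -- not part of the patch. Assumes transformers >= 4.40 and
# PyTorch 2.x; "gpt2" is a placeholder checkpoint, and torch.compile stands in
# for the repository's apply_compile helper.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

model_name = "gpt2"  # placeholder
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
model.eval()

# Compile the per-step forward pass; generate() itself stays eager, so every
# decoding strategy reuses the same compiled graph.
model.forward = torch.compile(model.forward)

# Route generation limits through a GenerationConfig instead of loose kwargs.
config = GenerationConfig()
config.max_new_tokens = 100

inputs = tokenizer("What does this patch change?", return_tensors="pt")
with torch.no_grad():
    outputs = model.generate(inputs.input_ids, generation_config=config)
print(tokenizer.decode(outputs[0, inputs.input_ids.shape[1]:], skip_special_tokens=True))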