From e5e2433eb2c046bf5f4872a0ad1f49a87fe78563 Mon Sep 17 00:00:00 2001
From: Daniel Kupnicki
Date: Fri, 5 Jul 2024 18:05:49 +0000
Subject: [PATCH 1/4] add mixtral

---
 .../text_generation/mixtral/run.py | 69 +++++++++++++++++++
 1 file changed, 69 insertions(+)
 create mode 100644 natural_language_processing/text_generation/mixtral/run.py

diff --git a/natural_language_processing/text_generation/mixtral/run.py b/natural_language_processing/text_generation/mixtral/run.py
new file mode 100644
index 00000000..3ea8eadb
--- /dev/null
+++ b/natural_language_processing/text_generation/mixtral/run.py
@@ -0,0 +1,69 @@
+# SPDX-License-Identifier: Apache-2.0
+# Copyright (c) 2024, Ampere Computing LLC
+try:
+    from utils import misc  # noqa
+except ModuleNotFoundError:
+    import os
+    import sys
+    filename = "set_env_variables.sh"
+    directory = os.path.realpath(__file__).split("/")[:-1]
+    for idx in range(1, len(directory) - 1):
+        subdir = "/".join(directory[:-idx])
+        if filename in os.listdir(subdir):
+            print(f"\nPlease run \033[91m'source {os.path.join(subdir, filename)}'\033[0m first.")
+            break
+    else:
+        print(f"\n\033[91mFAIL: Couldn't find {filename}, are you running this script as part of Ampere Model Library?"
+              f"\033[0m")
+    sys.exit(1)
+
+
+def run_pytorch(num_runs, timeout, dataset_path, disable_jit_freeze=False, **kwargs):
+    from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
+    from utils.nlp.alpaca_instruct import AlpacaInstruct
+    from utils.pytorch import PyTorchRunnerV2, apply_compile
+    from utils.benchmark import run_model
+
+    def run_single_pass(pytorch_runner, dataset):
+        input_array = [{"role": "user", "content": dataset.get_input_string()}]
+        inputs = encode(input_array)
+        
+        outputs = pytorch_runner.run(inputs=inputs, generation_config=config)
+        pytorch_runner.set_task_size(outputs.shape[1] - inputs.shape[1])
+        response = decode(outputs[:, inputs.shape[1]:])[0]
+        print(response)
+        dataset.submit_prediction(response)
+
+    model = AutoModelForCausalLM.from_pretrained("mistralai/Mixtral-8x7B-Instruct-v0.1")
+    model.eval()
+    model.generate = apply_compile(model.generate)
+
+    tokenizer = AutoTokenizer.from_pretrained("mistralai/Mixtral-8x7B-Instruct-v0.1")
+    dataset = AlpacaInstruct(1, dataset_path=dataset_path)
+    encode = lambda i: tokenizer.apply_chat_template(i, return_tensors="pt")
+    decode = lambda t: tokenizer.batch_decode(t, skip_special_tokens=True)
+    config = GenerationConfig.from_pretrained("mistralai/Mixtral-8x7B-Instruct-v0.1")
+    config.max_new_tokens=100
+    config.do_sample = True
+    config.pad_token_id = config.eos_token_id
+
+    runner = PyTorchRunnerV2(model.generate)
+
+    return run_model(run_single_pass, runner, dataset, 1, num_runs, timeout)
+
+
+def run_pytorch_fp32(num_runs, timeout, dataset_path, disable_jit_freeze=False, **kwargs):
+    return run_pytorch(num_runs, timeout, dataset_path, disable_jit_freeze, **kwargs)
+
+
+def main():
+    from utils.helpers import DefaultArgParser
+    parser = DefaultArgParser(["pytorch"])
+    parser.add_argument("--dataset_path",
+                        type=str,
+                        help="path to JSON file with instructions")
+    run_pytorch_fp32(**vars(parser.parse()))
+
+
+if __name__ == "__main__":
+    main()

From 7babe0766ddb562c7c72fd8c864db8de2506b40a Mon Sep 17 00:00:00 2001
From: Daniel Kupnicki
Date: Mon, 8 Jul 2024 14:14:31 +0000
Subject: [PATCH 2/4] change compiled function from generate to forward

---
 natural_language_processing/text_generation/mixtral/run.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/natural_language_processing/text_generation/mixtral/run.py b/natural_language_processing/text_generation/mixtral/run.py
index 3ea8eadb..090fe1e2 100644
--- a/natural_language_processing/text_generation/mixtral/run.py
+++ b/natural_language_processing/text_generation/mixtral/run.py
@@ -36,7 +36,7 @@ def run_single_pass(pytorch_runner, dataset):
 
     model = AutoModelForCausalLM.from_pretrained("mistralai/Mixtral-8x7B-Instruct-v0.1")
     model.eval()
-    model.generate = apply_compile(model.generate)
+    model.forward = apply_compile(model.forward)
 
     tokenizer = AutoTokenizer.from_pretrained("mistralai/Mixtral-8x7B-Instruct-v0.1")
     dataset = AlpacaInstruct(1, dataset_path=dataset_path)

From 94f89d11777e7658d44021d617ea016d720c5aee Mon Sep 17 00:00:00 2001
From: Daniel Kupnicki
Date: Mon, 8 Jul 2024 14:29:55 +0000
Subject: [PATCH 3/4] don't print response

---
 natural_language_processing/text_generation/mixtral/run.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/natural_language_processing/text_generation/mixtral/run.py b/natural_language_processing/text_generation/mixtral/run.py
index 090fe1e2..0ac496cf 100644
--- a/natural_language_processing/text_generation/mixtral/run.py
+++ b/natural_language_processing/text_generation/mixtral/run.py
@@ -31,7 +31,6 @@ def run_single_pass(pytorch_runner, dataset):
         outputs = pytorch_runner.run(inputs=inputs, generation_config=config)
         pytorch_runner.set_task_size(outputs.shape[1] - inputs.shape[1])
         response = decode(outputs[:, inputs.shape[1]:])[0]
-        print(response)
         dataset.submit_prediction(response)
 
     model = AutoModelForCausalLM.from_pretrained("mistralai/Mixtral-8x7B-Instruct-v0.1")

From 32067946b66291d166c28764f8671b62c98fcc3c Mon Sep 17 00:00:00 2001
From: Daniel Kupnicki
Date: Mon, 8 Jul 2024 14:57:57 +0000
Subject: [PATCH 4/4] lint

---
 natural_language_processing/text_generation/mixtral/run.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/natural_language_processing/text_generation/mixtral/run.py b/natural_language_processing/text_generation/mixtral/run.py
index 0ac496cf..5f7e2eb1 100644
--- a/natural_language_processing/text_generation/mixtral/run.py
+++ b/natural_language_processing/text_generation/mixtral/run.py
@@ -27,7 +27,7 @@ def run_pytorch(num_runs, timeout, dataset_path, disable_jit_freeze=False, **kwa
     def run_single_pass(pytorch_runner, dataset):
         input_array = [{"role": "user", "content": dataset.get_input_string()}]
         inputs = encode(input_array)
-        
+
         outputs = pytorch_runner.run(inputs=inputs, generation_config=config)
         pytorch_runner.set_task_size(outputs.shape[1] - inputs.shape[1])
         response = decode(outputs[:, inputs.shape[1]:])[0]
@@ -42,7 +42,7 @@ def run_single_pass(pytorch_runner, dataset):
     encode = lambda i: tokenizer.apply_chat_template(i, return_tensors="pt")
     decode = lambda t: tokenizer.batch_decode(t, skip_special_tokens=True)
     config = GenerationConfig.from_pretrained("mistralai/Mixtral-8x7B-Instruct-v0.1")
-    config.max_new_tokens=100
+    config.max_new_tokens = 100
    config.do_sample = True
    config.pad_token_id = config.eos_token_id
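
---

The run.py added above depends on Ampere Model Library helpers (PyTorchRunnerV2, run_model, apply_compile, AlpacaInstruct), so it only runs inside that harness. What follows is a minimal standalone sketch of what one benchmark pass does: tokenize a chat turn, generate, and measure only the newly generated tokens. The model name and generation settings are taken from the patches; the prompt, the use of torch.compile as a stand-in for apply_compile, and the plain generate() call are illustrative assumptions, not part of the series.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

MODEL_ID = "mistralai/Mixtral-8x7B-Instruct-v0.1"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
model.eval()
# Stand-in for AML's apply_compile (assumed to wrap torch.compile).
# As in patch 2/4, the per-step forward pass is compiled, not generate().
model.forward = torch.compile(model.forward)

config = GenerationConfig.from_pretrained(MODEL_ID)
config.max_new_tokens = 100
config.do_sample = True
config.pad_token_id = config.eos_token_id

# Hypothetical prompt standing in for AlpacaInstruct's get_input_string()
chat = [{"role": "user", "content": "Give three tips for staying healthy."}]
inputs = tokenizer.apply_chat_template(chat, return_tensors="pt")

with torch.no_grad():
    outputs = model.generate(inputs, generation_config=config)

# The "task size" in run_single_pass: count of newly generated tokens only,
# so throughput reflects generation, not the prompt length.
new_tokens = outputs.shape[1] - inputs.shape[1]
response = tokenizer.batch_decode(outputs[:, inputs.shape[1]:], skip_special_tokens=True)[0]
print(f"generated {new_tokens} tokens:\n{response}")

On the design choice in patch 2/4: compiling model.forward rather than model.generate is likely preferable because generate() contains Python-level control flow (the sampling loop, stopping criteria) that is hostile to graph capture, while the per-token forward pass is a stable tensor computation that compiles cleanly and is re-entered every decoding step.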