Skip to content

Karol/v4.40 adaptation #246

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged 4 commits on May 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions natural_language_processing/text_generation/alpaca/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,13 @@ def run_pytorch(model_path, num_runs, timeout, dataset_path, use_torch_fp16=Fals
from utils.nlp.alpaca_instruct import AlpacaInstruct
from utils.pytorch import PyTorchRunnerV2, apply_compile
from utils.benchmark import run_model
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

def run_single_pass(pytorch_runner, _dataset):
inputs = encode(_dataset.get_input_string())
outputs = pytorch_runner.run(inputs=inputs.input_ids, max_new_tokens=100)
config = GenerationConfig()
config.max_new_tokens = 100
outputs = pytorch_runner.run(inputs=inputs.input_ids, generation_config=config)
pytorch_runner.set_task_size(outputs.shape[1] - inputs.input_ids.shape[1])
response = decode(outputs[:, inputs.input_ids.shape[1]:])
_dataset.submit_prediction(response)
Expand All @@ -35,7 +37,7 @@ def run_single_pass(pytorch_runner, _dataset):
if use_torch_fp16:
model = model.half()
model.eval()
model.greedy_search = apply_compile(model.greedy_search)
model.forward = apply_compile(model.forward)

tokenizer = AutoTokenizer.from_pretrained(model_path)
dataset = AlpacaInstruct(1, dataset_path=dataset_path)
Expand Down
8 changes: 5 additions & 3 deletions natural_language_processing/text_generation/llama2/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,14 @@ def run_pytorch(model_name, batch_size, num_runs, timeout, dataset_path, use_tor
from utils.pytorch import PyTorchRunnerV2, apply_compile
from utils.benchmark import run_model
from utils.nlp.alpaca_instruct import AlpacaInstruct
from transformers import LlamaForCausalLM, AutoTokenizer
from transformers import LlamaForCausalLM, AutoTokenizer, GenerationConfig

def run_single_pass(pytorch_runner, _dataset):
input_tensor = tokenizer.encode(_dataset.get_input_string(), return_tensors="pt")
input_tensor = torch.cat([input_tensor for _ in range(batch_size)], 0)
output = pytorch_runner.run(inputs=input_tensor, max_length=400)
config = GenerationConfig()
config.max_length = 400
output = pytorch_runner.run(inputs=input_tensor, generation_config=config)
pytorch_runner.set_task_size(sum([len(output[i]) - len(input_tensor[i]) for i in range(batch_size)]))
for i in range(batch_size):
_dataset.submit_prediction(tokenizer.decode(output[i][len(input_tensor[i]):], skip_special_tokens=True))
Expand All @@ -41,7 +43,7 @@ def run_single_pass(pytorch_runner, _dataset):
model.eval()
if use_torch_fp16:
model = model.half()
model.generate = apply_compile(model.generate)
model.forward = apply_compile(model.forward)

tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side='left')
tokenizer.add_special_tokens({'pad_token': tokenizer.eos_token})
Expand Down
4 changes: 2 additions & 2 deletions setup_deb.sh
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ pip3 install --no-deps --upgrade \
cchardet==2.1.7

pip3 install --no-build-isolation --upgrade \
git+https://github.com/AmpereComputingAI/transformers.git@ampere/v4.36
git+https://github.com/AmpereComputingAI/transformers.git@ampere/v4.40

# dependencies of dependencies
pip3 install --no-deps --upgrade \
Expand All @@ -116,7 +116,7 @@ pip3 install --no-deps --upgrade \
scikit-image==0.19.2 \
scikit-learn==1.0.2 \
threadpoolctl==3.1.0 \
tokenizers==0.14.1 \
tokenizers==0.19.1 \
tabulate==0.9.0 \
regex==2022.3.15 \
portalocker==2.6.0 \
Expand Down
5 changes: 3 additions & 2 deletions speech_recognition/whisper/run_hf.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ def run_pytorch_fp32(model_name, batch_size, num_runs, timeout, **kwargs):
librispeech = LibriSpeech() # reset
model = model.generate
else:
model = apply_compile(model.generate)
model.forward = apply_compile(model.forward)
model.model.encoder = apply_compile(model.model.encoder)

def single_pass_pytorch(_runner, _librispeech):
waveform = [_librispeech.get_input_array() for _ in range(batch_size)]
Expand All @@ -49,7 +50,7 @@ def single_pass_pytorch(_runner, _librispeech):
for i in range(batch_size):
_librispeech.submit_transcription(decoded_output[i].lstrip().replace(",", "").replace(".", "").upper())

runner = PyTorchRunnerV2(model, throughput_only=True)
runner = PyTorchRunnerV2(model.generate, throughput_only=True)
print_warning_message("Sampling rate Whisper operates at is 16,000 Hz, therefore throughput values below can be "
"divided by 16,000 to derive 'seconds of processed audio per second'")
return run_model(single_pass_pytorch, runner, librispeech, batch_size, num_runs, timeout)
Expand Down