
Commit f6010ce

[Bugfix] Fix multigpu dispatch_for_generation (#1567)
## Purpose ##

* Fix `test_oneshot_and_finetune_with_tokenizer.py` when running with multiple CUDA devices.
* This test had two failures. The first was introduced when sequential onloading added `dispatch_for_generation`, which is also used to dispatch for training; that function did not account for no-split modules when computing the device map.
* The second failure is a pre-existing issue where `HFTrainer.compute_loss` does not account for multi-GPU models. It will be fixed in the next transformers release by huggingface/transformers#38029.

## Changes ##

* Pass no-split modules when computing a device map for generation (and training).
* Load the model on CPU in the test (since this is now the default flow as of sequential onloading landing).

## Testing ##

* Ran `test_oneshot_and_finetune_with_tokenizer` with two GPUs to completion (with upstream transformers).

---------

Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
1 parent 16ada3a commit f6010ce
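For context, a minimal sketch of the flow this commit assumes (the model name is a placeholder, not taken from the test): the model is loaded on CPU without a `device_map`, then `dispatch_for_generation` builds a balanced multi-GPU device map just before generation or training needs it.

```python
from transformers import AutoModelForCausalLM

from llmcompressor.utils.dev import dispatch_for_generation

# Load on CPU (no device_map); with sequential onloading this is the default flow.
# "Qwen/Qwen2.5-0.5B" is only an illustrative model id.
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-0.5B", torch_dtype="auto")

# Dispatch across all visible GPUs with a balanced device map that keeps each
# no-split module (e.g. a decoder layer) on a single device.
model = dispatch_for_generation(model)
```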

File tree

3 files changed (+11 / -4 lines)

* src/llmcompressor/transformers/finetune/session_mixin.py
* src/llmcompressor/utils/dev.py
* tests/llmcompressor/transformers/finetune/test_oneshot_and_finetune_with_tokenizer.py

src/llmcompressor/transformers/finetune/session_mixin.py

Lines changed: 2 additions & 1 deletion
```diff
@@ -270,7 +270,7 @@ def compute_loss(
         model: Module,
         inputs: Dict[str, Any],
         return_outputs: bool = False,
-        num_items_in_batch: Optional[int] = None,
+        num_items_in_batch: Optional[torch.Tensor] = None,
     ) -> Union[torch.Tensor, Tuple[torch.Tensor, Any]]:
         """
         Override for the compute_loss to factor trigger callbacks and filter columns
@@ -279,6 +279,7 @@ def compute_loss(
         :param inputs: the inputs to pass through the model for calculating the loss
         :param return_outputs: True to return the outputs with the loss,
             False otherwise
+        :param num_items_in_batch: the number of items which contribute to loss
         :return: the resulting loss if not return_outputs, otherwise a tuple
             containing the loss and the model's outputs
         """
```

src/llmcompressor/utils/dev.py

Lines changed: 8 additions & 2 deletions
```diff
@@ -126,11 +126,17 @@ def dispatch_for_generation(model: PreTrainedModel) -> PreTrainedModel:
     """
     remove_dispatch(model)
 
+    no_split_module_classes = model._get_no_split_modules("auto")
     max_memory = get_balanced_memory(
         model,
         dtype=model.dtype,
-        no_split_module_classes=model._get_no_split_modules("auto"),
+        no_split_module_classes=no_split_module_classes,
+    )
+    device_map = infer_auto_device_map(
+        model,
+        dtype=model.dtype,
+        max_memory=max_memory,
+        no_split_module_classes=no_split_module_classes,
     )
-    device_map = infer_auto_device_map(model, dtype=model.dtype, max_memory=max_memory)
 
     return dispatch_model(model, device_map=device_map)
```
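As an illustration of why `no_split_module_classes` must also be passed to `infer_auto_device_map` (the map below is hypothetical, for a two-GPU setup): each no-split block, such as a decoder layer, should land on exactly one device; without the argument, a layer's submodules can be spread across devices.

```python
# Hypothetical device map produced with no_split_module_classes supplied:
# every decoder layer is assigned to a single GPU, never split across two.
device_map = {
    "model.embed_tokens": 0,
    "model.layers.0": 0,
    "model.layers.1": 1,
    "model.norm": 1,
    "lm_head": 1,
}
```

`dispatch_model` then places each submodule on its assigned device and adds the hooks that move activations between devices during the forward pass.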

tests/llmcompressor/transformers/finetune/test_oneshot_and_finetune_with_tokenizer.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -36,7 +36,7 @@ def test_oneshot_and_finetune_with_tokenizer(self):
             self.model,
         )
         model_loaded = AutoModelForCausalLM.from_pretrained(
-            self.model, device_map="cuda:0", torch_dtype="auto"
+            self.model, torch_dtype="auto"
         )
 
         dataset_loaded = load_dataset(
```
