Commit 56f7400

[tests] enable bnb tests on xpu (#11001)
* enable bnb on xpu
* add 2 more cases
* add missing change
* add missing change
* add one more
1 parent a34d97c commit 56f7400

6 files changed: +64 additions, −47 deletions

src/diffusers/pipelines/pipeline_utils.py

Lines changed: 2 additions & 2 deletions
@@ -427,7 +427,7 @@ def module_is_offloaded(module):
                 "It seems like you have activated a device mapping strategy on the pipeline which doesn't allow explicit device placement using `to()`. You can call `reset_device_map()` to remove the existing device map from the pipeline."
             )
 
-        if device_type == "cuda":
+        if device_type in ["cuda", "xpu"]:
             if pipeline_is_sequentially_offloaded and not pipeline_has_bnb:
                 raise ValueError(
                     "It seems like you have activated sequential model offloading by calling `enable_sequential_cpu_offload`, but are now attempting to move the pipeline to GPU. This is not compatible with offloading. Please, move your pipeline `.to('cpu')` or consider removing the move altogether if you use sequential offloading."
@@ -440,7 +440,7 @@ def module_is_offloaded(module):
 
         # Display a warning in this case (the operation succeeds but the benefits are lost)
         pipeline_is_offloaded = any(module_is_offloaded(module) for _, module in self.components.items())
-        if pipeline_is_offloaded and device_type == "cuda":
+        if pipeline_is_offloaded and device_type in ["cuda", "xpu"]:
             logger.warning(
                 f"It seems like you have activated model offloading by calling `enable_model_cpu_offload`, but are now manually moving the pipeline to GPU. It is strongly recommended against doing so as memory gains from offloading are likely to be lost. Offloading automatically takes care of moving the individual components {', '.join(self.components.keys())} to GPU when needed. To make sure offloading works as expected, you should consider moving the pipeline back to CPU: `pipeline.to('cpu')` or removing the move altogether if you use offloading."
             )
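
For readers unfamiliar with the offloading guards, a minimal usage sketch (not part of this commit; the checkpoint name is purely illustrative) of the behaviour the change above extends to XPU: moving a sequentially offloaded pipeline onto an accelerator now hits the same ValueError whether the target is "cuda" or "xpu".

    import torch
    from diffusers import DiffusionPipeline

    # Pick whichever accelerator is present; after this patch the guard fires for both.
    device = "xpu" if hasattr(torch, "xpu") and torch.xpu.is_available() else "cuda"

    # Illustrative checkpoint; any non-bnb pipeline behaves the same way.
    pipe = DiffusionPipeline.from_pretrained(
        "stabilityai/stable-diffusion-2-1", torch_dtype=torch.float16
    )
    pipe.enable_sequential_cpu_offload(device=device)

    try:
        pipe.to(device)  # with this patch, raises ValueError on "xpu" as well as "cuda"
    except ValueError as err:
        print(err)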

src/diffusers/quantizers/bitsandbytes/bnb_quantizer.py

Lines changed: 18 additions & 7 deletions
@@ -61,7 +61,7 @@ def __init__(self, quantization_config, **kwargs):
         self.modules_to_not_convert = self.quantization_config.llm_int8_skip_modules
 
     def validate_environment(self, *args, **kwargs):
-        if not torch.cuda.is_available():
+        if not (torch.cuda.is_available() or torch.xpu.is_available()):
             raise RuntimeError("No GPU found. A GPU is needed for quantization.")
         if not is_accelerate_available() or is_accelerate_version("<", "0.26.0"):
             raise ImportError(
@@ -238,11 +238,15 @@ def update_torch_dtype(self, torch_dtype: "torch.dtype") -> "torch.dtype":
 
     def update_device_map(self, device_map):
         if device_map is None:
-            device_map = {"": f"cuda:{torch.cuda.current_device()}"}
+            if torch.xpu.is_available():
+                current_device = f"xpu:{torch.xpu.current_device()}"
+            else:
+                current_device = f"cuda:{torch.cuda.current_device()}"
+            device_map = {"": current_device}
             logger.info(
                 "The device_map was not initialized. "
                 "Setting device_map to {"
-                ": f`cuda:{torch.cuda.current_device()}`}. "
+                ": {current_device}}. "
                 "If you want to use the model for inference, please set device_map ='auto' "
             )
         return device_map
@@ -312,7 +316,10 @@ def _dequantize(self, model):
             logger.info(
                 "Model was found to be on CPU (could happen as a result of `enable_model_cpu_offload()`). So, moving it to GPU. After dequantization, will move the model back to CPU again to preserve the previous device."
             )
-            model.to(torch.cuda.current_device())
+            if torch.xpu.is_available():
+                model.to(torch.xpu.current_device())
+            else:
+                model.to(torch.cuda.current_device())
 
         model = dequantize_and_replace(
             model, self.modules_to_not_convert, quantization_config=self.quantization_config
@@ -343,7 +350,7 @@ def __init__(self, quantization_config, **kwargs):
         self.modules_to_not_convert = self.quantization_config.llm_int8_skip_modules
 
     def validate_environment(self, *args, **kwargs):
-        if not torch.cuda.is_available():
+        if not (torch.cuda.is_available() or torch.xpu.is_available()):
             raise RuntimeError("No GPU found. A GPU is needed for quantization.")
         if not is_accelerate_available() or is_accelerate_version("<", "0.26.0"):
             raise ImportError(
@@ -402,11 +409,15 @@ def update_torch_dtype(self, torch_dtype: "torch.dtype") -> "torch.dtype":
     # Copied from diffusers.quantizers.bitsandbytes.bnb_quantizer.BnB4BitDiffusersQuantizer.update_device_map
     def update_device_map(self, device_map):
         if device_map is None:
-            device_map = {"": f"cuda:{torch.cuda.current_device()}"}
+            if torch.xpu.is_available():
+                current_device = f"xpu:{torch.xpu.current_device()}"
+            else:
+                current_device = f"cuda:{torch.cuda.current_device()}"
+            device_map = {"": current_device}
             logger.info(
                 "The device_map was not initialized. "
                 "Setting device_map to {"
-                ": f`cuda:{torch.cuda.current_device()}`}. "
+                ": {current_device}}. "
                 "If you want to use the model for inference, please set device_map ='auto' "
             )
         return device_map
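
The quantizer changes all apply the same fallback: use XPU when it is available, otherwise the current CUDA device. A standalone sketch of that selection logic (the helper name is hypothetical, for illustration only):

    import torch

    def default_quantization_device() -> str:
        # Hypothetical helper mirroring the quantizer's fallback: XPU if present, else current CUDA device.
        if hasattr(torch, "xpu") and torch.xpu.is_available():
            return f"xpu:{torch.xpu.current_device()}"
        return f"cuda:{torch.cuda.current_device()}"

    # What update_device_map now builds when no device_map is supplied.
    device_map = {"": default_quantization_device()}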

src/diffusers/utils/testing_utils.py

Lines changed: 2 additions & 2 deletions
@@ -574,10 +574,10 @@ def load_numpy(arry: Union[str, np.ndarray], local_path: Optional[str] = None) -
     return arry
 
 
-def load_pt(url: str):
+def load_pt(url: str, map_location: str):
     response = requests.get(url)
     response.raise_for_status()
-    arry = torch.load(BytesIO(response.content))
+    arry = torch.load(BytesIO(response.content), map_location=map_location)
     return arry
 
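A short usage sketch of the new signature, assuming `torch_device` from the test utilities resolves to "cuda", "xpu", or "cpu"; the URL is one of the artifacts the 4-bit tests below already download:

    from diffusers.utils.testing_utils import load_pt, torch_device

    prompt_embeds = load_pt(
        "https://huggingface.co/datasets/hf-internal-testing/bnb-diffusers-testing-artifacts/resolve/main/prompt_embeds.pt",
        map_location=torch_device,  # remaps tensors saved on CUDA onto the active accelerator (or CPU)
    )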

tests/pipelines/ip_adapters/test_ip_adapter_stable_diffusion.py

Lines changed: 4 additions & 3 deletions
@@ -377,9 +377,10 @@ def test_text_to_image_face_id(self):
         pipeline.set_ip_adapter_scale(0.7)
 
         inputs = self.get_dummy_inputs()
-        id_embeds = load_pt("https://huggingface.co/datasets/fabiorigano/testing-images/resolve/main/ai_face2.ipadpt")[
-            0
-        ]
+        id_embeds = load_pt(
+            "https://huggingface.co/datasets/fabiorigano/testing-images/resolve/main/ai_face2.ipadpt",
+            map_location=torch_device,
+        )[0]
         id_embeds = id_embeds.reshape((2, 1, 1, 512))
         inputs["ip_adapter_image_embeds"] = [id_embeds]
         inputs["ip_adapter_image"] = None

tests/quantization/bnb/test_4bit.py

Lines changed: 23 additions & 19 deletions
@@ -26,6 +26,7 @@
 from diffusers.utils import is_accelerate_version, logging
 from diffusers.utils.testing_utils import (
     CaptureLogger,
+    backend_empty_cache,
     is_bitsandbytes_available,
     is_torch_available,
     is_transformers_available,
@@ -35,7 +36,7 @@
     require_bitsandbytes_version_greater,
     require_peft_backend,
     require_torch,
-    require_torch_gpu,
+    require_torch_accelerator,
     require_transformers_version_greater,
     slow,
     torch_device,
@@ -66,7 +67,7 @@ def get_some_linear_layer(model):
 @require_bitsandbytes_version_greater("0.43.2")
 @require_accelerate
 @require_torch
-@require_torch_gpu
+@require_torch_accelerator
 @slow
 class Base4bitTests(unittest.TestCase):
     # We need to test on relatively large models (aka >1b parameters otherwise the quantiztion may not work as expected)
@@ -84,13 +85,16 @@ class Base4bitTests(unittest.TestCase):
 
     def get_dummy_inputs(self):
         prompt_embeds = load_pt(
-            "https://huggingface.co/datasets/hf-internal-testing/bnb-diffusers-testing-artifacts/resolve/main/prompt_embeds.pt"
+            "https://huggingface.co/datasets/hf-internal-testing/bnb-diffusers-testing-artifacts/resolve/main/prompt_embeds.pt",
+            torch_device,
         )
         pooled_prompt_embeds = load_pt(
-            "https://huggingface.co/datasets/hf-internal-testing/bnb-diffusers-testing-artifacts/resolve/main/pooled_prompt_embeds.pt"
+            "https://huggingface.co/datasets/hf-internal-testing/bnb-diffusers-testing-artifacts/resolve/main/pooled_prompt_embeds.pt",
+            torch_device,
         )
         latent_model_input = load_pt(
-            "https://huggingface.co/datasets/hf-internal-testing/bnb-diffusers-testing-artifacts/resolve/main/latent_model_input.pt"
+            "https://huggingface.co/datasets/hf-internal-testing/bnb-diffusers-testing-artifacts/resolve/main/latent_model_input.pt",
+            torch_device,
         )
 
         input_dict_for_transformer = {
@@ -106,7 +110,7 @@ def get_dummy_inputs(self):
 class BnB4BitBasicTests(Base4bitTests):
     def setUp(self):
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
 
         # Models
         self.model_fp16 = SD3Transformer2DModel.from_pretrained(
@@ -128,7 +132,7 @@ def tearDown(self):
         del self.model_4bit
 
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
 
     def test_quantization_num_parameters(self):
         r"""
@@ -224,7 +228,7 @@ def test_keep_modules_in_fp32(self):
                 self.assertTrue(module.weight.dtype == torch.uint8)
 
         # test if inference works.
-        with torch.no_grad() and torch.amp.autocast("cuda", dtype=torch.float16):
+        with torch.no_grad() and torch.amp.autocast(torch_device, dtype=torch.float16):
             input_dict_for_transformer = self.get_dummy_inputs()
             model_inputs = {
                 k: v.to(device=torch_device) for k, v in input_dict_for_transformer.items() if not isinstance(v, bool)
@@ -266,9 +270,9 @@ def test_device_assignment(self):
         self.assertAlmostEqual(self.model_4bit.get_memory_footprint(), mem_before)
 
         # Move back to CUDA device
-        for device in [0, "cuda", "cuda:0", "call()"]:
+        for device in [0, f"{torch_device}", f"{torch_device}:0", "call()"]:
             if device == "call()":
-                self.model_4bit.cuda(0)
+                self.model_4bit.to(f"{torch_device}:0")
             else:
                 self.model_4bit.to(device)
             self.assertEqual(self.model_4bit.device, torch.device(0))
@@ -286,7 +290,7 @@ def test_device_and_dtype_assignment(self):
 
         with self.assertRaises(ValueError):
             # Tries with a `device` and `dtype`
-            self.model_4bit.to(device="cuda:0", dtype=torch.float16)
+            self.model_4bit.to(device=f"{torch_device}:0", dtype=torch.float16)
 
         with self.assertRaises(ValueError):
             # Tries with a cast
@@ -297,7 +301,7 @@ def test_device_and_dtype_assignment(self):
             self.model_4bit.half()
 
         # This should work
-        self.model_4bit.to("cuda")
+        self.model_4bit.to(torch_device)
 
         # Test if we did not break anything
         self.model_fp16 = self.model_fp16.to(dtype=torch.float32, device=torch_device)
@@ -321,7 +325,7 @@ def test_device_and_dtype_assignment(self):
         _ = self.model_fp16.float()
 
         # Check that this does not throw an error
-        _ = self.model_fp16.cuda()
+        _ = self.model_fp16.to(torch_device)
 
     def test_bnb_4bit_wrong_config(self):
         r"""
@@ -398,7 +402,7 @@ def test_training(self):
         model_inputs.update({k: v for k, v in input_dict_for_transformer.items() if k not in model_inputs})
 
         # Step 4: Check if the gradient is not None
-        with torch.amp.autocast("cuda", dtype=torch.float16):
+        with torch.amp.autocast(torch_device, dtype=torch.float16):
             out = self.model_4bit(**model_inputs)[0]
             out.norm().backward()
 
@@ -412,7 +416,7 @@ def test_training(self):
 class SlowBnb4BitTests(Base4bitTests):
     def setUp(self) -> None:
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
 
         nf4_config = BitsAndBytesConfig(
             load_in_4bit=True,
@@ -431,7 +435,7 @@ def tearDown(self):
         del self.pipeline_4bit
 
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
 
     def test_quality(self):
         output = self.pipeline_4bit(
@@ -501,7 +505,7 @@ def test_moving_to_cpu_throws_warning(self):
         reason="Test will pass after https://github.com/huggingface/accelerate/pull/3223 is in a release.",
         strict=True,
     )
-    def test_pipeline_cuda_placement_works_with_nf4(self):
+    def test_pipeline_device_placement_works_with_nf4(self):
         transformer_nf4_config = BitsAndBytesConfig(
             load_in_4bit=True,
             bnb_4bit_quant_type="nf4",
@@ -532,7 +536,7 @@ def test_pipeline_cuda_placement_works_with_nf4(self):
             transformer=transformer_4bit,
             text_encoder_3=text_encoder_3_4bit,
             torch_dtype=torch.float16,
-        ).to("cuda")
+        ).to(torch_device)
 
         # Check if inference works.
         _ = pipeline_4bit("table", max_sequence_length=20, num_inference_steps=2)
@@ -696,7 +700,7 @@ def test_lora_loading(self):
 class BaseBnb4BitSerializationTests(Base4bitTests):
     def tearDown(self):
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
 
     def test_serialization(self, quant_type="nf4", double_quant=True, safe_serialization=True):
         r"""

0 commit comments