Commit d23ad18

Make quantized loading fast.

1 parent 4181ab6

1 file changed: +8 -9 lines changed


invokeai/backend/requantize.py

Lines changed: 8 additions & 9 deletions
@@ -1,14 +1,13 @@
 from typing import Any, Dict
 
 import torch
-from optimum.quanto.nn import QModuleMixin
-from optimum.quanto.quantize import _quantize_submodule, freeze
+from optimum.quanto.quantize import _quantize_submodule
 
-
-def custom_freeze(model: torch.nn.Module):
-    for name, m in model.named_modules():
-        if isinstance(m, QModuleMixin):
-            m.freeze()
+# def custom_freeze(model: torch.nn.Module):
+#     for name, m in model.named_modules():
+#         if isinstance(m, QModuleMixin):
+#             m.weight =
+#             m.freeze()
 
 
 def requantize(
@@ -47,8 +46,8 @@ def move_tensor(t, device):
         for name, param in m.named_buffers(recurse=False):
            setattr(m, name, move_tensor(param, "cpu"))
     # Freeze model and move to target device
-    freeze(model)
-    model.to(device)
+    # freeze(model)
+    # model.to(device)
 
     # Load the quantized model weights
     model.load_state_dict(state_dict, strict=False)
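
For context, the change drops the eager freeze()/device-move pass from requantize(): freezing re-quantizes every quantized submodule's weights, which is wasted work when load_state_dict() is about to overwrite them with already-quantized tensors. Below is a minimal usage sketch, assuming the optimum-quanto-style signature requantize(model, state_dict, quantization_map, device=None) visible in the diff; ToyModel and the file names are illustrative placeholders, not part of the commit.

import json

import torch

from invokeai.backend.requantize import requantize


class ToyModel(torch.nn.Module):
    """Illustrative stand-in for the real model architecture."""

    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(8, 8)


# Build the architecture on the meta device so no real weights are
# allocated; requantize() materializes tensors as it loads them.
with torch.device("meta"):
    model = ToyModel()

# Load the serialized quantized weights and the quantization map
# (illustrative file names).
state_dict = torch.load("model_quantized.pt", map_location="cpu")
with open("quantization_map.json") as f:
    qmap = json.load(f)

# requantize() re-applies the quantization structure to each submodule
# and then loads the pre-quantized weights directly. After this commit
# it no longer calls freeze(model), which would re-quantize weights
# that load_state_dict() immediately overwrites.
requantize(model, state_dict, qmap, device=torch.device("cpu"))

# model.to(device) was also removed from requantize(), so moving the
# loaded model to its target device is now the caller's job.
if torch.cuda.is_available():
    model = model.to("cuda")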
