Commit 6932f27

Author: Lincoln Stein
fixup code broken by merge with main

1 parent 0df018b, commit 6932f27

File tree

6 files changed: +25 -14 lines

invokeai/app/services/model_load/model_load_base.py

Lines changed: 0 additions & 1 deletion

@@ -60,4 +60,3 @@ def load_model_from_path(
         Returns:
             A LoadedModel object.
         """
-

invokeai/app/services/model_manager/model_manager_default.py

Lines changed: 1 addition & 0 deletions

@@ -76,6 +76,7 @@ def build_model_manager(

         ram_cache = ModelCache(
             max_cache_size=app_config.ram,
+            max_vram_cache_size=app_config.vram,
             logger=logger,
         )
         convert_cache = ModelConvertCache(cache_path=app_config.convert_cache_path, max_size=app_config.convert_cache)
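
The fix here simply restores the VRAM budget that the merge dropped: build_model_manager passes app_config.vram through to the cache again. A minimal sketch of that wiring, assuming only the constructor arguments visible in this commit (a standard-library logger stands in for InvokeAI's own logger):

import logging

from invokeai.app.services.config import get_config
from invokeai.backend.model_manager.load import ModelCache

app_config = get_config()

ram_cache = ModelCache(
    max_cache_size=app_config.ram,        # CPU-side cache budget, in GB
    max_vram_cache_size=app_config.vram,  # VRAM cache budget, in GB (falls back to DEFAULT_MAX_VRAM_CACHE_SIZE)
    logger=logging.getLogger("model_manager"),
)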

invokeai/backend/model_manager/load/model_cache/model_cache_default.py

Lines changed: 9 additions & 1 deletion

@@ -19,8 +19,10 @@
 """

 import gc
+import math
 import sys
 import threading
+import time
 from contextlib import contextmanager, suppress
 from logging import Logger
 from threading import BoundedSemaphore
@@ -40,6 +42,7 @@
 # Maximum size of the cache, in gigs
 # Default is roughly enough to hold three fp16 diffusers models in RAM simultaneously
 DEFAULT_MAX_CACHE_SIZE = 6.0
+DEFAULT_MAX_VRAM_CACHE_SIZE = 0.25

 # actual size of a gig
 GIG = 1073741824
@@ -54,6 +57,7 @@ class ModelCache(ModelCacheBase[AnyModel]):
     def __init__(
         self,
         max_cache_size: float = DEFAULT_MAX_CACHE_SIZE,
+        max_vram_cache_size: float = DEFAULT_MAX_VRAM_CACHE_SIZE,
         storage_device: torch.device = torch.device("cpu"),
         execution_devices: Optional[Set[torch.device]] = None,
         precision: torch.dtype = torch.float16,
@@ -76,6 +80,7 @@ def __init__(
         """
         self._precision: torch.dtype = precision
         self._max_cache_size: float = max_cache_size
+        self._max_vram_cache_size: float = max_vram_cache_size
         self._storage_device: torch.device = storage_device
         self._ram_lock = threading.Lock()
         self._logger = logger or InvokeAILogger.get_logger(self.__class__.__name__)
@@ -281,14 +286,17 @@ def _make_cache_key(self, model_key: str, submodel_type: Optional[SubModelType]

     def offload_unlocked_models(self, size_required: int) -> None:
         """Move any unused models from VRAM."""
+        device = self.get_execution_device()
         reserved = self._max_vram_cache_size * GIG
-        vram_in_use = torch.cuda.memory_allocated() + size_required
+        vram_in_use = torch.cuda.memory_allocated(device) + size_required
         self.logger.debug(f"{(vram_in_use/GIG):.2f}GB VRAM needed for models; max allowed={(reserved/GIG):.2f}GB")
         for _, cache_entry in sorted(self._cached_models.items(), key=lambda x: x[1].size):
             if vram_in_use <= reserved:
                 break
             if not cache_entry.loaded:
                 continue
+            if cache_entry.device is not device:
+                continue
             if not cache_entry.locked:
                 self.move_model_to_device(cache_entry, self.storage_device)
                 cache_entry.loaded = False
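
The substantive change is that offload_unlocked_models now budgets VRAM against the execution device it is about to load onto: allocation is measured on that device, and cached entries resident elsewhere are skipped. A standalone sketch of that eviction loop, using a hypothetical CacheEntry stand-in rather than InvokeAI's cache records, and subtracting entry sizes instead of re-measuring allocation after each move:

# Standalone illustration of the per-device offload loop.
# CacheEntry is a hypothetical stand-in, not InvokeAI's cache record type.
from dataclasses import dataclass

import torch

GIG = 1073741824  # bytes per gigabyte, as in model_cache_default.py


@dataclass
class CacheEntry:
    key: str
    size: int              # model size in bytes
    device: torch.device   # where the weights currently live
    loaded: bool = False
    locked: bool = False


def offload_unlocked(entries: list[CacheEntry], size_required: int,
                     device: torch.device, max_vram_gb: float) -> None:
    """Move unlocked models off `device` (a CUDA device) until the new model fits the budget."""
    reserved = max_vram_gb * GIG
    vram_in_use = torch.cuda.memory_allocated(device) + size_required
    # Evict the smallest models first, but only those resident on this device.
    for entry in sorted(entries, key=lambda e: e.size):
        if vram_in_use <= reserved:
            break
        if not entry.loaded or entry.device != device:
            continue
        if not entry.locked:
            entry.device = torch.device("cpu")  # stand-in for move_model_to_device()
            entry.loaded = False
            vram_in_use -= entry.size  # the real cache re-measures allocation instead

Scoping the eviction to one device should leave models that are warm on other GPUs untouched when several execution devices are in play.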

invokeai/backend/model_manager/load/model_cache/model_locker.py

Lines changed: 4 additions & 4 deletions

@@ -39,11 +39,11 @@ def lock(self) -> AnyModel:
         """Move the model into the execution device (GPU) and lock it."""
         self._cache_entry.lock()
         try:
-            if self._cache.lazy_offloading:
-                self._cache.offload_unlocked_models(self._cache_entry.size)
-            self._cache.move_model_to_device(self._cache_entry, self._cache.get_execution_device())
+            device = self._cache.get_execution_device()
+            self._cache.offload_unlocked_models(self._cache_entry.size)
+            self._cache.move_model_to_device(self._cache_entry, device)
             self._cache_entry.loaded = True
-            self._cache.logger.debug(f"Locking {self._cache_entry.key} in {self._cache.execution_device}")
+            self._cache.logger.debug(f"Locking {self._cache_entry.key} in {device}")
             self._cache.print_cuda_stats()
         except torch.cuda.OutOfMemoryError:
             self._cache.logger.warning("Insufficient GPU memory to load model. Aborting")
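
The locker now resolves the execution device once and reuses it for the offload, the move, and the log line; the lazy_offloading gate is gone, so unlocked models are always offloaded before the move. A stand-in sketch of that ordering (duck-typed cache objects, not InvokeAI's ModelLocker; the cleanup after the OOM warning is not shown in this hunk and is assumed):

import torch


def lock(cache, cache_entry):
    """Load `cache_entry` onto one resolved execution device and pin it there."""
    cache_entry.lock()
    try:
        device = cache.get_execution_device()            # resolve the device once
        cache.offload_unlocked_models(cache_entry.size)  # free room on that device
        cache.move_model_to_device(cache_entry, device)  # then load onto it
        cache_entry.loaded = True
        cache.logger.debug(f"Locking {cache_entry.key} in {device}")
    except torch.cuda.OutOfMemoryError:
        cache.logger.warning("Insufficient GPU memory to load model. Aborting")
        cache_entry.unlock()  # assumed cleanup; only the warning appears in the diff
        raise
    return cache_entry.model  # attribute name assumed; lock() returns the model per its signature

Resolving the device before the offload keeps the eviction budget and the final placement pointing at the same GPU when devices are chosen dynamically.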

tests/backend/model_manager/model_loading/test_model_load.py

Lines changed: 9 additions & 8 deletions

@@ -14,13 +14,14 @@ def test_loading(mm2_model_manager: ModelManagerServiceBase, embedding_file: Pat
     matches = store.search_by_attr(model_name="test_embedding")
     assert len(matches) == 0
     key = mm2_model_manager.install.register_path(embedding_file)
-    loaded_model = mm2_model_manager.load.load_model(store.get_model(key))
-    assert loaded_model is not None
-    assert loaded_model.config.key == key
-    with loaded_model as model:
-        assert isinstance(model, TextualInversionModelRaw)
+    with mm2_model_manager.load.ram_cache.reserve_execution_device():
+        loaded_model = mm2_model_manager.load.load_model(store.get_model(key))
+        assert loaded_model is not None
+        assert loaded_model.config.key == key
+        with loaded_model as model:
+            assert isinstance(model, TextualInversionModelRaw)

-    config = mm2_model_manager.store.get_model(key)
-    loaded_model_2 = mm2_model_manager.load.load_model(config)
+        config = mm2_model_manager.store.get_model(key)
+        loaded_model_2 = mm2_model_manager.load.load_model(config)

-    assert loaded_model.config.key == loaded_model_2.config.key
+        assert loaded_model.config.key == loaded_model_2.config.key
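
The test now holds an execution device for the whole load-and-run sequence through the ram cache's reserve_execution_device() context manager, so both load_model calls and the inference block see the same GPU. The general shape, assuming `mm` is a ModelManagerServiceBase instance and `key` names an installed model (the exact semantics of the reservation are not shown in this diff):

with mm.load.ram_cache.reserve_execution_device():
    loaded_model = mm.load.load_model(mm.store.get_model(key))
    with loaded_model as model:
        ...  # run inference with `model` on the reserved device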

tests/backend/util/test_devices.py

Lines changed: 2 additions & 0 deletions

@@ -10,6 +10,7 @@
 from invokeai.app.services.config import get_config
 from invokeai.backend.model_manager.load import ModelCache
 from invokeai.backend.util.devices import TorchDevice, choose_precision, choose_torch_device, torch_dtype
+from tests.backend.model_manager.model_manager_fixtures import *  # noqa F403

 devices = ["cpu", "cuda:0", "cuda:1", "mps"]
 device_types_cpu = [("cpu", torch.float32), ("cuda:0", torch.float32), ("mps", torch.float32)]
@@ -21,6 +22,7 @@
 def test_device_choice(device_name):
     config = get_config()
     config.device = device_name
+    TorchDevice.set_model_cache(None)  # disable dynamic selection of GPU device
     torch_device = TorchDevice.choose_torch_device()
     assert torch_device == torch.device(device_name)
