@@ -132,6 +132,14 @@ class LLM:
132
132
hf_overrides: If a dictionary, contains arguments to be forwarded to the
133
133
HuggingFace config. If a callable, it is called to update the
134
134
HuggingFace config.
135
+ mm_processor_kwargs: Arguments to be forwarded to the model's processor
136
+ for multi-modal data, e.g., image processor. Overrides for the
137
+ multi-modal processor obtained from `AutoProcessor.from_pretrained`.
138
+ The available overrides depend on the model that is being run.
139
+ For example, for Phi-3-Vision: `{"num_crops": 4}`.
140
+ override_pooler_config: Initialize non-default pooling config or
141
+ override default pooling config for the pooling model.
142
+ For example: `PoolerConfig(pooling_type="mean", normalize=False)`.
135
143
compilation_config: Either an integer or a dictionary. If it is an
136
144
integer, it is used as the level of compilation optimization. If it
137
145
is a dictionary, it can specify the full compilation configuration.
@@ -1347,16 +1355,16 @@ def sleep(self, level: int = 1):
1347
1355
during the sleep period, before `wake_up` is called.
1348
1356
1349
1357
Args:
1350
- level: The sleep level. Level 1 sleep will offload the model
1351
- weights and discard the kv cache. The content of kv cache
1358
+ level: The sleep level. Level 1 sleep will offload the model
1359
+ weights and discard the kv cache. The content of the kv cache
1352
1360
is forgotten. Level 1 sleep is good for sleeping and waking
1353
- up the engine to run the same model again. The model weights
1354
- are backed up in CPU memory. Please make sure there's enough
1355
- CPU memory to store the model weights. Level 2 sleep will
1356
- discard both the model weights and the kv cache. The content
1357
- of both the model weights and kv cache is forgotten. Level 2
1361
+ up the engine to run the same model again. The model weights
1362
+ are backed up in CPU memory. Please make sure there's enough
1363
+ CPU memory to store the model weights. Level 2 sleep will
1364
+ discard both the model weights and the kv cache. The content
1365
+ of both the model weights and the kv cache is forgotten. Level 2
1358
1366
sleep is good for sleeping and waking up the engine to run a
1359
- different model or update the model, where previous model
1367
+ different model or update the model, where the previous model
1360
1368
weights are not needed. It reduces CPU memory pressure.
1361
1369
"""
1362
1370
self .reset_prefix_cache ()
@@ -1366,12 +1374,12 @@ def wake_up(self, tags: Optional[list[str]] = None):
1366
1374
"""
1367
1375
Wake up the engine from sleep mode. See the [sleep][] method
1368
1376
for more details.
1369
-
1377
+
1370
1378
Args:
1371
- tags: An optional list of tags to reallocate the engine memory
1372
- for specific memory allocations. Values must be in
1379
+ tags: An optional list of tags to reallocate the engine memory
1380
+ for specific memory allocations. Values must be in
1373
1381
`("weights", "kv_cache")`. If None, all memory is reallocated.
1374
- wake_up should be called with all tags (or None) before the
1382
+ wake_up should be called with all tags (or None) before the
1375
1383
engine is used again.
1376
1384
"""
1377
1385
self .llm_engine .wake_up (tags )
0 commit comments