Skip to content

Commit e9109bf

Browse files
committed
Fixed CLI Default Values
1 parent fbc7110 commit e9109bf

File tree

5 files changed

+28
-13
lines changed

5 files changed

+28
-13
lines changed

CHANGELOG.md

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,21 @@
22

33
All notable changes to LocalLab will be documented in this file.
44

5+
## [0.6.2] - 2024-05-03
6+
7+
### Fixed
8+
9+
- Fixed CLI configuration issue where optimization settings shown as enabled by default weren't being properly saved
10+
- Updated default values for all optimization settings (quantization, flash attention, CPU offloading, better transformer) to be enabled by default
11+
- Ensured consistency between displayed optimization settings and saved configuration
12+
- Fixed resource check functions to use correct default values for optimization settings
13+
14+
## [0.6.1] - 2024-05-02
15+
16+
### Fixed
17+
18+
- Fixed CLI config environment variable issue
19+
520
## [0.6.0] - 2024-05-02
621

722
### Added

locallab/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
LocalLab - A lightweight AI inference server for running LLMs locally
33
"""
44

5-
__version__ = "0.6.1" # Updated to fix CLI config environment variable issue
5+
__version__ = "0.6.2" # Updated to fix CLI optimization settings defaults
66

77
# Only import what's necessary initially, lazy-load the rest
88
from .logger import get_logger

locallab/config.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -106,16 +106,16 @@ def save_config(config: Dict[str, Any]):
106106

107107
# Optimization settings
108108
ENABLE_QUANTIZATION = get_env_var(
109-
"ENABLE_QUANTIZATION", default="false", var_type=bool)
109+
"ENABLE_QUANTIZATION", default="true", var_type=bool)
110110
QUANTIZATION_TYPE = get_env_var("QUANTIZATION_TYPE", default="int8")
111111
ENABLE_FLASH_ATTENTION = get_env_var(
112-
"ENABLE_FLASH_ATTENTION", default="false", var_type=bool)
112+
"ENABLE_FLASH_ATTENTION", default="true", var_type=bool)
113113
ENABLE_ATTENTION_SLICING = get_env_var(
114114
"ENABLE_ATTENTION_SLICING", default="true", var_type=bool)
115115
ENABLE_CPU_OFFLOADING = get_env_var(
116-
"ENABLE_CPU_OFFLOADING", default="false", var_type=bool)
116+
"ENABLE_CPU_OFFLOADING", default="true", var_type=bool)
117117
ENABLE_BETTERTRANSFORMER = get_env_var(
118-
"ENABLE_BETTERTRANSFORMER", default="false", var_type=bool)
118+
"ENABLE_BETTERTRANSFORMER", default="true", var_type=bool)
119119

120120
# Resource management
121121
UNLOAD_UNUSED_MODELS = get_env_var(
@@ -164,13 +164,13 @@ def can_run_model(model_id: str) -> bool:
164164
available_ram = (psutil.virtual_memory().available / (1024 ** 3)) * 0.8 # 80% of available RAM in GB
165165

166166
# Adjust requirements based on optimizations
167-
if get_env_var("LOCALLAB_ENABLE_QUANTIZATION", default=False, var_type=bool):
167+
if get_env_var("LOCALLAB_ENABLE_QUANTIZATION", default=True, var_type=bool):
168168
# Quantization reduces memory usage
169169
requirements["min_ram"] *= 0.5
170170
if "min_vram" in requirements:
171171
requirements["min_vram"] *= 0.5
172172

173-
if get_env_var("LOCALLAB_ENABLE_CPU_OFFLOADING", default=False, var_type=bool):
173+
if get_env_var("LOCALLAB_ENABLE_CPU_OFFLOADING", default=True, var_type=bool):
174174
# CPU offloading allows running with less RAM
175175
requirements["min_ram"] *= 0.7
176176

locallab/ui/banners.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -209,12 +209,12 @@ def print_model_info():
209209
model_id = get_env_var("HUGGINGFACE_MODEL") or get_env_var("LOCALLAB_MODEL_ID") or "microsoft/phi-2"
210210

211211
# Get optimization settings
212-
enable_quantization = get_env_var("LOCALLAB_ENABLE_QUANTIZATION", default="false").lower() == "true"
212+
enable_quantization = get_env_var("LOCALLAB_ENABLE_QUANTIZATION", default="true").lower() == "true"
213213
quantization_type = get_env_var("LOCALLAB_QUANTIZATION_TYPE", default="int8")
214-
enable_attention_slicing = get_env_var("LOCALLAB_ENABLE_ATTENTION_SLICING", default="false").lower() == "true"
215-
enable_flash_attention = get_env_var("LOCALLAB_ENABLE_FLASH_ATTENTION", default="false").lower() == "true"
216-
enable_better_transformer = get_env_var("LOCALLAB_ENABLE_BETTERTRANSFORMER", default="false").lower() == "true"
217-
enable_cpu_offloading = get_env_var("LOCALLAB_ENABLE_CPU_OFFLOADING", default="false").lower() == "true"
214+
enable_attention_slicing = get_env_var("LOCALLAB_ENABLE_ATTENTION_SLICING", default="true").lower() == "true"
215+
enable_flash_attention = get_env_var("LOCALLAB_ENABLE_FLASH_ATTENTION", default="true").lower() == "true"
216+
enable_better_transformer = get_env_var("LOCALLAB_ENABLE_BETTERTRANSFORMER", default="true").lower() == "true"
217+
enable_cpu_offloading = get_env_var("LOCALLAB_ENABLE_CPU_OFFLOADING", default="true").lower() == "true"
218218

219219
# Format model information
220220
model_info = f"""

setup.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@
4747

4848
setup(
4949
name="locallab",
50-
version="0.6.1",
50+
version="0.6.2",
5151
packages=find_packages(include=["locallab", "locallab.*"]),
5252
install_requires=install_requires,
5353
extras_require={

0 commit comments

Comments (0)