
Commit 7f15150

Bump version to 0.11.1 and update changelog
1 parent 7f69377 commit 7f15150

File tree

6 files changed: +173 −56 lines


CHANGELOG.md

Lines changed: 27 additions & 0 deletions

@@ -2,6 +2,33 @@
 
 All notable changes to LocalLab will be documented in this file.
 
+
+## [0.11.1] - 2025-07-08
+
+### 🔧 Bug Fixes - Download Command Improvements
+
+This patch release fixes several warnings and errors that appeared during model downloads, providing a cleaner and more user-friendly experience.
+
+### Fixed
+
+#### 🚀 Download Command Improvements
+- **Fixed HuggingFace Hub progress bar configuration error** - Resolved the `module 'huggingface_hub.utils.logging' has no attribute 'enable_progress_bars'` error with multiple fallback methods covering different huggingface_hub versions
+- **Fixed BetterTransformer version compatibility warning** - Updated optimization code to handle the transformers>=4.49.0 requirement with version detection and a graceful fallback to native PyTorch optimizations
+- **Improved CUDA availability warnings** - Changed the alarming "CUDA not available" warning to an informative "GPU not detected - running in CPU mode" message with helpful tips
+- **Enhanced Flash Attention messages** - Made warnings more informative, including installation guidance for faster inference
+- **Added graceful optimization fallbacks** - Implemented comprehensive error handling for all optimization attempts, with result tracking and summary logging
+
+#### 🛠️ Enhanced Error Handling
+- **Robust optimization system** - The download process continues smoothly even if some optimizations fail
+- **Clear user feedback** - Users now get a summary of which optimizations were applied successfully
+- **Version compatibility** - Works correctly with current transformers and huggingface_hub versions
+- **Graceful degradation** - Falls back to safe defaults when advanced features aren't available
+
+### Technical Changes
+- Updated `locallab/utils/progress.py` with improved HuggingFace Hub progress bar configuration
+- Updated `locallab/utils/early_config.py` with better version compatibility handling
+- Enhanced `locallab/model_manager.py` with comprehensive optimization tracking and fallback mechanisms
+- Improved logging levels from warnings to informative messages for a better user experience
+
 ## [0.11.0] - 2025-07-08
 
 ### 🎉 Major Release - Comprehensive Model Management CLI
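The BetterTransformer fix in this release hinges on a runtime version gate. As a minimal standalone sketch (assuming only that `transformers` and `packaging` are importable; the actual logic lives in `locallab/model_manager.py`, shown in the diff below):

```python
# Hedged sketch of the version gate described in the changelog entry above.
import transformers
from packaging import version

def bettertransformer_deprecated() -> bool:
    """True when transformers>=4.49.0, where the optimum BetterTransformer
    path is deprecated in favor of native PyTorch attention."""
    return version.parse(transformers.__version__) >= version.parse("4.49.0")

if bettertransformer_deprecated():
    print("Use native PyTorch optimizations")
else:
    print("optimum.bettertransformer is still an option")
```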

locallab/__init__.py

Lines changed: 1 addition & 1 deletion

@@ -6,7 +6,7 @@
 # This ensures Hugging Face's progress bars are displayed correctly
 from .utils.early_config import configure_hf_logging
 
-__version__ = "0.11.0"  # Comprehensive model management CLI with HuggingFace Hub integration
+__version__ = "0.11.1"  # Fixed download command warnings and improved error handling
 
 # Only import what's necessary initially, lazy-load the rest
 from .logger import get_logger

locallab/model_manager.py

Lines changed: 80 additions & 28 deletions

@@ -103,8 +103,8 @@ def _get_quantization_config(self) -> Optional[Dict[str, Any]]:
 
         # First check if CUDA is available - if not, we can't use bitsandbytes quantization
         if not torch.cuda.is_available():
-            logger.warning("CUDA not available - quantization with bitsandbytes requires CUDA")
-            logger.info("Disabling quantization and using CPU-compatible settings")
+            logger.info("GPU not detected - running in CPU mode with optimized settings")
+            logger.info("💡 For faster inference, consider using a system with CUDA-compatible GPU")
             return {
                 "torch_dtype": torch.float32,
                 "device_map": safe_device_map
@@ -187,7 +187,9 @@ def _get_quantization_config(self) -> Optional[Dict[str, Any]]:
         }
 
     def _apply_optimizations(self, model: AutoModelForCausalLM) -> AutoModelForCausalLM:
-        """Apply various optimizations to the model"""
+        """Apply various optimizations to the model with graceful fallbacks"""
+        optimization_results = []
+
         try:
             # Import the config system
             from .cli.config import get_config_value
@@ -198,25 +200,36 @@ def _apply_optimizations(self, model: AutoModelForCausalLM) -> AutoModelForCausalLM:
                 enable_attention_slicing = enable_attention_slicing.lower() not in ('false', '0', 'none', '')
 
             if enable_attention_slicing:
-                if hasattr(model, 'enable_attention_slicing'):
-                    # Use more aggressive slicing for faster inference
-                    model.enable_attention_slicing("max")
-                    logger.info("Attention slicing enabled with max setting")
-                else:
-                    logger.info(
-                        "Attention slicing not available for this model")
+                try:
+                    if hasattr(model, 'enable_attention_slicing'):
+                        # Use more aggressive slicing for faster inference
+                        model.enable_attention_slicing("max")
+                        logger.info("Attention slicing enabled with max setting")
+                        optimization_results.append("✓ Attention slicing")
+                    else:
+                        logger.info("Attention slicing not available for this model")
+                        optimization_results.append("- Attention slicing (not supported)")
+                except Exception as e:
+                    logger.debug(f"Attention slicing failed: {str(e)}")
+                    optimization_results.append("- Attention slicing (failed)")
 
             # Only apply CPU offloading if explicitly enabled and not empty
             enable_cpu_offloading = get_config_value('enable_cpu_offloading', ENABLE_CPU_OFFLOADING)
             if isinstance(enable_cpu_offloading, str):
                 enable_cpu_offloading = enable_cpu_offloading.lower() not in ('false', '0', 'none', '')
 
             if enable_cpu_offloading:
-                if hasattr(model, "enable_cpu_offload"):
-                    model.enable_cpu_offload()
-                    logger.info("CPU offloading enabled")
-                else:
-                    logger.info("CPU offloading not available for this model")
+                try:
+                    if hasattr(model, "enable_cpu_offload"):
+                        model.enable_cpu_offload()
+                        logger.info("CPU offloading enabled")
+                        optimization_results.append("✓ CPU offloading")
+                    else:
+                        logger.info("CPU offloading not available for this model")
+                        optimization_results.append("- CPU offloading (not supported)")
+                except Exception as e:
+                    logger.debug(f"CPU offloading failed: {str(e)}")
+                    optimization_results.append("- CPU offloading (failed)")
 
             # Only apply BetterTransformer if explicitly enabled and not empty
             enable_bettertransformer = get_config_value('enable_better_transformer', ENABLE_BETTERTRANSFORMER)
@@ -225,15 +238,38 @@ def _apply_optimizations(self, model: AutoModelForCausalLM) -> AutoModelForCausalLM:
 
             if enable_bettertransformer:
                 try:
-                    from optimum.bettertransformer import BetterTransformer
-                    model = BetterTransformer.transform(model)
-                    logger.info("BetterTransformer optimization applied")
+                    # Check transformers version compatibility
+                    import transformers
+                    from packaging import version
+
+                    transformers_version = version.parse(transformers.__version__)
+                    if transformers_version >= version.parse("4.49.0"):
+                        logger.info("BetterTransformer is deprecated for transformers>=4.49.0, using native optimizations instead")
+                        # Use native PyTorch optimizations instead
+                        try:
+                            if hasattr(model, "to_bettertransformer"):
+                                # Some models still support the native method
+                                model = model.to_bettertransformer()
+                                logger.info("Applied native BetterTransformer optimization")
+                                optimization_results.append("✓ Native BetterTransformer")
+                            else:
+                                logger.info("Using default PyTorch optimizations (BetterTransformer not needed)")
+                                optimization_results.append("✓ Default PyTorch optimizations")
+                        except Exception as e:
+                            logger.debug(f"Native BetterTransformer not available: {str(e)}")
+                            optimization_results.append("✓ Default PyTorch optimizations")
+                    else:
+                        # Use optimum BetterTransformer for older transformers versions
+                        from optimum.bettertransformer import BetterTransformer
+                        model = BetterTransformer.transform(model)
+                        logger.info("BetterTransformer optimization applied via optimum")
+                        optimization_results.append("✓ BetterTransformer (optimum)")
                 except ImportError:
-                    logger.warning(
-                        "BetterTransformer not available - install 'optimum' for this feature")
+                    logger.info("BetterTransformer not available - using default PyTorch optimizations")
+                    optimization_results.append("✓ Default PyTorch optimizations")
                 except Exception as e:
-                    logger.warning(
-                        f"BetterTransformer optimization failed: {str(e)}")
+                    logger.debug(f"BetterTransformer optimization skipped: {str(e)}")
+                    optimization_results.append("- BetterTransformer (failed)")
 
             # Only apply Flash Attention if explicitly enabled and not empty
             enable_flash_attention = get_config_value('enable_flash_attention', ENABLE_FLASH_ATTENTION)
@@ -246,36 +282,52 @@ def _apply_optimizations(self, model: AutoModelForCausalLM) -> AutoModelForCausalLM:
                     if hasattr(model.config, "attn_implementation"):
                         model.config.attn_implementation = "flash_attention_2"
                         logger.info("Flash Attention 2 enabled via config")
+                        optimization_results.append("✓ Flash Attention 2")
                     # For older models, try the flash_attn module
                     else:
                         import flash_attn
                         logger.info("Flash Attention enabled via module")
+                        optimization_results.append("✓ Flash Attention")
                 except ImportError:
-                    logger.warning(
-                        "Flash Attention not available - install 'flash-attn' for this feature")
+                    logger.info(
+                        "Flash Attention not available - using standard attention (install 'flash-attn' for faster inference)")
+                    optimization_results.append("- Flash Attention (not installed)")
                 except Exception as e:
-                    logger.warning(
-                        f"Flash Attention optimization failed: {str(e)}")
+                    logger.debug(f"Flash Attention optimization skipped: {str(e)}")
+                    optimization_results.append("- Flash Attention (failed)")
 
             # Enable memory efficient attention if available
             try:
                 if hasattr(model, "enable_xformers_memory_efficient_attention"):
                     model.enable_xformers_memory_efficient_attention()
                     logger.info("XFormers memory efficient attention enabled")
+                    optimization_results.append("✓ XFormers memory efficient attention")
+                else:
+                    optimization_results.append("- XFormers (not supported)")
             except Exception as e:
-                logger.info(f"XFormers memory efficient attention not available: {str(e)}")
+                logger.debug(f"XFormers memory efficient attention not available: {str(e)}")
+                optimization_results.append("- XFormers (not available)")
 
             # Enable gradient checkpointing for memory efficiency if available
             try:
                 if hasattr(model, "gradient_checkpointing_enable"):
                     model.gradient_checkpointing_enable()
                     logger.info("Gradient checkpointing enabled for memory efficiency")
+                    optimization_results.append("✓ Gradient checkpointing")
+                else:
+                    optimization_results.append("- Gradient checkpointing (not supported)")
             except Exception as e:
-                logger.info(f"Gradient checkpointing not available: {str(e)}")
+                logger.debug(f"Gradient checkpointing not available: {str(e)}")
+                optimization_results.append("- Gradient checkpointing (failed)")
 
             # Set model to evaluation mode for faster inference
             model.eval()
             logger.info("Model set to evaluation mode for faster inference")
+            optimization_results.append("✓ Evaluation mode")
+
+            # Log optimization summary
+            if optimization_results:
+                logger.info(f"Applied optimizations: {', '.join(optimization_results)}")
 
             return model
         except Exception as e:
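The per-optimization blocks above all follow the same shape: attempt, record the outcome, never raise. A condensed sketch of that pattern (the helper name `try_optimization` is hypothetical, distilled from this diff rather than part of the codebase):

```python
import logging
from typing import Callable, List

logger = logging.getLogger(__name__)

def try_optimization(name: str, apply: Callable[[], None], results: List[str]) -> None:
    # Attempt one optimization; record success or failure instead of
    # propagating errors, mirroring the try/except blocks added above.
    try:
        apply()
        results.append(f"✓ {name}")
    except Exception as e:
        logger.debug(f"{name} failed: {e}")
        results.append(f"- {name} (failed)")

def simulated_failure() -> None:
    raise ImportError("flash-attn not installed")  # stand-in for a real failure

results: List[str] = []
try_optimization("Attention slicing", lambda: None, results)  # simulated success
try_optimization("Flash Attention", simulated_failure, results)
logger.info("Applied optimizations: %s", ", ".join(results))
```

This is what keeps model loading resilient: a single failed optimization downgrades to a debug log entry and a "-" marker in the summary instead of aborting the download.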

locallab/utils/early_config.py

Lines changed: 21 additions & 9 deletions

@@ -91,24 +91,36 @@ def enable_hf_progress_bars():
     except ImportError:
         pass
 
-    # Configure huggingface_hub
+    # Configure huggingface_hub progress bars
     try:
         import huggingface_hub
+        import os
 
         # Different versions of huggingface_hub have different ways to enable progress bars
         # Try multiple approaches to ensure compatibility
+        progress_enabled = False
 
         # Method 1: Try direct module function (newer versions)
         if hasattr(huggingface_hub, "enable_progress_bars"):
-            huggingface_hub.enable_progress_bars()
+            try:
+                huggingface_hub.enable_progress_bars()
+                progress_enabled = True
+            except Exception:
+                pass
 
-        # Method 2: Try through utils.logging (some versions)
-        try:
-            from huggingface_hub.utils import logging as hf_logging
-            if hasattr(hf_logging, "enable_progress_bars"):
-                hf_logging.enable_progress_bars()
-        except (ImportError, AttributeError):
-            pass
+        # Method 2: Try through utils.logging (older versions)
+        if not progress_enabled:
+            try:
+                from huggingface_hub.utils import logging as hf_logging
+                if hasattr(hf_logging, "enable_progress_bars"):
+                    hf_logging.enable_progress_bars()
+                    progress_enabled = True
+            except (ImportError, AttributeError):
+                pass
+
+        # Method 3: Use environment variable as fallback
+        if not progress_enabled:
+            os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "0"
 
         # Method 3: Set environment variable (works for all versions)
         os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "0"
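One caveat worth noting: huggingface_hub typically reads `HF_HUB_DISABLE_PROGRESS_BARS` when it initializes its constants, so the environment-variable fallback is best-effort once the library is already imported. A quick probe of which path a given install supports (a sketch; `are_progress_bars_disabled` exists in recent huggingface_hub releases but is guarded with `hasattr` here in case an older version is installed):

```python
import os
import huggingface_hub
from huggingface_hub import utils as hf_utils

# Probe which progress-bar API this huggingface_hub install exposes.
if hasattr(huggingface_hub, "enable_progress_bars"):
    huggingface_hub.enable_progress_bars()
    print("enabled via top-level API")
else:
    # "0" means "do not disable", i.e. keep progress bars on.
    os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "0"
    print("requested via HF_HUB_DISABLE_PROGRESS_BARS=0")

# Recent releases also expose a query helper; guard it for older versions.
if hasattr(hf_utils, "are_progress_bars_disabled"):
    print("progress bars disabled?", hf_utils.are_progress_bars_disabled())
```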

locallab/utils/progress.py

Lines changed: 43 additions & 17 deletions

@@ -160,31 +160,57 @@ def configure_hf_hub_progress():
     This completely bypasses our custom logger for HuggingFace download progress.
     """
     try:
-        # 1. Enable HuggingFace's native progress bars
-        from huggingface_hub.utils import logging as hf_logging
-        hf_logging.enable_progress_bars()
+        # 1. Enable HuggingFace's native progress bars using the correct API
+        # Try multiple methods for different huggingface_hub versions
+        progress_enabled = False
+
+        # Method 1: Try the main module function (newer versions)
+        try:
+            import huggingface_hub
+            if hasattr(huggingface_hub, "enable_progress_bars"):
+                huggingface_hub.enable_progress_bars()
+                progress_enabled = True
+                logger.debug("Enabled HF progress bars via main module")
+        except (ImportError, AttributeError):
+            pass
+
+        # Method 2: Try through utils.logging (older versions)
+        if not progress_enabled:
+            try:
+                from huggingface_hub.utils import logging as hf_logging
+                if hasattr(hf_logging, "enable_progress_bars"):
+                    hf_logging.enable_progress_bars()
+                    progress_enabled = True
+                    logger.debug("Enabled HF progress bars via utils.logging")
+            except (ImportError, AttributeError):
+                pass
+
+        # Method 3: Try setting environment variable as fallback
+        if not progress_enabled:
+            import os
+            os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "0"
+            logger.debug("Enabled HF progress bars via environment variable")
 
         # 2. Enable HF Transfer for better download experience (only if available)
         try:
             import hf_transfer
             from huggingface_hub import constants
             constants.HF_HUB_ENABLE_HF_TRANSFER = True
+            logger.debug("Enabled HF Transfer for faster downloads")
         except ImportError:
             # hf_transfer not available, skip enabling it
             pass
 
         # 3. Make sure we're NOT overriding HuggingFace's progress callback
         # This is critical - we want to use their native implementation
-        from huggingface_hub import file_download
-        if hasattr(file_download, "_tqdm_callback"):
-            # Reset to default - we don't want any custom callback
-            file_download._tqdm_callback = None
-
-        # 4. Ensure HuggingFace Hub's own logging is properly configured
-        # This ensures HF's own progress bars are displayed correctly
-        import huggingface_hub
-        if hasattr(huggingface_hub, "enable_progress_bars"):
-            huggingface_hub.enable_progress_bars()
+        try:
+            from huggingface_hub import file_download
+            if hasattr(file_download, "_tqdm_callback"):
+                # Reset to default - we don't want any custom callback
+                file_download._tqdm_callback = None
+                logger.debug("Reset HF download callback to default")
+        except (ImportError, AttributeError):
+            pass
 
         # 5. Configure tqdm directly to ensure proper display
         import tqdm
@@ -200,11 +226,11 @@ def configure_hf_hub_progress():
         global is_downloading
         is_downloading = True
 
-        logger.debug("Configured HuggingFace Hub to use its native progress bars")
-    except ImportError:
-        logger.warning("Failed to configure HuggingFace Hub progress bars")
+        logger.debug("Successfully configured HuggingFace Hub progress bars")
+    except ImportError as e:
+        logger.debug(f"HuggingFace Hub progress configuration skipped: {str(e)}")
     except Exception as e:
-        logger.warning(f"Error configuring HuggingFace Hub progress: {str(e)}")
+        logger.debug(f"HuggingFace Hub progress configuration failed: {str(e)}")
 
 # Function to check if we're currently downloading
 def is_model_downloading():
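For step 2, the documented switch for hf_transfer is the `HF_HUB_ENABLE_HF_TRANSFER` environment variable, read when huggingface_hub is imported; a minimal usage sketch (assuming the optional `hf_transfer` package is installed, and using `gpt2`'s config file purely as an example download):

```python
import os

# Set before importing huggingface_hub, since the flag is loaded into
# huggingface_hub.constants at import time.
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

try:
    import hf_transfer  # noqa: F401 - optional Rust-based download accelerator
    from huggingface_hub import hf_hub_download

    path = hf_hub_download(repo_id="gpt2", filename="config.json")
    print(f"downloaded to {path}")
except ImportError:
    print("hf_transfer not installed - standard downloads will be used")
```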

setup.py

Lines changed: 1 addition & 1 deletion

@@ -47,7 +47,7 @@
 
 setup(
     name="locallab",
-    version="0.11.0",
+    version="0.11.1",
     packages=find_packages(include=["locallab", "locallab.*"]),
     install_requires=install_requires,
     extras_require={
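Once released, the bumped version can be cross-checked against the installed package metadata; a quick sanity check (assuming locallab 0.11.1 is installed, Python 3.8+ for `importlib.metadata`):

```python
import locallab
from importlib.metadata import version

# Both should report 0.11.1 for this commit once installed.
print("module:", locallab.__version__)
print("metadata:", version("locallab"))
```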
