
Commit 7f15150

Bump version to 0.11.1 and update changelog
1 parent 7f69377 commit 7f15150

File tree

6 files changed: +173 −56 lines


CHANGELOG.md

Lines changed: 27 additions & 0 deletions

@@ -2,6 +2,33 @@
 
 All notable changes to LocalLab will be documented in this file.
 
+
+## [0.11.1] - 2025-07-08
+
+### 🔧 Bug Fixes - Download Command Improvements
+
+This patch release fixes several warnings and errors that appeared during model downloads, providing a cleaner and more user-friendly experience.
+
+### Fixed
+
+#### 🚀 Download Command Improvements
+- **Fixed HuggingFace Hub progress bar configuration error** - Resolved the `module 'huggingface_hub.utils.logging' has no attribute 'enable_progress_bars'` error with multiple fallback methods covering different huggingface_hub versions
+- **Fixed BetterTransformer version compatibility warning** - Updated optimization code to handle the transformers>=4.49.0 requirement with version detection and a graceful fallback to native PyTorch optimizations
+- **Improved CUDA availability warnings** - Changed the alarming "CUDA not available" warning to an informative "GPU not detected - running in CPU mode" message with helpful tips
+- **Enhanced Flash Attention messages** - Made warnings more informative, including installation guidance for faster inference
+- **Added graceful optimization fallbacks** - Implemented comprehensive error handling for all optimization attempts, with result tracking and summary logging
+
+#### 🛠️ Enhanced Error Handling
+- **Robust optimization system** - The download process continues smoothly even if some optimizations fail
+- **Clear user feedback** - Users now get a summary of which optimizations were applied successfully
+- **Version compatibility** - Works correctly with current transformers and huggingface_hub versions
+- **Graceful degradation** - Falls back to safe defaults when advanced features aren't available
+
+### Technical Changes
+- Updated `locallab/utils/progress.py` with improved HuggingFace Hub progress bar configuration
+- Updated `locallab/utils/early_config.py` with better version compatibility handling
+- Enhanced `locallab/model_manager.py` with comprehensive optimization tracking and fallback mechanisms
+- Improved logging levels from warnings to informative messages for a better user experience
+
 ## [0.11.0] - 2025-07-08
 
 ### 🎉 Major Release - Comprehensive Model Management CLI
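The BetterTransformer fix in this release hinges on a runtime version gate. As a minimal standalone sketch (assuming only that `transformers` and `packaging` are importable; the actual logic lives in `locallab/model_manager.py`, shown in the diff below):

```python
# Hedged sketch of the version gate described in the changelog entry above.
import transformers
from packaging import version

def bettertransformer_deprecated() -> bool:
    """True when transformers>=4.49.0, where the optimum BetterTransformer
    path is deprecated in favor of native PyTorch attention."""
    return version.parse(transformers.__version__) >= version.parse("4.49.0")

if bettertransformer_deprecated():
    print("Use native PyTorch optimizations")
else:
    print("optimum.bettertransformer is still an option")
```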

locallab/__init__.py

Lines changed: 1 addition & 1 deletion

@@ -6,7 +6,7 @@
 # This ensures Hugging Face's progress bars are displayed correctly
 from .utils.early_config import configure_hf_logging
 
-__version__ = "0.11.0"  # Comprehensive model management CLI with HuggingFace Hub integration
+__version__ = "0.11.1"  # Fixed download command warnings and improved error handling
 
 # Only import what's necessary initially, lazy-load the rest
 from .logger import get_logger

locallab/model_manager.py

Lines changed: 80 additions & 28 deletions

@@ -103,8 +103,8 @@ def _get_quantization_config(self) -> Optional[Dict[str, Any]]:
 
         # First check if CUDA is available - if not, we can't use bitsandbytes quantization
         if not torch.cuda.is_available():
-            logger.warning("CUDA not available - quantization with bitsandbytes requires CUDA")
-            logger.info("Disabling quantization and using CPU-compatible settings")
+            logger.info("GPU not detected - running in CPU mode with optimized settings")
+            logger.info("💡 For faster inference, consider using a system with CUDA-compatible GPU")
             return {
                 "torch_dtype": torch.float32,
                 "device_map": safe_device_map
@@ -187,7 +187,9 @@ def _get_quantization_config(self) -> Optional[Dict[str, Any]]:
         }
 
     def _apply_optimizations(self, model: AutoModelForCausalLM) -> AutoModelForCausalLM:
-        """Apply various optimizations to the model"""
+        """Apply various optimizations to the model with graceful fallbacks"""
+        optimization_results = []
+
         try:
             # Import the config system
             from .cli.config import get_config_value
@@ -198,25 +200,36 @@ def _apply_optimizations(self, model: AutoModelForCausalLM) -> AutoModelForCausalLM:
                 enable_attention_slicing = enable_attention_slicing.lower() not in ('false', '0', 'none', '')
 
             if enable_attention_slicing:
-                if hasattr(model, 'enable_attention_slicing'):
-                    # Use more aggressive slicing for faster inference
-                    model.enable_attention_slicing("max")
-                    logger.info("Attention slicing enabled with max setting")
-                else:
-                    logger.info(
-                        "Attention slicing not available for this model")
+                try:
+                    if hasattr(model, 'enable_attention_slicing'):
+                        # Use more aggressive slicing for faster inference
+                        model.enable_attention_slicing("max")
+                        logger.info("Attention slicing enabled with max setting")
+                        optimization_results.append("✓ Attention slicing")
+                    else:
+                        logger.info("Attention slicing not available for this model")
+                        optimization_results.append("- Attention slicing (not supported)")
+                except Exception as e:
+                    logger.debug(f"Attention slicing failed: {str(e)}")
+                    optimization_results.append("- Attention slicing (failed)")
 
             # Only apply CPU offloading if explicitly enabled and not empty
             enable_cpu_offloading = get_config_value('enable_cpu_offloading', ENABLE_CPU_OFFLOADING)
             if isinstance(enable_cpu_offloading, str):
                 enable_cpu_offloading = enable_cpu_offloading.lower() not in ('false', '0', 'none', '')
 
             if enable_cpu_offloading:
-                if hasattr(model, "enable_cpu_offload"):
-                    model.enable_cpu_offload()
-                    logger.info("CPU offloading enabled")
-                else:
-                    logger.info("CPU offloading not available for this model")
+                try:
+                    if hasattr(model, "enable_cpu_offload"):
+                        model.enable_cpu_offload()
+                        logger.info("CPU offloading enabled")
+                        optimization_results.append("✓ CPU offloading")
+                    else:
+                        logger.info("CPU offloading not available for this model")
+                        optimization_results.append("- CPU offloading (not supported)")
+                except Exception as e:
+                    logger.debug(f"CPU offloading failed: {str(e)}")
+                    optimization_results.append("- CPU offloading (failed)")
 
             # Only apply BetterTransformer if explicitly enabled and not empty
             enable_bettertransformer = get_config_value('enable_better_transformer', ENABLE_BETTERTRANSFORMER)
@@ -225,15 +238,38 @@ def _apply_optimizations(self, model: AutoModelForCausalLM) -> AutoModelForCausalLM:
 
             if enable_bettertransformer:
                 try:
-                    from optimum.bettertransformer import BetterTransformer
-                    model = BetterTransformer.transform(model)
-                    logger.info("BetterTransformer optimization applied")
+                    # Check transformers version compatibility
+                    import transformers
+                    from packaging import version
+
+                    transformers_version = version.parse(transformers.__version__)
+                    if transformers_version >= version.parse("4.49.0"):
+                        logger.info("BetterTransformer is deprecated for transformers>=4.49.0, using native optimizations instead")
+                        # Use native PyTorch optimizations instead
+                        try:
+                            if hasattr(model, "to_bettertransformer"):
+                                # Some models still support the native method
+                                model = model.to_bettertransformer()
+                                logger.info("Applied native BetterTransformer optimization")
+                                optimization_results.append("✓ Native BetterTransformer")
+                            else:
+                                logger.info("Using default PyTorch optimizations (BetterTransformer not needed)")
+                                optimization_results.append("✓ Default PyTorch optimizations")
+                        except Exception as e:
+                            logger.debug(f"Native BetterTransformer not available: {str(e)}")
+                            optimization_results.append("✓ Default PyTorch optimizations")
+                    else:
+                        # Use optimum BetterTransformer for older transformers versions
+                        from optimum.bettertransformer import BetterTransformer
+                        model = BetterTransformer.transform(model)
+                        logger.info("BetterTransformer optimization applied via optimum")
+                        optimization_results.append("✓ BetterTransformer (optimum)")
                 except ImportError:
-                    logger.warning(
-                        "BetterTransformer not available - install 'optimum' for this feature")
+                    logger.info("BetterTransformer not available - using default PyTorch optimizations")
+                    optimization_results.append("✓ Default PyTorch optimizations")
                 except Exception as e:
-                    logger.warning(
-                        f"BetterTransformer optimization failed: {str(e)}")
+                    logger.debug(f"BetterTransformer optimization skipped: {str(e)}")
+                    optimization_results.append("- BetterTransformer (failed)")
 
             # Only apply Flash Attention if explicitly enabled and not empty
             enable_flash_attention = get_config_value('enable_flash_attention', ENABLE_FLASH_ATTENTION)
@@ -246,36 +282,52 @@ def _apply_optimizations(self, model: AutoModelForCausalLM) -> AutoModelForCausalLM:
                     if hasattr(model.config, "attn_implementation"):
                         model.config.attn_implementation = "flash_attention_2"
                         logger.info("Flash Attention 2 enabled via config")
+                        optimization_results.append("✓ Flash Attention 2")
                     # For older models, try the flash_attn module
                     else:
                         import flash_attn
                         logger.info("Flash Attention enabled via module")
+                        optimization_results.append("✓ Flash Attention")
                 except ImportError:
-                    logger.warning(
-                        "Flash Attention not available - install 'flash-attn' for this feature")
+                    logger.info(
+                        "Flash Attention not available - using standard attention (install 'flash-attn' for faster inference)")
+                    optimization_results.append("- Flash Attention (not installed)")
                 except Exception as e:
-                    logger.warning(
-                        f"Flash Attention optimization failed: {str(e)}")
+                    logger.debug(f"Flash Attention optimization skipped: {str(e)}")
+                    optimization_results.append("- Flash Attention (failed)")
 
             # Enable memory efficient attention if available
             try:
                 if hasattr(model, "enable_xformers_memory_efficient_attention"):
                     model.enable_xformers_memory_efficient_attention()
                     logger.info("XFormers memory efficient attention enabled")
+                    optimization_results.append("✓ XFormers memory efficient attention")
+                else:
+                    optimization_results.append("- XFormers (not supported)")
             except Exception as e:
-                logger.info(f"XFormers memory efficient attention not available: {str(e)}")
+                logger.debug(f"XFormers memory efficient attention not available: {str(e)}")
+                optimization_results.append("- XFormers (not available)")
 
             # Enable gradient checkpointing for memory efficiency if available
             try:
                 if hasattr(model, "gradient_checkpointing_enable"):
                     model.gradient_checkpointing_enable()
                     logger.info("Gradient checkpointing enabled for memory efficiency")
+                    optimization_results.append("✓ Gradient checkpointing")
+                else:
+                    optimization_results.append("- Gradient checkpointing (not supported)")
             except Exception as e:
-                logger.info(f"Gradient checkpointing not available: {str(e)}")
+                logger.debug(f"Gradient checkpointing not available: {str(e)}")
+                optimization_results.append("- Gradient checkpointing (failed)")
 
             # Set model to evaluation mode for faster inference
             model.eval()
             logger.info("Model set to evaluation mode for faster inference")
+            optimization_results.append("✓ Evaluation mode")
+
+            # Log optimization summary
+            if optimization_results:
+                logger.info(f"Applied optimizations: {', '.join(optimization_results)}")
 
             return model
         except Exception as e:
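The per-optimization blocks above all follow the same shape: attempt, record the outcome, never raise. A condensed sketch of that pattern (the helper name `try_optimization` is hypothetical, distilled from this diff rather than part of the codebase):

```python
import logging
from typing import Callable, List

logger = logging.getLogger(__name__)

def try_optimization(name: str, apply: Callable[[], None], results: List[str]) -> None:
    # Attempt one optimization; record success or failure instead of
    # propagating errors, mirroring the try/except blocks added above.
    try:
        apply()
        results.append(f"✓ {name}")
    except Exception as e:
        logger.debug(f"{name} failed: {e}")
        results.append(f"- {name} (failed)")

def simulated_failure() -> None:
    raise ImportError("flash-attn not installed")  # stand-in for a real failure

results: List[str] = []
try_optimization("Attention slicing", lambda: None, results)  # simulated success
try_optimization("Flash Attention", simulated_failure, results)
logger.info("Applied optimizations: %s", ", ".join(results))
```

This is what keeps model loading resilient: a single failed optimization downgrades to a debug log entry and a "-" marker in the summary instead of aborting the download.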

locallab/utils/early_config.py

Lines changed: 21 additions & 9 deletions

@@ -91,24 +91,36 @@ def enable_hf_progress_bars():
     except ImportError:
         pass
 
-    # Configure huggingface_hub
+    # Configure huggingface_hub progress bars
     try:
         import huggingface_hub
+        import os
 
         # Different versions of huggingface_hub have different ways to enable progress bars
         # Try multiple approaches to ensure compatibility
+        progress_enabled = False
 
         # Method 1: Try direct module function (newer versions)
         if hasattr(huggingface_hub, "enable_progress_bars"):
-            huggingface_hub.enable_progress_bars()
+            try:
+                huggingface_hub.enable_progress_bars()
+                progress_enabled = True
+            except Exception:
+                pass
 
-        # Method 2: Try through utils.logging (some versions)
-        try:
-            from huggingface_hub.utils import logging as hf_logging
-            if hasattr(hf_logging, "enable_progress_bars"):
-                hf_logging.enable_progress_bars()
-        except (ImportError, AttributeError):
-            pass
+        # Method 2: Try through utils.logging (older versions)
+        if not progress_enabled:
+            try:
+                from huggingface_hub.utils import logging as hf_logging
+                if hasattr(hf_logging, "enable_progress_bars"):
+                    hf_logging.enable_progress_bars()
+                    progress_enabled = True
+            except (ImportError, AttributeError):
+                pass
+
+        # Method 3: Use environment variable as fallback
+        if not progress_enabled:
+            os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "0"
 
         # Method 3: Set environment variable (works for all versions)
         os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "0"
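One caveat worth noting: huggingface_hub typically reads `HF_HUB_DISABLE_PROGRESS_BARS` when it initializes its constants, so the environment-variable fallback is best-effort once the library is already imported. A quick probe of which path a given install supports (a sketch; `are_progress_bars_disabled` exists in recent huggingface_hub releases but is guarded with `hasattr` here in case an older version is installed):

```python
import os
import huggingface_hub
from huggingface_hub import utils as hf_utils

# Probe which progress-bar API this huggingface_hub install exposes.
if hasattr(huggingface_hub, "enable_progress_bars"):
    huggingface_hub.enable_progress_bars()
    print("enabled via top-level API")
else:
    # "0" means "do not disable", i.e. keep progress bars on.
    os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "0"
    print("requested via HF_HUB_DISABLE_PROGRESS_BARS=0")

# Recent releases also expose a query helper; guard it for older versions.
if hasattr(hf_utils, "are_progress_bars_disabled"):
    print("progress bars disabled?", hf_utils.are_progress_bars_disabled())
```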

locallab/utils/progress.py

Lines changed: 43 additions & 17 deletions

@@ -160,31 +160,57 @@ def configure_hf_hub_progress():
     This completely bypasses our custom logger for HuggingFace download progress.
     """
     try:
-        # 1. Enable HuggingFace's native progress bars
-        from huggingface_hub.utils import logging as hf_logging
-        hf_logging.enable_progress_bars()
+        # 1. Enable HuggingFace's native progress bars using the correct API
+        # Try multiple methods for different huggingface_hub versions
+        progress_enabled = False
+
+        # Method 1: Try the main module function (newer versions)
+        try:
+            import huggingface_hub
+            if hasattr(huggingface_hub, "enable_progress_bars"):
+                huggingface_hub.enable_progress_bars()
+                progress_enabled = True
+                logger.debug("Enabled HF progress bars via main module")
+        except (ImportError, AttributeError):
+            pass
+
+        # Method 2: Try through utils.logging (older versions)
+        if not progress_enabled:
+            try:
+                from huggingface_hub.utils import logging as hf_logging
+                if hasattr(hf_logging, "enable_progress_bars"):
+                    hf_logging.enable_progress_bars()
+                    progress_enabled = True
+                    logger.debug("Enabled HF progress bars via utils.logging")
+            except (ImportError, AttributeError):
+                pass
+
+        # Method 3: Try setting environment variable as fallback
+        if not progress_enabled:
+            import os
+            os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "0"
+            logger.debug("Enabled HF progress bars via environment variable")
 
         # 2. Enable HF Transfer for better download experience (only if available)
         try:
             import hf_transfer
             from huggingface_hub import constants
             constants.HF_HUB_ENABLE_HF_TRANSFER = True
+            logger.debug("Enabled HF Transfer for faster downloads")
         except ImportError:
             # hf_transfer not available, skip enabling it
             pass
 
         # 3. Make sure we're NOT overriding HuggingFace's progress callback
         # This is critical - we want to use their native implementation
-        from huggingface_hub import file_download
-        if hasattr(file_download, "_tqdm_callback"):
-            # Reset to default - we don't want any custom callback
-            file_download._tqdm_callback = None
-
-        # 4. Ensure HuggingFace Hub's own logging is properly configured
-        # This ensures HF's own progress bars are displayed correctly
-        import huggingface_hub
-        if hasattr(huggingface_hub, "enable_progress_bars"):
-            huggingface_hub.enable_progress_bars()
+        try:
+            from huggingface_hub import file_download
+            if hasattr(file_download, "_tqdm_callback"):
+                # Reset to default - we don't want any custom callback
+                file_download._tqdm_callback = None
+                logger.debug("Reset HF download callback to default")
+        except (ImportError, AttributeError):
+            pass
 
         # 5. Configure tqdm directly to ensure proper display
         import tqdm
@@ -200,11 +226,11 @@ def configure_hf_hub_progress():
         global is_downloading
         is_downloading = True
 
-        logger.debug("Configured HuggingFace Hub to use its native progress bars")
-    except ImportError:
-        logger.warning("Failed to configure HuggingFace Hub progress bars")
+        logger.debug("Successfully configured HuggingFace Hub progress bars")
+    except ImportError as e:
+        logger.debug(f"HuggingFace Hub progress configuration skipped: {str(e)}")
     except Exception as e:
-        logger.warning(f"Error configuring HuggingFace Hub progress: {str(e)}")
+        logger.debug(f"HuggingFace Hub progress configuration failed: {str(e)}")
 
 # Function to check if we're currently downloading
 def is_model_downloading():
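For step 2, the documented switch for hf_transfer is the `HF_HUB_ENABLE_HF_TRANSFER` environment variable, read when huggingface_hub is imported; a minimal usage sketch (assuming the optional `hf_transfer` package is installed, and using `gpt2`'s config file purely as an example download):

```python
import os

# Set before importing huggingface_hub, since the flag is loaded into
# huggingface_hub.constants at import time.
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

try:
    import hf_transfer  # noqa: F401 - optional Rust-based download accelerator
    from huggingface_hub import hf_hub_download

    path = hf_hub_download(repo_id="gpt2", filename="config.json")
    print(f"downloaded to {path}")
except ImportError:
    print("hf_transfer not installed - standard downloads will be used")
```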

setup.py

Lines changed: 1 addition & 1 deletion

@@ -47,7 +47,7 @@
 
 setup(
     name="locallab",
-    version="0.11.0",
+    version="0.11.1",
     packages=find_packages(include=["locallab", "locallab.*"]),
     install_requires=install_requires,
     extras_require={
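Once released, the bumped version can be cross-checked against the installed package metadata; a quick sanity check (assuming locallab 0.11.1 is installed, Python 3.8+ for `importlib.metadata`):

```python
import locallab
from importlib.metadata import version

# Both should report 0.11.1 for this commit once installed.
print("module:", locallab.__version__)
print("metadata:", version("locallab"))
```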
