Merge pull request #496 from shankarpandala/dev

shankarpandala · web-flow · commit 4a3010d2de68 · 2025-04-06T01:15:35.000+05:30
0.2.16
diff --git a/.bumpversion.cfg b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.2.15
+current_version = 0.2.16
 commit = False
 tag = False
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)
diff --git a/.github/scripts/requirements.txt b/.github/scripts/requirements.txt
@@ -0,0 +1,2 @@
+requests>=2.31.0
+beautifulsoup4>=4.12.0
diff --git a/.github/scripts/update_citations.py b/.github/scripts/update_citations.py
@@ -0,0 +1,54 @@
+import re
+import requests
+from bs4 import BeautifulSoup
+import os
+
+def get_citation_count():
+    url = "https://scholar.google.com/scholar?oi=bibs&hl=en&cites=4325808232671020176,16284230108871951652&as_sdt=5"
+    
+    # Use a real browser User-Agent to avoid being blocked
+    headers = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+    }
+    
+    try:
+        response = requests.get(url, headers=headers)
+        response.raise_for_status()
+        soup = BeautifulSoup(response.text, 'html.parser')
+        
+        # Find the total results count
+        results_div = soup.find('div', {'id': 'gs_ab_md'})
+        if results_div:
+            text = results_div.get_text()
+            match = re.search(r'About\s+(\d+)\s+results', text)
+            if match:
+                return int(match.group(1))
+    except Exception as e:
+        print(f"Error fetching citations: {e}")
+    return None
+
+def update_readme(citation_count):
+    if citation_count is None:
+        return
+        
+    readme_path = "README.md"
+    with open(readme_path, 'r', encoding='utf-8') as f:
+        content = f.read()
+    
+    # Update the citations badge
+    new_content = re.sub(
+        r'\[\!\[Citations\]\(https://img\.shields\.io/badge/Citations-\d+-blue\)\]',
+        f'[![Citations](https://img.shields.io/badge/Citations-{citation_count}-blue)]',
+        content
+    )
+    
+    with open(readme_path, 'w', encoding='utf-8') as f:
+        f.write(new_content)
+
+if __name__ == "__main__":
+    citations = get_citation_count()
+    if citations:
+        update_readme(citations)
+        print(f"Updated citation count to: {citations}")
+    else:
+        print("Failed to update citations")
diff --git a/.github/workflows/update_citations.yml b/.github/workflows/update_citations.yml
@@ -0,0 +1,33 @@
+# Refreshes the Google Scholar citations badge in README.md and commits the
+# result back to the repository when the count has changed.
+name: Update Citations
+
+on:
+  schedule:
+    - cron: '0 0 * * 0'  # Run weekly on Sunday at midnight (UTC)
+  workflow_dispatch:  # Allow manual trigger
+
+jobs:
+  update-citations:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write  # Needed so the job can push the README change
+
+    steps:
+    - uses: actions/checkout@v3
+
+    - name: Set up Python
+      uses: actions/setup-python@v4
+      with:
+        python-version: '3.x'
+
+    - name: Install dependencies
+      run: pip install -r .github/scripts/requirements.txt
+
+    - name: Update citation count
+      run: python .github/scripts/update_citations.py
+
+    - name: Commit and push if changed
+      run: |
+        git config --local user.email "github-actions[bot]@users.noreply.github.com"
+        git config --local user.name "github-actions[bot]"
+        git add README.md
+        # Only the staged README matters here: commit when the index differs
+        # from HEAD, otherwise do nothing (avoids a failing empty commit).
+        git diff --staged --quiet || (git commit -m "Update citation count [skip ci]" && git push)
diff --git a/.gitignore b/.gitignore
@@ -107,4 +107,6 @@ ENV/
 # Test notebooks
 test.ipynb
 notebooks/
-notebook.ipynb
+notebook.ipynb
+
+mlruns/
diff --git a/HISTORY.md b/HISTORY.md
@@ -2,6 +2,14 @@
 title: History
 ---
 
+# 0.2.15 (2025-04-06)
+
+-   Added MLflow integration for experiment tracking
+-   Added support for Python 3.13
+-   Updated all dependencies to latest versions
+-   Improved model logging and tracking capabilities
+-   Added automatic model signature logging with MLflow
+
 # 0.2.11 (2022-02-06)
 
 -   Updated the default version to 3.9
diff --git a/README.md b/README.md
@@ -5,12 +5,20 @@
 [![Documentation Status](https://readthedocs.org/projects/lazypredict/badge/?version=latest)](https://lazypredict.readthedocs.io/en/latest/?badge=latest)
 [![Downloads](https://pepy.tech/badge/lazypredict)](https://pepy.tech/project/lazypredict)
 [![CodeFactor](https://www.codefactor.io/repository/github/shankarpandala/lazypredict/badge)](https://www.codefactor.io/repository/github/shankarpandala/lazypredict)
+[![Citations](https://img.shields.io/badge/Citations-37-blue)](https://scholar.google.com/scholar?oi=bibs&hl=en&cites=4325808232671020176,16284230108871951652&as_sdt=5)
 
 Lazy Predict helps build a lot of basic models without much code and helps understand which models work better without any parameter tuning.
 
 - Free software: MIT license
 - Documentation: <https://lazypredict.readthedocs.io>
 
+## Features
+- Over 40 built-in machine learning models
+- Automatic model selection for classification and regression 
+- Built-in MLflow integration for experiment tracking
+- Support for Python 3.8 through 3.13
+- Custom metric evaluation support
+
 ## Installation
 
 To install Lazy Predict:
@@ -148,4 +156,23 @@ print(models)
 | DecisionTreeRegressor         |          -0.470263   | -0.136112   |  83.4229 |   0.00749898 |
 | GaussianProcessRegressor      |          -0.769174   | -0.367089   |  91.5109 |   0.0770502  |
 | MLPRegressor                  |          -1.86772    | -1.21597    | 116.508  |   0.235267   |
-| KernelRidge                   |          -5.03822    | -3.6659     | 169.061  |   0.0243919  |
+| KernelRidge                   |          -5.03822    | -3.6659     | 169.061  |   0.0243919  |
+
+## MLflow Integration
+
+Lazy Predict includes built-in MLflow integration. Enable it by setting the MLflow tracking URI:
+
+```python
+import os
+os.environ['MLFLOW_TRACKING_URI'] = 'sqlite:///mlflow.db'
+
+# MLflow tracking will be automatically enabled
+reg = LazyRegressor(verbose=0, ignore_warnings=True)
+models, predictions = reg.fit(X_train, X_test, y_train, y_test)
+```
+
+Automatically tracks:
+- Model metrics (R-squared, RMSE, etc.)
+- Training time
+- Model parameters
+- Model artifacts
diff --git a/docs/examples.rst b/docs/examples.rst
@@ -99,17 +99,44 @@ Lazy Predict works seamlessly with pandas DataFrames:
 Using with MLflow
 ---------------
 
-Lazy Predict integrates with MLflow for experiment tracking:
+Lazy Predict has built-in MLflow integration for experiment tracking. You can enable it by setting the MLflow tracking URI:
 
 .. code-block:: python
 
     import os
-    os.environ['MLFLOW_TRACKING_URI'] = 'sqlite:///mlflow.db'
+    os.environ['MLFLOW_TRACKING_URI'] = 'sqlite:///mlflow.db'  # Local SQLite tracking
+    # Or for remote tracking:
+    # os.environ['MLFLOW_TRACKING_URI'] = 'http://your-mlflow-server:5000'
 
     # MLflow tracking will be automatically enabled
     reg = LazyRegressor(verbose=0, ignore_warnings=True)
     models, predictions = reg.fit(X_train, X_test, y_train, y_test)
-    # All metrics will be logged to MLflow automatically
+
+The following metrics and artifacts will be automatically logged to MLflow:
+
+* Model metrics (R-squared, RMSE, etc.)
+* Training time
+* Model parameters
+* Model signatures
+* Custom metrics (if provided)
+* Model artifacts for each trained model
+
+You can view the results in the MLflow UI:
+
+.. code-block:: bash
+
+    mlflow ui
+
+For Databricks users:
+~~~~~~~~~~~~~~~~~~~~~
+
+If you're using Databricks, MLflow tracking is automatically configured:
+
+.. code-block:: python
+
+    # MLflow tracking will use Databricks tracking URI automatically
+    reg = LazyRegressor(verbose=0, ignore_warnings=True)
+    models, predictions = reg.fit(X_train, X_test, y_train, y_test)
 
 Getting Model Objects
 ------------------
diff --git a/docs/index.rst b/docs/index.rst
@@ -51,12 +51,28 @@ Contents
 
 Features
 --------
+* Over 40 built-in machine learning models
 * Automatic model selection for classification and regression
 * Support for both numerical and categorical features
 * Easy integration with scikit-learn pipelines
-* Model performance comparison
+* Model performance comparison and ranking
+* Built-in MLflow integration for experiment tracking
+* Support for Python 3.8 through 3.13
 * Minimal code required
-* MLflow integration for experiment tracking
+* Automatic model metrics logging
+* Custom metric evaluation support
+* Easy model access and reuse
+
+Key Benefits
+------------
+* Rapid model prototyping and selection
+* Automated experiment tracking with MLflow
+* Comprehensive model performance comparison
+* Zero-configuration model evaluation
+* Support for local and remote tracking
+* Integration with Databricks environment
+* Parallel model training capability
+* Extensible with custom metrics
 
 Indices and Tables
 -----------------
diff --git a/lazypredict/Supervised.py b/lazypredict/Supervised.py
@@ -5,7 +5,19 @@
 
 import numpy as np
 import pandas as pd
+import sys
 from tqdm import tqdm
+try:
+    from IPython import get_ipython
+    if 'IPKernelApp' in get_ipython().config:
+        # We're in a Jupyter notebook or similar environment
+        from tqdm.notebook import tqdm as notebook_tqdm
+        use_notebook_tqdm = True
+    else:
+        use_notebook_tqdm = False
+except:
+    use_notebook_tqdm = False
+
 import datetime
 import time
 import os
@@ -311,7 +323,9 @@ def fit(self, X_train, X_test, y_train, y_test):
                 print(exception)
                 print("Invalid Classifier(s)")
 
-        for name, model in tqdm(self.classifiers):
+        # Use notebook tqdm if in Jupyter environment
+        progress_bar = notebook_tqdm if use_notebook_tqdm else tqdm
+        for name, model in progress_bar(self.classifiers):
             start = time.time()
             try:
                 # Start MLflow run for this specific model if MLflow is enabled
@@ -354,6 +368,15 @@ def fit(self, X_train, X_test, y_train, y_test):
                     if roc_auc is not None:
                         mlflow.log_metric("roc_auc", roc_auc)
                     mlflow.log_metric("training_time", time.time() - start)
+
+                    # Log the model with signature
+                    try:
+                        signature = mlflow.models.infer_signature(X_train, pipe.predict(X_train))
+                        mlflow.sklearn.log_model(pipe, f"{name}_model", signature=signature,
+                                             registered_model_name=f"lazy_classifier_{name}")
+                    except Exception as e:
+                        if not self.ignore_warnings:
+                            print(f"Failed to log model {name} to MLflow: {str(e)}")
                 
                 names.append(name)
                 Accuracy.append(accuracy)
@@ -454,7 +477,7 @@ def provide_models(self, X_train, X_test, y_train, y_test):
             Training vectors, where rows is the number of samples
             and columns is the number of features.
         y_test : array-like,
-            Testing vectors, where rows is the number of samples
+            Training vectors, where rows is the number of samples
             and columns is the number of features.
         Returns
         -------
@@ -641,7 +664,9 @@ def fit(self, X_train, X_test, y_train, y_test):
                 print(exception)
                 print("Invalid Regressor(s)")
 
-        for name, model in tqdm(self.regressors):
+        # Use notebook tqdm if in Jupyter environment
+        progress_bar = notebook_tqdm if use_notebook_tqdm else tqdm
+        for name, model in progress_bar(self.regressors):
             start = time.time()
             try:
                 # Start MLflow run for this specific model if MLflow is enabled
@@ -679,6 +704,15 @@ def fit(self, X_train, X_test, y_train, y_test):
                     mlflow.log_metric("rmse", rmse)
                     mlflow.log_metric("training_time", time.time() - start)
 
+                    # Log the model with signature
+                    try:
+                        signature = mlflow.models.infer_signature(X_train, pipe.predict(X_train))
+                        mlflow.sklearn.log_model(pipe, f"{name}_model", signature=signature, 
+                                              registered_model_name=f"lazy_regressor_{name}")
+                    except Exception as e:
+                        if not self.ignore_warnings:
+                            print(f"Failed to log model {name} to MLflow: {str(e)}")
+
                 names.append(name)
                 R2.append(r_squared)
                 ADJR2.append(adj_rsquared)
@@ -757,7 +791,7 @@ def provide_models(self, X_train, X_test, y_train, y_test):
             Training vectors, where rows is the number of samples
             and columns is the number of features.
         y_test : array-like,
-            Testing vectors, where rows is the number of samples
+            Training vectors, where rows is the number of samples
             and columns is the number of features.
         Returns
         -------
diff --git a/lazypredict/__init__.py b/lazypredict/__init__.py
@@ -4,4 +4,4 @@
 
 __author__ = """Shankar Rao Pandala"""
 __email__ = "shankar.pandala@live.com"
-__version__ = '2.15'
+__version__ = '0.2.16'
diff --git a/lazypredict/meta.yaml b/lazypredict/meta.yaml
@@ -1,5 +1,5 @@
 {% set name = "lazypredict" %}
-{% set version = "0.2.13" %}
+{% set version = "0.2.15" %}
 
 package:
   name: {{ name|lower }}
diff --git a/setup.py b/setup.py
@@ -51,6 +51,6 @@
     test_suite="tests",
     tests_require=test_requirements,
     url="https://github.com/shankarpandala/lazypredict",
-    version='2.15',
+    version='0.2.16',
     zip_safe=False,
 )
diff --git a/tests/test_supervised.py b/tests/test_supervised.py

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+requests>=2.31.0`
	`2`	`+beautifulsoup4>=4.12.0`
Original file line number	Diff line number	Diff line change
`@@ -51,6 +51,6 @@`
`51`	`51`	`test_suite="tests",`
`52`	`52`	`tests_require=test_requirements,`
`53`	`53`	`url="https://github.com/shankarpandala/lazypredict",`
`54`		`- version='2.15',`
	`54`	`+ version='0.2.16',`
`55`	`55`	`zip_safe=False,`
`56`	`56`	`)`