Skip to content

Commit 4a3010d

Browse files
Merge pull request #496 from shankarpandala/dev
0.2.16
2 parents 386bc3a + 688fc02 commit 4a3010d

14 files changed

+325
-39
lines changed

.bumpversion.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[bumpversion]
2-
current_version = 0.2.15
2+
current_version = 0.2.16
33
commit = False
44
tag = False
55
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)

.github/scripts/requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
requests>=2.31.0
2+
beautifulsoup4>=4.12.0

.github/scripts/update_citations.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
import re
2+
import requests
3+
from bs4 import BeautifulSoup
4+
import os
5+
6+
def get_citation_count(timeout=30):
    """Fetch the citation count shown on the Google Scholar "cited by" page.

    Args:
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The number of results reported by Google Scholar as an ``int``, or
        ``None`` when the request fails or the result summary cannot be
        located or parsed.
    """
    url = "https://scholar.google.com/scholar?oi=bibs&hl=en&cites=4325808232671020176,16284230108871951652&as_sdt=5"

    # Use a real browser User-Agent to avoid being blocked
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    try:
        # Without a timeout a stalled connection would hang the scheduled job.
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # Find the total results count
        results_div = soup.find('div', {'id': 'gs_ab_md'})
        if results_div is None:
            return None

        # The summary may read "About 1,230 results", "37 results" or
        # "1 result": the "About" prefix is optional and larger counts
        # carry thousands separators.
        match = re.search(
            r'(?:About\s+)?(\d[\d,]*)\s+results?',
            results_div.get_text(),
        )
        if match:
            return int(match.group(1).replace(',', ''))
    except Exception as e:
        # Best-effort scrape: report the problem and let the caller decide.
        print(f"Error fetching citations: {e}")
    return None
29+
30+
def update_readme(citation_count):
    """Rewrite the Citations badge in README.md with a new count.

    The README is read from the current working directory. The file is only
    written back when the substitution actually changed its text.

    Args:
        citation_count: New count to show in the badge. ``None`` means the
            count is unknown and the README is left untouched.

    Returns:
        ``True`` if README.md was rewritten, ``False`` if nothing changed
        (unknown count, badge not found, or count already up to date).
    """
    if citation_count is None:
        return False

    readme_path = "README.md"
    with open(readme_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Update the citations badge
    new_content = re.sub(
        r'\[\!\[Citations\]\(https://img\.shields\.io/badge/Citations-\d+-blue\)\]',
        f'[![Citations](https://img.shields.io/badge/Citations-{citation_count}-blue)]',
        content
    )

    # Skip the write when nothing changed so the file is not touched needlessly.
    if new_content == content:
        return False

    with open(readme_path, 'w', encoding='utf-8') as f:
        f.write(new_content)
    return True
47+
48+
if __name__ == "__main__":
    citations = get_citation_count()
    # Compare against None explicitly: None is the failure signal, whereas a
    # scraped count of 0 is a valid result and must not be reported as a failure.
    if citations is not None:
        update_readme(citations)
        print(f"Updated citation count to: {citations}")
    else:
        print("Failed to update citations")
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
name: Update Citations
2+
3+
on:
4+
schedule:
5+
- cron: '0 0 * * 0'  # Run weekly on Sunday at 00:00 UTC
6+
workflow_dispatch: # Allow manual trigger
7+
8+
jobs:
9+
update-citations:
10+
runs-on: ubuntu-latest
11+
permissions:
12+
contents: write
13+
14+
steps:
15+
- uses: actions/checkout@v3
16+
17+
- name: Set up Python
18+
uses: actions/setup-python@v4
19+
with:
20+
python-version: '3.x'
21+
22+
- name: Install dependencies
23+
run: pip install -r .github/scripts/requirements.txt
24+
25+
- name: Update citation count
26+
run: python .github/scripts/update_citations.py
27+
28+
- name: Commit and push if changed
29+
run: |
30+
git config --local user.email "github-actions[bot]@users.noreply.github.com"
31+
git config --local user.name "github-actions[bot]"
32+
git add README.md
33+
git diff --quiet && git diff --staged --quiet || (git commit -m "Update citation count [skip ci]" && git push)

.gitignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,4 +107,6 @@ ENV/
107107
# Test notebooks
108108
test.ipynb
109109
notebooks/
110-
notebook.ipynb
110+
notebook.ipynb
111+
112+
mlruns/

HISTORY.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,14 @@
22
title: History
33
---
44

5+
# 0.2.15 (2025-04-06)
6+
7+
- Added MLflow integration for experiment tracking
8+
- Added support for Python 3.13
9+
- Updated all dependencies to latest versions
10+
- Improved model logging and tracking capabilities
11+
- Added automatic model signature logging with MLflow
12+
513
# 0.2.11 (2022-02-06)
614

715
- Updated the default version to 3.9

README.md

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,20 @@
55
[![Documentation Status](https://readthedocs.org/projects/lazypredict/badge/?version=latest)](https://lazypredict.readthedocs.io/en/latest/?badge=latest)
66
[![Downloads](https://pepy.tech/badge/lazypredict)](https://pepy.tech/project/lazypredict)
77
[![CodeFactor](https://www.codefactor.io/repository/github/shankarpandala/lazypredict/badge)](https://www.codefactor.io/repository/github/shankarpandala/lazypredict)
8+
[![Citations](https://img.shields.io/badge/Citations-37-blue)](https://scholar.google.com/scholar?oi=bibs&hl=en&cites=4325808232671020176,16284230108871951652&as_sdt=5)
89

910
Lazy Predict helps build a lot of basic models without much code and helps understand which models work better without any parameter tuning.
1011

1112
- Free software: MIT license
1213
- Documentation: <https://lazypredict.readthedocs.io>
1314

15+
## Features
16+
- Over 40 built-in machine learning models
17+
- Automatic model selection for classification and regression
18+
- Built-in MLflow integration for experiment tracking
19+
- Support for Python 3.8 through 3.13
20+
- Custom metric evaluation support
21+
1422
## Installation
1523

1624
To install Lazy Predict:
@@ -148,4 +156,23 @@ print(models)
148156
| DecisionTreeRegressor | -0.470263 | -0.136112 | 83.4229 | 0.00749898 |
149157
| GaussianProcessRegressor | -0.769174 | -0.367089 | 91.5109 | 0.0770502 |
150158
| MLPRegressor | -1.86772 | -1.21597 | 116.508 | 0.235267 |
151-
| KernelRidge | -5.03822 | -3.6659 | 169.061 | 0.0243919 |
159+
| KernelRidge | -5.03822 | -3.6659 | 169.061 | 0.0243919 |
160+
161+
## MLflow Integration
162+
163+
Lazy Predict includes built-in MLflow integration. Enable it by setting the MLflow tracking URI:
164+
165+
```python
166+
import os
167+
os.environ['MLFLOW_TRACKING_URI'] = 'sqlite:///mlflow.db'
168+
169+
# MLflow tracking will be automatically enabled
170+
reg = LazyRegressor(verbose=0, ignore_warnings=True)
171+
models, predictions = reg.fit(X_train, X_test, y_train, y_test)
172+
```
173+
174+
Automatically tracks:
175+
- Model metrics (R-squared, RMSE, etc.)
176+
- Training time
177+
- Model parameters
178+
- Model artifacts

docs/examples.rst

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -99,17 +99,44 @@ Lazy Predict works seamlessly with pandas DataFrames:
9999
Using with MLflow
100100
-----------------
101101

102-
Lazy Predict integrates with MLflow for experiment tracking:
102+
Lazy Predict has built-in MLflow integration for experiment tracking. You can enable it by setting the MLflow tracking URI:
103103

104104
.. code-block:: python
105105
106106
import os
107-
os.environ['MLFLOW_TRACKING_URI'] = 'sqlite:///mlflow.db'
107+
os.environ['MLFLOW_TRACKING_URI'] = 'sqlite:///mlflow.db' # Local SQLite tracking
108+
# Or for remote tracking:
109+
# os.environ['MLFLOW_TRACKING_URI'] = 'http://your-mlflow-server:5000'
108110
109111
# MLflow tracking will be automatically enabled
110112
reg = LazyRegressor(verbose=0, ignore_warnings=True)
111113
models, predictions = reg.fit(X_train, X_test, y_train, y_test)
112-
# All metrics will be logged to MLflow automatically
114+
115+
The following metrics and artifacts will be automatically logged to MLflow:
116+
117+
* Model metrics (R-squared, RMSE, etc.)
118+
* Training time
119+
* Model parameters
120+
* Model signatures
121+
* Custom metrics (if provided)
122+
* Model artifacts for each trained model
123+
124+
You can view the results in the MLflow UI:
125+
126+
.. code-block:: bash
127+
128+
mlflow ui
129+
130+
For Databricks users:
131+
~~~~~~~~~~~~~~~~~~~~~
132+
133+
If you're using Databricks, MLflow tracking is automatically configured:
134+
135+
.. code-block:: python
136+
137+
# MLflow tracking will use Databricks tracking URI automatically
138+
reg = LazyRegressor(verbose=0, ignore_warnings=True)
139+
models, predictions = reg.fit(X_train, X_test, y_train, y_test)
113140
114141
Getting Model Objects
115142
---------------------

docs/index.rst

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,12 +51,28 @@ Contents
5151

5252
Features
5353
--------
54+
* Over 40 built-in machine learning models
5455
* Automatic model selection for classification and regression
5556
* Support for both numerical and categorical features
5657
* Easy integration with scikit-learn pipelines
57-
* Model performance comparison
58+
* Model performance comparison and ranking
59+
* Built-in MLflow integration for experiment tracking
60+
* Support for Python 3.8 through 3.13
5861
* Minimal code required
59-
* MLflow integration for experiment tracking
62+
* Automatic model metrics logging
63+
* Custom metric evaluation support
64+
* Easy model access and reuse
65+
66+
Key Benefits
67+
------------
68+
* Rapid model prototyping and selection
69+
* Automated experiment tracking with MLflow
70+
* Comprehensive model performance comparison
71+
* Zero-configuration model evaluation
72+
* Support for local and remote tracking
73+
* Integration with Databricks environment
74+
* Parallel model training capability
75+
* Extensible with custom metrics
6076

6177
Indices and Tables
6278
------------------

lazypredict/Supervised.py

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,19 @@
55

66
import numpy as np
77
import pandas as pd
8+
import sys
89
from tqdm import tqdm
10+
# Decide once, at import time, whether the notebook-flavoured progress bar
# should be used. Detection is best-effort: any failure falls back to the
# plain console tqdm.
try:
    from IPython import get_ipython

    # get_ipython() returns None outside an IPython session, so check for
    # that explicitly instead of relying on the resulting AttributeError.
    _ipython_shell = get_ipython()
    if _ipython_shell is not None and 'IPKernelApp' in _ipython_shell.config:
        # We're in a Jupyter notebook or similar environment
        from tqdm.notebook import tqdm as notebook_tqdm
        use_notebook_tqdm = True
    else:
        use_notebook_tqdm = False
except Exception:
    # Catch Exception rather than using a bare except so KeyboardInterrupt
    # and SystemExit are not swallowed during import.
    use_notebook_tqdm = False
20+
921
import datetime
1022
import time
1123
import os
@@ -311,7 +323,9 @@ def fit(self, X_train, X_test, y_train, y_test):
311323
print(exception)
312324
print("Invalid Classifier(s)")
313325

314-
for name, model in tqdm(self.classifiers):
326+
# Use notebook tqdm if in Jupyter environment
327+
progress_bar = notebook_tqdm if use_notebook_tqdm else tqdm
328+
for name, model in progress_bar(self.classifiers):
315329
start = time.time()
316330
try:
317331
# Start MLflow run for this specific model if MLflow is enabled
@@ -354,6 +368,15 @@ def fit(self, X_train, X_test, y_train, y_test):
354368
if roc_auc is not None:
355369
mlflow.log_metric("roc_auc", roc_auc)
356370
mlflow.log_metric("training_time", time.time() - start)
371+
372+
# Log the model with signature
373+
try:
374+
signature = mlflow.models.infer_signature(X_train, pipe.predict(X_train))
375+
mlflow.sklearn.log_model(pipe, f"{name}_model", signature=signature,
376+
registered_model_name=f"lazy_classifier_{name}")
377+
except Exception as e:
378+
if not self.ignore_warnings:
379+
print(f"Failed to log model {name} to MLflow: {str(e)}")
357380

358381
names.append(name)
359382
Accuracy.append(accuracy)
@@ -454,7 +477,7 @@ def provide_models(self, X_train, X_test, y_train, y_test):
454477
Training vectors, where rows is the number of samples
455478
and columns is the number of features.
456479
y_test : array-like,
457-
Testing vectors, where rows is the number of samples
480+
Testing vectors, where rows is the number of samples
458481
and columns is the number of features.
459482
Returns
460483
-------
@@ -641,7 +664,9 @@ def fit(self, X_train, X_test, y_train, y_test):
641664
print(exception)
642665
print("Invalid Regressor(s)")
643666

644-
for name, model in tqdm(self.regressors):
667+
# Use notebook tqdm if in Jupyter environment
668+
progress_bar = notebook_tqdm if use_notebook_tqdm else tqdm
669+
for name, model in progress_bar(self.regressors):
645670
start = time.time()
646671
try:
647672
# Start MLflow run for this specific model if MLflow is enabled
@@ -679,6 +704,15 @@ def fit(self, X_train, X_test, y_train, y_test):
679704
mlflow.log_metric("rmse", rmse)
680705
mlflow.log_metric("training_time", time.time() - start)
681706

707+
# Log the model with signature
708+
try:
709+
signature = mlflow.models.infer_signature(X_train, pipe.predict(X_train))
710+
mlflow.sklearn.log_model(pipe, f"{name}_model", signature=signature,
711+
registered_model_name=f"lazy_regressor_{name}")
712+
except Exception as e:
713+
if not self.ignore_warnings:
714+
print(f"Failed to log model {name} to MLflow: {str(e)}")
715+
682716
names.append(name)
683717
R2.append(r_squared)
684718
ADJR2.append(adj_rsquared)
@@ -757,7 +791,7 @@ def provide_models(self, X_train, X_test, y_train, y_test):
757791
Training vectors, where rows is the number of samples
758792
and columns is the number of features.
759793
y_test : array-like,
760-
Testing vectors, where rows is the number of samples
794+
Testing vectors, where rows is the number of samples
761795
and columns is the number of features.
762796
Returns
763797
-------

lazypredict/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@
44

55
__author__ = """Shankar Rao Pandala"""
66
__email__ = "shankar.pandala@live.com"
7-
__version__ = '2.15'
7+
__version__ = '0.2.16'

lazypredict/meta.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{% set name = "lazypredict" %}
2-
{% set version = "0.2.13" %}
2+
{% set version = "0.2.15" %}
33

44
package:
55
name: {{ name|lower }}

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,6 @@
5151
test_suite="tests",
5252
tests_require=test_requirements,
5353
url="https://github.com/shankarpandala/lazypredict",
54-
version='2.15',
54+
version='0.2.16',
5555
zip_safe=False,
5656
)

0 commit comments

Comments
 (0)