Skip to content

Commit 0fabbf7

Browse files
committed
fix: issue#915 error for large integers
1 parent ea29771 commit 0fabbf7

File tree

2 files changed

+33
-1
lines changed

2 files changed

+33
-1
lines changed

src/pandas_profiling/model/summary_algorithms.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ def histogram_compute(
3636
stats = {}
3737
bins = config.plot.histogram.bins
3838
bins_arg = "auto" if bins == 0 else min(bins, n_unique)
39+
bins_arg = np.histogram_bin_edges(finite_values, bins=bins_arg)
3940
stats[name] = np.histogram(finite_values, bins=bins_arg, weights=weights)
4041

4142
max_bins = config.plot.histogram.max_bins
@@ -49,7 +50,8 @@ def chi_square(
4950
values: Optional[np.ndarray] = None, histogram: Optional[np.ndarray] = None
5051
) -> dict:
5152
if histogram is None:
52-
histogram, _ = np.histogram(values, bins="auto")
53+
bins = bins = np.histogram_bin_edges(values, bins='auto')
54+
histogram, _ = np.histogram(values, bins=bins)
5355
return dict(chisquare(histogram)._asdict())
5456

5557

tests/issues/test_issue915.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
"""
2+
Test for issue 915:
3+
https://github.com/ydataai/pandas-profiling/issues/915
4+
5+
Error for series with large integers.
6+
"""
7+
import fnmatch
8+
import pandas as pd
9+
from pandas_profiling import ProfileReport
10+
11+
def test_issue915():
    """Regression test for issue 915: profiling a series of large integers.

    Builds a single-column frame of 100 consecutive integers starting at
    716277643516076032, profiles it, and for several ``n_extreme_obs``
    settings checks that the rendered HTML contains the "Minimum N values"
    and "Maximum N values" tab links.
    """
    large_ints = pd.Series([716277643516076032 + i for i in range(100)])
    report = ProfileReport(pd.DataFrame({"col": large_ints}))

    for n_extreme_obs in (5, 100, 120):
        # Change the setting and drop cached results so the report is
        # rendered again with the new number of extreme observations.
        report.config.n_extreme_obs = n_extreme_obs
        report.invalidate_cache()

        # fnmatch-style glob patterns for the two extreme-value tab links.
        min_tab = f"*<a href=* aria-controls=* role=tab data-toggle=tab>Minimum {n_extreme_obs} values</a>*"
        max_tab = f"*<a href=* aria-controls=* role=tab data-toggle=tab>Maximum {n_extreme_obs} values</a>*"

        rendered = report.to_html()

        assert fnmatch.fnmatch(rendered, min_tab)
        assert fnmatch.fnmatch(rendered, max_tab)

0 commit comments

Comments
 (0)