-
Notifications
You must be signed in to change notification settings - Fork 57
Description
Snippet used:
# Setting what variables are time series
# Per-column type overrides handed to ProfileReport through `type_schema`:
# the four "NO2 ..." columns are declared "timeseries", while "cos" and
# "cat" are declared "numeric".
type_schema = {
"NO2 Mean": "timeseries",
"NO2 1st Max Value": "timeseries",
"NO2 1st Max Hour": "timeseries",
"NO2 AQI": "timeseries",
"cos": "numeric",
"cat": "numeric",
}
# Build the report for the `giftcard_activity` frame with tsmode=True.
# NOTE(review): the `sortby` column name is redacted in this report. The
# traceback below fails inside pandas_describe_date_1d with
# "'datetime.date' object has no attribute 'hour'", so presumably some
# date-typed column holds plain datetime.date objects rather than
# Timestamps -- confirm which column against the real data.
profile = ProfileReport(
giftcard_activity,
tsmode=True,
type_schema=type_schema,
sortby="********",
title="Time-Series EDA",
)
# Writing the report is the call that raises the AttributeError shown in the
# traceback below (the output file name is redacted here).
profile.to_file("*******.html")
Summarize dataset: 0%| | 0/5 [00:00<?, ?it/s]/usr/local/lib/python3.11/site-packages/ydata_profiling/model/pandas/describe_date_pandas.py:44: UserWarning: Discarding nonzero nanoseconds in conversion.
"min": pd.Timestamp.to_pydatetime(series.min()),
Summarize dataset: 0%| | 0/66 [00:00<?, ?it/s]
AttributeError Traceback (most recent call last)
Cell In[18], line 19
2 type_schema = {
3 "NO2 Mean": "timeseries",
4 "NO2 1st Max Value": "timeseries",
(...)
8 "cat": "numeric",
9 }
11 profile = ProfileReport(
12 giftcard_activity,
13 tsmode=True,
(...)
16 title="Time-Series EDA",
17 )
---> 19 profile.to_file("giftcard_activity_timeseries.html")
File /usr/local/lib/python3.11/site-packages/ydata_profiling/profile_report.py:379, in ProfileReport.to_file(self, output_file, silent)
376 self.config.html.assets_prefix = str(output_file.stem) + "_assets"
377 create_html_assets(self.config, output_file)
--> 379 data = self.to_html()
381 if output_file.suffix != ".html":
382 suffix = output_file.suffix
File /usr/local/lib/python3.11/site-packages/ydata_profiling/profile_report.py:496, in ProfileReport.to_html(self)
488 def to_html(self) -> str:
489 """Generate and return complete template as lengthy string
490 for using with frameworks.
491
(...)
494
495 """
--> 496 return self.html
File /usr/local/lib/python3.11/site-packages/ydata_profiling/profile_report.py:292, in ProfileReport.html(self)
289 @property
290 def html(self) -> str:
291 if self._html is None:
--> 292 self._html = self._render_html()
293 return self._html
File /usr/local/lib/python3.11/site-packages/ydata_profiling/profile_report.py:409, in ProfileReport._render_html(self)
406 def _render_html(self) -> str:
407 from ydata_profiling.report.presentation.flavours import HTMLReport
--> 409 report = self.report
411 with tqdm(
412 total=1, desc="Render HTML", disable=not self.config.progress_bar
413 ) as pbar:
414 html = HTMLReport(copy.deepcopy(report)).render(
415 nav=self.config.html.navbar_show,
416 offline=self.config.html.use_local_assets,
(...)
424 version=self.description_set.package["ydata_profiling_version"],
425 )
File /usr/local/lib/python3.11/site-packages/ydata_profiling/profile_report.py:286, in ProfileReport.report(self)
283 @property
284 def report(self) -> Root:
285 if self._report is None:
--> 286 self._report = get_report_structure(self.config, self.description_set)
287 return self._report
File /usr/local/lib/python3.11/site-packages/ydata_profiling/profile_report.py:268, in ProfileReport.description_set(self)
265 @property
266 def description_set(self) -> BaseDescription:
267 if self._description_set is None:
--> 268 self._description_set = describe_df(
269 self.config,
270 self.df,
271 self.summarizer,
272 self.typeset,
273 self._sample,
274 )
275 return self._description_set
File /usr/local/lib/python3.11/site-packages/ydata_profiling/model/describe.py:74, in describe(config, df, summarizer, typeset, sample)
72 # Variable-specific
73 pbar.total += len(df.columns)
---> 74 series_description = get_series_descriptions(
75 config, df, summarizer, typeset, pbar
76 )
78 pbar.set_postfix_str("Get variable types")
79 pbar.total += 1
File /usr/local/lib/python3.11/site-packages/multimethod/__init__.py:369, in multimethod.__call__(self, *args, **kwargs)
367 func = self.dispatch(*args)
368 try:
--> 369 return func(*args, **kwargs)
370 except TypeError as ex:
371 raise DispatchError(f"Function {func.__code__}") from ex
File /usr/local/lib/python3.11/site-packages/ydata_profiling/model/pandas/summary_pandas.py:99, in pandas_get_series_descriptions(config, df, summarizer, typeset, pbar)
96 else:
97 # TODO: use Pool for Linux-based systems
98 with multiprocessing.pool.ThreadPool(pool_size) as executor:
---> 99 for i, (column, description) in enumerate(
100 executor.imap_unordered(multiprocess_1d, args)
101 ):
102 pbar.set_postfix_str(f"Describe variable:{column}")
103 series_description[column] = description
File /usr/local/lib/python3.11/multiprocessing/pool.py:873, in IMapIterator.next(self, timeout)
871 if success:
872 return value
--> 873 raise value
File /usr/local/lib/python3.11/multiprocessing/pool.py:125, in worker(inqueue, outqueue, initializer, initargs, maxtasks, wrap_exception)
123 job, i, func, args, kwds = task
124 try:
--> 125 result = (True, func(*args, **kwds))
126 except Exception as e:
127 if wrap_exception and func is not _helper_reraises_exception:
File /usr/local/lib/python3.11/site-packages/ydata_profiling/model/pandas/summary_pandas.py:79, in pandas_get_series_descriptions.<locals>.multiprocess_1d(args)
69 """Wrapper to process series in parallel.
70
71 Args:
(...)
76 A tuple with column and the series description.
77 """
78 column, series = args
---> 79 return column, describe_1d(config, series, summarizer, typeset)
File /usr/local/lib/python3.11/site-packages/multimethod/__init__.py:369, in multimethod.__call__(self, *args, **kwargs)
367 func = self.dispatch(*args)
368 try:
--> 369 return func(*args, **kwargs)
370 except TypeError as ex:
371 raise DispatchError(f"Function {func.__code__}") from ex
File /usr/local/lib/python3.11/site-packages/ydata_profiling/model/pandas/summary_pandas.py:57, in pandas_describe_1d(config, series, summarizer, typeset)
54 vtype = typeset.detect_type(series)
56 typeset.type_schema[series.name] = vtype
---> 57 return summarizer.summarize(config, series, dtype=vtype)
File /usr/local/lib/python3.11/site-packages/ydata_profiling/model/summarizer.py:42, in BaseSummarizer.summarize(self, config, series, dtype)
34 def summarize(
35 self, config: Settings, series: pd.Series, dtype: Type[VisionsBaseType]
36 ) -> dict:
37 """
38
39 Returns:
40 object:
41 """
---> 42 _, _, summary = self.handle(str(dtype), config, series, {"type": str(dtype)})
43 return summary
File /usr/local/lib/python3.11/site-packages/ydata_profiling/model/handler.py:62, in Handler.handle(self, dtype, *args, **kwargs)
60 funcs = self.mapping.get(dtype, [])
61 op = compose(funcs)
---> 62 return op(*args)
File /usr/local/lib/python3.11/site-packages/ydata_profiling/model/handler.py:21, in compose.<locals>.func.<locals>.func2(*x)
19 return f(*x)
20 else:
---> 21 return f(*res)
File /usr/local/lib/python3.11/site-packages/ydata_profiling/model/handler.py:21, in compose.<locals>.func.<locals>.func2(*x)
19 return f(*x)
20 else:
---> 21 return f(*res)
File /usr/local/lib/python3.11/site-packages/ydata_profiling/model/handler.py:21, in compose.<locals>.func.<locals>.func2(*x)
19 return f(*x)
20 else:
---> 21 return f(*res)
File /usr/local/lib/python3.11/site-packages/ydata_profiling/model/handler.py:17, in compose.<locals>.func.<locals>.func2(*x)
16 def func2(*x) -> Any:
---> 17 res = g(*x)
18 if type(res) == bool:
19 return f(*x)
File /usr/local/lib/python3.11/site-packages/multimethod/__init__.py:369, in multimethod.__call__(self, *args, **kwargs)
367 func = self.dispatch(*args)
368 try:
--> 369 return func(*args, **kwargs)
370 except TypeError as ex:
371 raise DispatchError(f"Function {func.__code__}") from ex
File /usr/local/lib/python3.11/site-packages/ydata_profiling/model/summary_algorithms.py:73, in series_hashable.<locals>.inner(config, series, summary)
71 if not summary["hashable"]:
72 return config, series, summary
---> 73 return fn(config, series, summary)
File /usr/local/lib/python3.11/site-packages/ydata_profiling/model/summary_algorithms.py:90, in series_handle_nulls.<locals>.inner(config, series, summary)
87 if series.hasnans:
88 series = series.dropna()
---> 90 return fn(config, series, summary)
File /usr/local/lib/python3.11/site-packages/ydata_profiling/model/pandas/describe_date_pandas.py:44, in pandas_describe_date_1d(config, series, summary)
34 summary.update(
35 {
36 "min": pd.NaT,
(...)
39 }
40 )
41 else:
42 summary.update(
43 {
---> 44 "min": pd.Timestamp.to_pydatetime(series.min()),
45 "max": pd.Timestamp.to_pydatetime(series.max()),
46 }
47 )
49 summary["range"] = summary["max"] - summary["min"]
51 values = series.values.astype(np.int64) // 10**9
File timestamps.pyx:1150, in pandas._libs.tslibs.timestamps._Timestamp.to_pydatetime()
File timestamps.pyx:1172, in pandas._libs.tslibs.timestamps._Timestamp.to_pydatetime()
AttributeError: 'datetime.date' object has no attribute 'hour'