Getting ZeroDivisionError with Spark #1502
Unanswered
sujithpallath asked this question in Q&A
Replies: 0
ydata-profiling with Spark raises `ZeroDivisionError: division by zero`, even though the DataFrame has records.
I am following this example: https://github.com/ydataai/ydata-profiling/tree/develop/examples/integrations/databricks
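The failing cell is the `report.to_html()` call. For context, a minimal sketch of the flow, modeled on the linked Databricks example (the data and names here are illustrative, not my actual notebook):

```python
from pyspark.sql import SparkSession
from ydata_profiling import ProfileReport

spark = SparkSession.builder.getOrCreate()

# The DataFrame clearly has records (three rows).
df = spark.createDataFrame(
    [(1, "a"), (2, "b"), (3, None)],
    ["id", "label"],
)

report = ProfileReport(df, title="Profiling Report")
report_html = report.to_html()  # <- raises ZeroDivisionError in my environment
displayHTML(report_html)        # Databricks notebook helper
```

The full traceback: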
```
ZeroDivisionError                         Traceback (most recent call last)
in <cell line: 2>()
      1 #Export the report as html and display
----> 2 report_html = report.to_html()
      3 displayHTML(report_html)

python3.9/site-packages/ydata_profiling/profile_report.py in to_html(self)
    468
    469         """
--> 470         return self.html
    471
    472     def to_json(self) -> str:

python3.9/site-packages/ydata_profiling/profile_report.py in html(self)
    275     def html(self) -> str:
    276         if self._html is None:
--> 277             self._html = self._render_html()
    278         return self._html
    279

python3.9/site-packages/ydata_profiling/profile_report.py in _render_html(self)
    383         from ydata_profiling.report.presentation.flavours import HTMLReport
    384
--> 385         report = self.report
    386
    387         with tqdm(

python3.9/site-packages/ydata_profiling/profile_report.py in report(self)
    269     def report(self) -> Root:
    270         if self._report is None:
--> 271             self._report = get_report_structure(self.config, self.description_set)
    272         return self._report
    273

python3.9/site-packages/ydata_profiling/profile_report.py in description_set(self)
    251     def description_set(self) -> BaseDescription:
    252         if self._description_set is None:
--> 253             self._description_set = describe_df(
    254                 self.config,
    255                 self.df,

python3.9/site-packages/ydata_profiling/model/describe.py in describe(config, df, summarizer, typeset, sample)
     72     # Variable-specific
     73     pbar.total += len(df.columns)
---> 74     series_description = get_series_descriptions(
     75         config, df, summarizer, typeset, pbar
     76     )

python3.9/site-packages/multimethod/__init__.py in __call__(self, *args, **kwargs)
    326         func = self[tuple(func(arg) for func, arg in zip(self.type_checkers, args))]
    327         try:
--> 328             return func(*args, **kwargs)
    329         except TypeError as ex:
    330             raise DispatchError(f"Function {func.__code__}") from ex

python3.9/site-packages/ydata_profiling/model/spark/summary_spark.py in spark_get_series_descriptions(config, df, summarizer, typeset, pbar)
     90     args = [(name, df) for name in df.columns]
     91     with multiprocessing.pool.ThreadPool(12) as executor:
---> 92         for i, (column, description) in enumerate(
     93             executor.imap_unordered(multiprocess_1d, args)
     94         ):

python3.9/multiprocessing/pool.py in next(self, timeout)
    868         if success:
    869             return value
--> 870         raise value
    871
    872     __next__ = next  # XXX

python3.9/multiprocessing/pool.py in worker(inqueue, outqueue, initializer, initargs, maxtasks, wrap_exception)
    123         job, i, func, args, kwds = task
    124         try:
--> 125             result = (True, func(*args, **kwds))
    126         except Exception as e:
    127             if wrap_exception and func is not _helper_reraises_exception:

python3.9/site-packages/ydata_profiling/model/spark/summary_spark.py in multiprocess_1d(args)
     86         """
     87         column, df = args
---> 88         return column, describe_1d(config, df.select(column), summarizer, typeset)
     89
     90     args = [(name, df) for name in df.columns]

python3.9/site-packages/multimethod/__init__.py in __call__(self, *args, **kwargs)
    326         func = self[tuple(func(arg) for func, arg in zip(self.type_checkers, args))]
    327         try:
--> 328             return func(*args, **kwargs)
    329         except TypeError as ex:
    330             raise DispatchError(f"Function {func.__code__}") from ex

python3.9/site-packages/ydata_profiling/model/spark/summary_spark.py in spark_describe_1d(config, series, summarizer, typeset)
     62     }[dtype]
     63
---> 64     return summarizer.summarize(config, series, dtype=vtype)
     65
     66

python3.9/site-packages/ydata_profiling/model/summarizer.py in summarize(self, config, series, dtype)
     40         object:
     41         """
---> 42         _, _, summary = self.handle(str(dtype), config, series, {"type": str(dtype)})
     43         return summary
     44

python3.9/site-packages/ydata_profiling/model/handler.py in handle(self, dtype, *args, **kwargs)
     60         funcs = self.mapping.get(dtype, [])
     61         op = compose(funcs)
---> 62         return op(*args)
     63
     64

python3.9/site-packages/ydata_profiling/model/handler.py in func2(*x)
     19             return f(*x)
     20         else:
---> 21             return f(*res)
     22
     23     return func2

python3.9/site-packages/ydata_profiling/model/handler.py in func2(*x)
     19             return f(*x)
     20         else:
---> 21             return f(*res)
     22
     23     return func2

python3.9/site-packages/ydata_profiling/model/handler.py in func2(*x)
     15 def func(f: Callable, g: Callable) -> Callable:
     16     def func2(*x) -> Any:
---> 17         res = g(*x)
     18         if type(res) == bool:
     19             return f(*x)

python3.9/site-packages/multimethod/__init__.py in __call__(self, *args, **kwargs)
    326         func = self[tuple(func(arg) for func, arg in zip(self.type_checkers, args))]
    327         try:
--> 328             return func(*args, **kwargs)
    329         except TypeError as ex:
    330             raise DispatchError(f"Function {func.__code__}") from ex

python3.9/site-packages/ydata_profiling/model/spark/describe_supported_spark.py in describe_supported_spark(config, series, summary)
     29     summary["is_unique"] = n_unique == count
     30     summary["n_unique"] = n_unique
---> 31     summary["p_unique"] = n_unique / count
     32
     33     return config, series, summary

ZeroDivisionError: division by zero
```
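If I am reading the last frame correctly, the division that fails is `summary["p_unique"] = n_unique / count` in `describe_supported_spark.py`, which means `count` (the non-null count for the column being summarized) must be 0. Since Spark's `count()` skips nulls, a column that is entirely null would report a count of 0 even when the DataFrame itself has rows. A diagnostic sketch of that guess (`df` as in the snippet above; this is an assumption about the cause, not a confirmed fix):

```python
import pyspark.sql.functions as F
from ydata_profiling import ProfileReport

# Spark's count() ignores nulls, so an all-null column reports 0 here
# even though the DataFrame has rows.
non_null = df.select(
    [F.count(F.col(c)).alias(c) for c in df.columns]
).first().asDict()
all_null_cols = [c for c, n in non_null.items() if n == 0]
print("all-null columns:", all_null_cols)

# Possible workaround until the division is guarded upstream:
# profile the DataFrame without those columns.
if all_null_cols:
    report = ProfileReport(df.drop(*all_null_cols), title="Profiling Report")
```

If that is indeed the cause, dropping the columns is only a workaround; the summarizer could guard the division itself (e.g. report `p_unique` as 0 when `count` is 0).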