This repository was archived by the owner on Jan 12, 2024. It is now read-only.
-
-
Notifications
You must be signed in to change notification settings - Fork 2
This repository was archived by the owner on Jan 12, 2024. It is now read-only.
Error with Pandas >= 2.0 #90
Copy link
Copy link
Open
Description
I installed `catalystcoop.pudl-catalog` via mamba as part of a larger `environment.yml` file and ran into two issues:
- sqlalchemy 2.0.15 was installed. I see that setup.py has been modified to limit it below 2.0.
- Pandas 2.0.2 was installed. This causes an issue with (at least) the `pudl.generators_eia860` table.
To reproduce:
import intake
pudl_cat = intake.cat.pudl_cat
gens = pudl_cat.pudl.generators_eia860.read()
The error is:
---------------------------------------------------------------------------
IntCastingNaNError Traceback (most recent call last)
Cell In[6], line 1
----> 1 gens = pudl_cat.pudl.generators_eia860.read()
2 gens.head()
File /opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/intake_sql/intake_sql.py:173, in SQLSourceAutoPartition.read(self)
171 def read(self):
172 self._get_schema()
--> 173 return self._dataframe.compute()
File /opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/dask/base.py:310, in DaskMethodsMixin.compute(self, **kwargs)
286 def compute(self, **kwargs):
287 """Compute this dask collection
288
289 This turns a lazy Dask collection into its in-memory equivalent.
(...)
308 dask.compute
309 """
--> 310 (result,) = compute(self, traverse=False, **kwargs)
311 return result
File /opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/dask/base.py:595, in compute(traverse, optimize_graph, scheduler, get, *args, **kwargs)
592 keys.append(x.__dask_keys__())
593 postcomputes.append(x.__dask_postcompute__())
--> 595 results = schedule(dsk, keys, **kwargs)
596 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
File /opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/dask/threaded.py:89, in get(dsk, keys, cache, num_workers, pool, **kwargs)
86 elif isinstance(pool, multiprocessing.pool.Pool):
87 pool = MultiprocessingPoolExecutor(pool)
---> 89 results = get_async(
90 pool.submit,
91 pool._max_workers,
92 dsk,
93 keys,
94 cache=cache,
95 get_id=_thread_get_id,
96 pack_exception=pack_exception,
97 **kwargs,
98 )
100 # Cleanup pools associated to dead threads
101 with pools_lock:
File /opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/dask/local.py:511, in get_async(submit, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, chunksize, **kwargs)
509 _execute_task(task, data) # Re-execute locally
510 else:
--> 511 raise_exception(exc, tb)
512 res, worker_id = loads(res_info)
513 state["cache"][key] = res
File /opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/dask/local.py:319, in reraise(exc, tb)
317 if exc.__traceback__ is not tb:
318 raise exc.with_traceback(tb)
--> 319 raise exc
File /opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/dask/local.py:224, in execute_task(key, task_info, dumps, loads, get_id, pack_exception)
222 try:
223 task, data = loads(task_info)
--> 224 result = _execute_task(task, data)
225 id = get_id()
226 result = dumps((result, id))
File /opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/dask/core.py:121, in _execute_task(arg, cache, dsk)
117 func, args = arg[0], arg[1:]
118 # Note: Don't assign the subtask results to a variable. numpy detects
119 # temporaries by their reference count and can execute certain
120 # operations in-place.
--> 121 return func(*(_execute_task(a, cache) for a in args))
122 elif not ishashable(arg):
123 return arg
File /opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/dask/core.py:121, in <genexpr>(.0)
117 func, args = arg[0], arg[1:]
118 # Note: Don't assign the subtask results to a variable. numpy detects
119 # temporaries by their reference count and can execute certain
120 # operations in-place.
--> 121 return func(*(_execute_task(a, cache) for a in args))
122 elif not ishashable(arg):
123 return arg
File /opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/dask/core.py:121, in _execute_task(arg, cache, dsk)
117 func, args = arg[0], arg[1:]
118 # Note: Don't assign the subtask results to a variable. numpy detects
119 # temporaries by their reference count and can execute certain
120 # operations in-place.
--> 121 return func(*(_execute_task(a, cache) for a in args))
122 elif not ishashable(arg):
123 return arg
File /opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/dask/utils.py:73, in apply(func, args, kwargs)
42 """Apply a function given its positional and keyword arguments.
43
44 Equivalent to ``func(*args, **kwargs)``
(...)
70 >>> dsk = {'task-name': task} # adds the task to a low level Dask task graph
71 """
72 if kwargs:
---> 73 return func(*args, **kwargs)
74 else:
75 return func(*args)
File /opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/dask/dataframe/io/sql.py:412, in _read_sql_chunk(q, uri, meta, engine_kwargs, **kwargs)
410 return df
411 else:
--> 412 return df.astype(meta.dtypes.to_dict(), copy=False)
File /opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/pandas/core/generic.py:6305, in NDFrame.astype(self, dtype, copy, errors)
6303 else:
6304 try:
-> 6305 res_col = col.astype(dtype=cdt, copy=copy, errors=errors)
6306 except ValueError as ex:
6307 ex.args = (
6308 f"{ex}: Error while type casting for column '{col_name}'",
6309 )
File /opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/pandas/core/generic.py:6324, in NDFrame.astype(self, dtype, copy, errors)
6317 results = [
6318 self.iloc[:, i].astype(dtype, copy=copy)
6319 for i in range(len(self.columns))
6320 ]
6322 else:
6323 # else, only a single dtype is given
-> 6324 new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors)
6325 return self._constructor(new_data).__finalize__(self, method="astype")
6327 # GH 33113: handle empty frame or series
File /opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/pandas/core/internals/managers.py:451, in BaseBlockManager.astype(self, dtype, copy, errors)
448 elif using_copy_on_write():
449 copy = False
--> 451 return self.apply(
452 "astype",
453 dtype=dtype,
454 copy=copy,
455 errors=errors,
456 using_cow=using_copy_on_write(),
457 )
File /opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/pandas/core/internals/managers.py:352, in BaseBlockManager.apply(self, f, align_keys, **kwargs)
350 applied = b.apply(f, **kwargs)
351 else:
--> 352 applied = getattr(b, f)(**kwargs)
353 result_blocks = extend_blocks(applied, result_blocks)
355 out = type(self).from_blocks(result_blocks, self.axes)
File /opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/pandas/core/internals/blocks.py:511, in Block.astype(self, dtype, copy, errors, using_cow)
491 """
492 Coerce to the new dtype.
493
(...)
507 Block
508 """
509 values = self.values
--> 511 new_values = astype_array_safe(values, dtype, copy=copy, errors=errors)
513 new_values = maybe_coerce_values(new_values)
515 refs = None
File /opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/pandas/core/dtypes/astype.py:242, in astype_array_safe(values, dtype, copy, errors)
239 dtype = dtype.numpy_dtype
241 try:
--> 242 new_values = astype_array(values, dtype, copy=copy)
243 except (ValueError, TypeError):
244 # e.g. _astype_nansafe can fail on object-dtype of strings
245 # trying to convert to float
246 if errors == "ignore":
File /opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/pandas/core/dtypes/astype.py:187, in astype_array(values, dtype, copy)
184 values = values.astype(dtype, copy=copy)
186 else:
--> 187 values = _astype_nansafe(values, dtype, copy=copy)
189 # in pandas we don't store numpy str dtypes, so convert to object
190 if isinstance(dtype, np.dtype) and issubclass(values.dtype.type, str):
File /opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/pandas/core/dtypes/astype.py:105, in _astype_nansafe(arr, dtype, copy, skipna)
100 return lib.ensure_string_array(
101 arr, skipna=skipna, convert_na_value=False
102 ).reshape(shape)
104 elif np.issubdtype(arr.dtype, np.floating) and is_integer_dtype(dtype):
--> 105 return _astype_float_to_int_nansafe(arr, dtype, copy)
107 elif is_object_dtype(arr.dtype):
108 # if we have a datetime/timedelta array of objects
109 # then coerce to datetime64[ns] and use DatetimeArray.astype
111 if is_datetime64_dtype(dtype):
File /opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/pandas/core/dtypes/astype.py:150, in _astype_float_to_int_nansafe(values, dtype, copy)
146 """
147 astype with a check preventing converting NaN to an meaningless integer value.
148 """
149 if not np.isfinite(values).all():
--> 150 raise IntCastingNaNError(
151 "Cannot convert non-finite values (NA or inf) to integer"
152 )
153 if dtype.kind == "u":
154 # GH#45151
155 if not (values >= 0).all():
IntCastingNaNError: Cannot convert non-finite values (NA or inf) to integer: Error while type casting for column 'utility_id_eia'
Metadata
Metadata
Assignees
Labels
No labels