Skip to content
This repository was archived by the owner on Jan 12, 2024. It is now read-only.

Error with Pandas >= 2.0 #90

@gschivley

Description

@gschivley

I installed catalystcoop.pudl-catalog via mamba as part of a larger environment.yml file and ran into two issues:

  1. sqlalchemy 2.0.15 was installed. I see that setup.py has been modified to restrict it to versions below 2.0.
  2. Pandas 2.0.2 was installed. This causes an error when reading (at least) the pudl.generators_eia860 table.

To reproduce:

import intake

pudl_cat = intake.cat.pudl_cat

gens = pudl_cat.pudl.generators_eia860.read()

The error is:

---------------------------------------------------------------------------
IntCastingNaNError                        Traceback (most recent call last)
Cell In[6], line 1
----> 1 gens = pudl_cat.pudl.generators_eia860.read()
      2 gens.head()

File [/opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/intake_sql/intake_sql.py:173](https://file+.vscode-resource.vscode-cdn.net/opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/intake_sql/intake_sql.py:173), in SQLSourceAutoPartition.read(self)
    171 def read(self):
    172     self._get_schema()
--> 173     return self._dataframe.compute()

File [/opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/dask/base.py:310](https://file+.vscode-resource.vscode-cdn.net/opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/dask/base.py:310), in DaskMethodsMixin.compute(self, **kwargs)
    286 def compute(self, **kwargs):
    287     """Compute this dask collection
    288 
    289     This turns a lazy Dask collection into its in-memory equivalent.
   (...)
    308     dask.compute
    309     """
--> 310     (result,) = compute(self, traverse=False, **kwargs)
    311     return result

File [/opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/dask/base.py:595](https://file+.vscode-resource.vscode-cdn.net/opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/dask/base.py:595), in compute(traverse, optimize_graph, scheduler, get, *args, **kwargs)
    592     keys.append(x.__dask_keys__())
    593     postcomputes.append(x.__dask_postcompute__())
--> 595 results = schedule(dsk, keys, **kwargs)
    596 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])

File [/opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/dask/threaded.py:89](https://file+.vscode-resource.vscode-cdn.net/opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/dask/threaded.py:89), in get(dsk, keys, cache, num_workers, pool, **kwargs)
     86     elif isinstance(pool, multiprocessing.pool.Pool):
     87         pool = MultiprocessingPoolExecutor(pool)
---> 89 results = get_async(
     90     pool.submit,
     91     pool._max_workers,
     92     dsk,
     93     keys,
     94     cache=cache,
     95     get_id=_thread_get_id,
     96     pack_exception=pack_exception,
     97     **kwargs,
     98 )
    100 # Cleanup pools associated to dead threads
    101 with pools_lock:

File [/opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/dask/local.py:511](https://file+.vscode-resource.vscode-cdn.net/opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/dask/local.py:511), in get_async(submit, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, chunksize, **kwargs)
    509         _execute_task(task, data)  # Re-execute locally
    510     else:
--> 511         raise_exception(exc, tb)
    512 res, worker_id = loads(res_info)
    513 state["cache"][key] = res

File [/opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/dask/local.py:319](https://file+.vscode-resource.vscode-cdn.net/opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/dask/local.py:319), in reraise(exc, tb)
    317 if exc.__traceback__ is not tb:
    318     raise exc.with_traceback(tb)
--> 319 raise exc

File [/opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/dask/local.py:224](https://file+.vscode-resource.vscode-cdn.net/opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/dask/local.py:224), in execute_task(key, task_info, dumps, loads, get_id, pack_exception)
    222 try:
    223     task, data = loads(task_info)
--> 224     result = _execute_task(task, data)
    225     id = get_id()
    226     result = dumps((result, id))

File [/opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/dask/core.py:121](https://file+.vscode-resource.vscode-cdn.net/opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/dask/core.py:121), in _execute_task(arg, cache, dsk)
    117     func, args = arg[0], arg[1:]
    118     # Note: Don't assign the subtask results to a variable. numpy detects
    119     # temporaries by their reference count and can execute certain
    120     # operations in-place.
--> 121     return func(*(_execute_task(a, cache) for a in args))
    122 elif not ishashable(arg):
    123     return arg

File [/opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/dask/core.py:121](https://file+.vscode-resource.vscode-cdn.net/opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/dask/core.py:121), in (.0)
    117     func, args = arg[0], arg[1:]
    118     # Note: Don't assign the subtask results to a variable. numpy detects
    119     # temporaries by their reference count and can execute certain
    120     # operations in-place.
--> 121     return func(*(_execute_task(a, cache) for a in args))
    122 elif not ishashable(arg):
    123     return arg

File [/opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/dask/core.py:121](https://file+.vscode-resource.vscode-cdn.net/opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/dask/core.py:121), in _execute_task(arg, cache, dsk)
    117     func, args = arg[0], arg[1:]
    118     # Note: Don't assign the subtask results to a variable. numpy detects
    119     # temporaries by their reference count and can execute certain
    120     # operations in-place.
--> 121     return func(*(_execute_task(a, cache) for a in args))
    122 elif not ishashable(arg):
    123     return arg

File [/opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/dask/utils.py:73](https://file+.vscode-resource.vscode-cdn.net/opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/dask/utils.py:73), in apply(func, args, kwargs)
     42 """Apply a function given its positional and keyword arguments.
     43 
     44 Equivalent to ``func(*args, **kwargs)``
   (...)
     70 >>> dsk = {'task-name': task}  # adds the task to a low level Dask task graph
     71 """
     72 if kwargs:
---> 73     return func(*args, **kwargs)
     74 else:
     75     return func(*args)

File [/opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/dask/dataframe/io/sql.py:412](https://file+.vscode-resource.vscode-cdn.net/opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/dask/dataframe/io/sql.py:412), in _read_sql_chunk(q, uri, meta, engine_kwargs, **kwargs)
    410     return df
    411 else:
--> 412     return df.astype(meta.dtypes.to_dict(), copy=False)

File [/opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/pandas/core/generic.py:6305](https://file+.vscode-resource.vscode-cdn.net/opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/pandas/core/generic.py:6305), in NDFrame.astype(self, dtype, copy, errors)
   6303 else:
   6304     try:
-> 6305         res_col = col.astype(dtype=cdt, copy=copy, errors=errors)
   6306     except ValueError as ex:
   6307         ex.args = (
   6308             f"{ex}: Error while type casting for column '{col_name}'",
   6309         )

File [/opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/pandas/core/generic.py:6324](https://file+.vscode-resource.vscode-cdn.net/opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/pandas/core/generic.py:6324), in NDFrame.astype(self, dtype, copy, errors)
   6317     results = [
   6318         self.iloc[:, i].astype(dtype, copy=copy)
   6319         for i in range(len(self.columns))
   6320     ]
   6322 else:
   6323     # else, only a single dtype is given
-> 6324     new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors)
   6325     return self._constructor(new_data).__finalize__(self, method="astype")
   6327 # GH 33113: handle empty frame or series

File [/opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/pandas/core/internals/managers.py:451](https://file+.vscode-resource.vscode-cdn.net/opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/pandas/core/internals/managers.py:451), in BaseBlockManager.astype(self, dtype, copy, errors)
    448 elif using_copy_on_write():
    449     copy = False
--> 451 return self.apply(
    452     "astype",
    453     dtype=dtype,
    454     copy=copy,
    455     errors=errors,
    456     using_cow=using_copy_on_write(),
    457 )

File [/opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/pandas/core/internals/managers.py:352](https://file+.vscode-resource.vscode-cdn.net/opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/pandas/core/internals/managers.py:352), in BaseBlockManager.apply(self, f, align_keys, **kwargs)
    350         applied = b.apply(f, **kwargs)
    351     else:
--> 352         applied = getattr(b, f)(**kwargs)
    353     result_blocks = extend_blocks(applied, result_blocks)
    355 out = type(self).from_blocks(result_blocks, self.axes)

File [/opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/pandas/core/internals/blocks.py:511](https://file+.vscode-resource.vscode-cdn.net/opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/pandas/core/internals/blocks.py:511), in Block.astype(self, dtype, copy, errors, using_cow)
    491 """
    492 Coerce to the new dtype.
    493 
   (...)
    507 Block
    508 """
    509 values = self.values
--> 511 new_values = astype_array_safe(values, dtype, copy=copy, errors=errors)
    513 new_values = maybe_coerce_values(new_values)
    515 refs = None

File [/opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/pandas/core/dtypes/astype.py:242](https://file+.vscode-resource.vscode-cdn.net/opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/pandas/core/dtypes/astype.py:242), in astype_array_safe(values, dtype, copy, errors)
    239     dtype = dtype.numpy_dtype
    241 try:
--> 242     new_values = astype_array(values, dtype, copy=copy)
    243 except (ValueError, TypeError):
    244     # e.g. _astype_nansafe can fail on object-dtype of strings
    245     #  trying to convert to float
    246     if errors == "ignore":

File [/opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/pandas/core/dtypes/astype.py:187](https://file+.vscode-resource.vscode-cdn.net/opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/pandas/core/dtypes/astype.py:187), in astype_array(values, dtype, copy)
    184     values = values.astype(dtype, copy=copy)
    186 else:
--> 187     values = _astype_nansafe(values, dtype, copy=copy)
    189 # in pandas we don't store numpy str dtypes, so convert to object
    190 if isinstance(dtype, np.dtype) and issubclass(values.dtype.type, str):

File [/opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/pandas/core/dtypes/astype.py:105](https://file+.vscode-resource.vscode-cdn.net/opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/pandas/core/dtypes/astype.py:105), in _astype_nansafe(arr, dtype, copy, skipna)
    100     return lib.ensure_string_array(
    101         arr, skipna=skipna, convert_na_value=False
    102     ).reshape(shape)
    104 elif np.issubdtype(arr.dtype, np.floating) and is_integer_dtype(dtype):
--> 105     return _astype_float_to_int_nansafe(arr, dtype, copy)
    107 elif is_object_dtype(arr.dtype):
    108     # if we have a datetime/timedelta array of objects
    109     # then coerce to datetime64[ns] and use DatetimeArray.astype
    111     if is_datetime64_dtype(dtype):

File [/opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/pandas/core/dtypes/astype.py:150](https://file+.vscode-resource.vscode-cdn.net/opt/miniconda3/envs/powergenome_catalog/lib/python3.10/site-packages/pandas/core/dtypes/astype.py:150), in _astype_float_to_int_nansafe(values, dtype, copy)
    146 """
    147 astype with a check preventing converting NaN to an meaningless integer value.
    148 """
    149 if not np.isfinite(values).all():
--> 150     raise IntCastingNaNError(
    151         "Cannot convert non-finite values (NA or inf) to integer"
    152     )
    153 if dtype.kind == "u":
    154     # GH#45151
    155     if not (values >= 0).all():

IntCastingNaNError: Cannot convert non-finite values (NA or inf) to integer: Error while type casting for column 'utility_id_eia'

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions