Open
Description
I was unable to reproduce exactly the same problem without Modin.
A KeyError is reproduced when query_compiler.to_pandas is called
from a worker process (during pickling) with the changes from modin-project/modin#6673.
import modin.pandas as pd
if __name__ == "__main__":
abbreviations = pd.Series(['Major League Baseball', 'National Basketball Association'], index=['MLB', 'NBA'])
teams = pd.DataFrame({'name': ['Mariners', 'Lakers'] * 500, 'league_abbreviation': ['MLB', 'NBA'] * 500})
print(teams.set_index('name').league_abbreviation.apply(lambda abbr: abbreviations.loc[abbr]).rename('league'))
Traceback:
Traceback (most recent call last):
File "...projects\modin\test_6594.py", line 8, in <module>
print(teams.set_index('name').league_abbreviation.apply(lambda abbr: abbreviations.loc[abbr]).rename('league'))
File "...projects\modin\modin\logging\logger_decorator.py", line 129, in run_and_log
return obj(*args, **kwargs)
File "...projects\modin\modin\pandas\base.py", line 4074, in __str__
return repr(self)
File "...projects\modin\modin\logging\logger_decorator.py", line 129, in run_and_log
return obj(*args, **kwargs)
File "...projects\modin\modin\pandas\series.py", line 393, in __repr__
temp_df = self._build_repr_df(num_rows, num_cols)
File "...projects\modin\modin\logging\logger_decorator.py", line 129, in run_and_log
return obj(*args, **kwargs)
File "...projects\modin\modin\pandas\base.py", line 261, in _build_repr_df
return self.iloc[indexer]._query_compiler.to_pandas()
File "...projects\modin\modin\logging\logger_decorator.py", line 129, in run_and_log
return obj(*args, **kwargs)
File "...projects\modin\modin\core\storage_formats\pandas\query_compiler.py", line 282, in to_pandas
return self._modin_frame.to_pandas()
File "...projects\modin\modin\logging\logger_decorator.py", line 129, in run_and_log
return obj(*args, **kwargs)
File "...projects\modin\modin\core\dataframe\pandas\dataframe\utils.py", line 501, in run_f_on_minimally_updated_metadata
result = f(self, *args, **kwargs)
File "...projects\modin\modin\core\dataframe\pandas\dataframe\dataframe.py", line 4029, in to_pandas
df = self._partition_mgr_cls.to_pandas(self._partitions)
File "...projects\modin\modin\logging\logger_decorator.py", line 129, in run_and_log
return obj(*args, **kwargs)
File "...projects\modin\modin\core\dataframe\pandas\partitioning\partition_manager.py", line 702, in to_pandas
retrieved_objects = cls.get_objects_from_partitions(partitions.flatten())
File "...projects\modin\modin\logging\logger_decorator.py", line 129, in run_and_log
return obj(*args, **kwargs)
File "...projects\modin\modin\core\dataframe\pandas\partitioning\partition_manager.py", line 933, in get_objects_from_partitions
return cls._execution_wrapper.materialize(
File "...projects\modin\modin\core\execution\unidist\common\engine_wrapper.py", line 92, in materialize
return unidist.get(obj_id)
File "...\Miniconda3\envs\modin\lib\site-packages\unidist\api.py", line 160, in get
return execution_backend.get(object_refs)
File "...\Miniconda3\envs\modin\lib\site-packages\unidist\core\base\backend.py", line 286, in get
return self._backend_cls.get(object_refs)
File "...\Miniconda3\envs\modin\lib\site-packages\unidist\core\backends\mpi\backend.py", line 79, in get
return mpi.get(data_ids)
File "...\Miniconda3\envs\modin\lib\site-packages\unidist\core\backends\mpi\core\controller\api.py", line 380, in get
values = [get_impl(data_id) for data_id in data_ids]
File "...\Miniconda3\envs\modin\lib\site-packages\unidist\core\backends\mpi\core\controller\api.py", line 380, in <listcomp>
values = [get_impl(data_id) for data_id in data_ids]
File "...\Miniconda3\envs\modin\lib\site-packages\unidist\core\backends\mpi\core\controller\api.py", line 370, in get_impl
raise value
ValueError: Unknown DataID!