@@ -142,8 +142,8 @@ def execute(cls, ctx, op):
142
142
dtypes .append (dtype )
143
143
dtypes = pd .Series (dtypes , index = columns )
144
144
chunk_index = (
145
- chunk_meta [ "partition_index_row_" ] ,
146
- chunk_meta [ "partition_index_column_" ] ,
145
+ chunk_meta . get ( "partition_index_row_" , - 1 ) ,
146
+ chunk_meta . get ( "partition_index_column_" , - 1 ) ,
147
147
)
148
148
# chunk: (chunk_id, worker_address, dtype, shape, index, columns)
149
149
chunks .append (
@@ -173,7 +173,13 @@ def __init__(self, vineyard_socket=None, object_id=None, **kw):
173
173
super ().__init__ (vineyard_socket = vineyard_socket , object_id = object_id , ** kw )
174
174
175
175
# Builds the output dataframe for `meta` by delegating to self.new_dataframe.
# In this diff hunk the removed ("-") line passed only [meta] as inputs; the
# added ("+") lines additionally forward meta.shape, meta.dtypes,
# meta.index_value and meta.columns_value as keyword arguments.
def __call__ (self , meta ):
176
- return self .new_dataframe ([meta ])
176
+ return self .new_dataframe (
177
+ [meta ],
178
+ shape = meta .shape ,
179
+ dtypes = meta .dtypes ,
180
+ index_value = meta .index_value ,
181
+ columns_value = meta .columns_value ,
182
+ )
177
183
178
184
@classmethod
179
185
def tile (cls , op ):
@@ -182,21 +188,37 @@ def tile(cls, op):
182
188
183
189
ctx = get_context ()
184
190
185
- in_chunk_keys = [chunk .key for chunk in op .inputs [0 ].chunks ]
186
191
out_chunks = []
187
192
chunk_map = dict ()
188
193
dtypes , columns = None , None
189
- for chunk , infos in zip (
190
- op .inputs [0 ].chunks , ctx .get_chunks_result (in_chunk_keys )
191
- ):
194
+
195
+ in_chunk_keys = [chunk .key for chunk in op .inputs [0 ].chunks ]
196
+ in_chunk_results = ctx .get_chunks_result (in_chunk_keys )
197
+
198
+ # check whether any chunk index has an unknown value
199
+ has_unknown_chunk_index = False
200
+ for infos in in_chunk_results :
201
+ for _ , info in infos .iterrows (): # pragma: no cover
202
+ if len (info ["index" ]) == 0 or - 1 in info ["index" ]:
203
+ has_unknown_chunk_index = True
204
+ break
205
+
206
+ # assume chunks are split by row if the chunk index is unknown
207
+ chunk_location = 0
208
+
209
+ for chunk , infos in zip (op .inputs [0 ].chunks , in_chunk_results ):
192
210
for _ , info in infos .iterrows ():
193
211
chunk_op = op .copy ().reset_key ()
194
212
chunk_op .object_id = info ["id" ]
195
213
chunk_op .expect_worker = info ["worker_address" ]
196
214
dtypes = info ["dtypes" ]
197
215
columns = info ["columns" ]
198
216
shape = info ["shape" ]
199
- chunk_index = info ["index" ]
217
+ if has_unknown_chunk_index : # pragma: no cover
218
+ chunk_index = (chunk_location , 0 )
219
+ chunk_location += 1
220
+ else :
221
+ chunk_index = info ["index" ]
200
222
chunk_map [chunk_index ] = info ["shape" ]
201
223
out_chunk = chunk_op .new_chunk (
202
224
[chunk ],
@@ -251,7 +273,7 @@ def from_vineyard(df, vineyard_socket=None):
251
273
gpu = None ,
252
274
)
253
275
meta = metaop (
254
- shape = (np .nan ,),
276
+ shape = (np .nan , np . nan ),
255
277
dtypes = pd .Series ([]),
256
278
index_value = parse_index (pd .Index ([])),
257
279
columns_value = parse_index (pd .Index ([])),
0 commit comments