16
16
17
17
from ..core import ChunkData , Chunk , Entity , TilesableData
18
18
from ..serialize import Serializable , ValueType , ProviderType , DataTypeField , AnyField , SeriesField , \
19
- BoolField , Int64Field , Int32Field , ListField , SliceField , OneOfField , ReferenceField
19
+ BoolField , Int64Field , Int32Field , StringField , ListField , SliceField , OneOfField , ReferenceField
20
20
21
21
22
22
class IndexValue (Serializable ):
23
23
__slots__ = ()
24
24
25
- class Index (Serializable ):
25
class IndexBase(Serializable):
    """Serialized fields and read-only accessors shared by index descriptions.

    Each field lives in a private ``_``-prefixed attribute.  The properties
    below expose every field except ``_key`` under the un-prefixed name.
    """

    _key = StringField('key')  # to identify if the index is the same

    # Ordering / uniqueness flags.
    _is_monotonic_increasing = BoolField('is_monotonic_increasing')
    _is_monotonic_decreasing = BoolField('is_monotonic_decreasing')
    _is_unique = BoolField('is_unique')
    _should_be_monotonic = BoolField('should_be_monotonic')

    # Value bounds; the ``*_close`` flags presumably mark a bound as
    # inclusive -- TODO confirm against the code that fills them in.
    _max_val = AnyField('max_val')
    _max_val_close = BoolField('max_val_close')
    _min_val = AnyField('min_val')
    _min_val_close = BoolField('min_val_close')

    @property
    def is_monotonic_increasing(self):
        """Return the stored ``is_monotonic_increasing`` flag."""
        return self._is_monotonic_increasing

    @property
    def is_monotonic_decreasing(self):
        """Return the stored ``is_monotonic_decreasing`` flag."""
        return self._is_monotonic_decreasing

    @property
    def is_unique(self):
        """Return the stored ``is_unique`` flag."""
        return self._is_unique

    @property
    def should_be_monotonic(self):
        """Return the stored ``should_be_monotonic`` flag."""
        return self._should_be_monotonic

    @property
    def min_val(self):
        """Return the stored ``min_val`` value."""
        return self._min_val

    @property
    def min_val_close(self):
        """Return the stored ``min_val_close`` flag."""
        return self._min_val_close

    @property
    def max_val(self):
        """Return the stored ``max_val`` value."""
        return self._max_val

    @property
    def max_val_close(self):
        """Return the stored ``max_val_close`` flag."""
        return self._max_val_close
67
+
68
class Index(IndexBase):
    """Index description made of a name, a data list and a dtype."""

    _name = AnyField('name')
    _data = ListField('data')
    _dtype = DataTypeField('dtype')
29
72
30
- class RangeIndex (Serializable ):
73
class RangeIndex(IndexBase):
    """Index description made of a name and a slice."""

    _name = AnyField('name')
    _slice = SliceField('slice')
33
76
34
- class CategoricalIndex (Serializable ):
77
class CategoricalIndex(IndexBase):
    """Index description made of a name, a category list and an ordered flag."""

    _name = AnyField('name')
    _categories = ListField('categories')
    _ordered = BoolField('ordered')
38
81
39
- class IntervalIndex (Serializable ):
82
class IntervalIndex(IndexBase):
    """Index description made of a name, a data list and a ``closed`` field."""

    _name = AnyField('name')
    _data = ListField('data')
    # NOTE(review): ``closed`` is a BoolField here, while TimedeltaIndex
    # declares its ``closed`` as an AnyField -- confirm the intended type.
    _closed = BoolField('closed')
43
86
44
- class DatetimeIndex (Serializable ):
87
+ class DatetimeIndex (IndexBase ):
45
88
_name = AnyField ('name' )
46
89
_data = ListField ('data' )
47
90
_freq = AnyField ('freq' )
@@ -53,7 +96,7 @@ class DatetimeIndex(Serializable):
53
96
_dayfirst = BoolField ('dayfirst' )
54
97
_yearfirst = BoolField ('yearfirst' )
55
98
56
- class TimedeltaIndex (Serializable ):
99
+ class TimedeltaIndex (IndexBase ):
57
100
_name = AnyField ('name' )
58
101
_data = ListField ('data' )
59
102
_unit = AnyField ('unit' )
@@ -63,7 +106,7 @@ class TimedeltaIndex(Serializable):
63
106
_end = AnyField ('end' )
64
107
_closed = AnyField ('closed' )
65
108
66
- class PeriodIndex (Serializable ):
109
+ class PeriodIndex (IndexBase ):
67
110
_name = AnyField ('name' )
68
111
_data = ListField ('data' )
69
112
_freq = AnyField ('freq' )
@@ -80,25 +123,24 @@ class PeriodIndex(Serializable):
80
123
_tz = AnyField ('tz' )
81
124
_dtype = DataTypeField ('dtype' )
82
125
83
- class Int64Index (Serializable ):
126
class Int64Index(IndexBase):
    """Index description made of a name, a data list and a dtype."""

    _name = AnyField('name')
    _data = ListField('data')
    _dtype = DataTypeField('dtype')
87
130
88
- class UInt64Index (Serializable ):
131
class UInt64Index(IndexBase):
    """Index description made of a name, a data list and a dtype."""

    _name = AnyField('name')
    _data = ListField('data')
    _dtype = DataTypeField('dtype')
92
135
93
- class Float64Index (Serializable ):
136
class Float64Index(IndexBase):
    """Index description made of a name, a data list and a dtype."""

    _name = AnyField('name')
    _data = ListField('data')
    _dtype = DataTypeField('dtype')
97
140
98
- class MultiIndex (Serializable ):
141
class MultiIndex(IndexBase):
    """Index description made of a list of names, a data list and a sort order."""

    # NOTE(review): the attribute is plural but the serialized tag is 'name'.
    # The tag is presumably part of the serialized format, so confirm before
    # renaming it.
    _names = ListField('name')
    _data = ListField('data')
    _sortorder = Int32Field('sortorder')
103
145
104
146
_index_value = OneOfField ('index_value' , index = Index ,
@@ -113,6 +155,42 @@ def __mars_tokenize__(self):
113
155
v = self ._index_value
114
156
return [type (v ).__name__ ] + [getattr (v , f , None ) for f in v .__slots__ ]
115
157
158
@property
def value(self):
    """Return the wrapped index object held in ``_index_value``."""
    return self._index_value
161
+
162
@property
def is_monotonic_increasing(self):
    """Delegate to ``is_monotonic_increasing`` of the wrapped index object."""
    wrapped = self._index_value
    return wrapped.is_monotonic_increasing
165
+
166
@property
def is_monotonic_decreasing(self):
    """Delegate to ``is_monotonic_decreasing`` of the wrapped index object."""
    wrapped = self._index_value
    return wrapped.is_monotonic_decreasing
169
+
170
@property
def is_monotonic_increasing_or_decreasing(self):
    """Return ``is_monotonic_increasing or is_monotonic_decreasing``.

    The decreasing flag is only read when the increasing flag is falsy.
    """
    increasing = self.is_monotonic_increasing
    return increasing or self.is_monotonic_decreasing
173
+
174
@property
def is_unique(self):
    """Delegate to ``is_unique`` of the wrapped index object."""
    wrapped = self._index_value
    return wrapped.is_unique
177
+
178
@property
def min_val(self):
    """Delegate to ``min_val`` of the wrapped index object."""
    wrapped = self._index_value
    return wrapped.min_val
181
+
182
@property
def min_val_close(self):
    """Delegate to ``min_val_close`` of the wrapped index object."""
    wrapped = self._index_value
    return wrapped.min_val_close
185
+
186
@property
def max_val(self):
    """Delegate to ``max_val`` of the wrapped index object."""
    wrapped = self._index_value
    return wrapped.max_val
189
+
190
@property
def max_val_close(self):
    """Delegate to ``max_val_close`` of the wrapped index object."""
    wrapped = self._index_value
    return wrapped.max_val_close
193
+
116
194
117
195
class IndexChunkData (ChunkData ):
118
196
__slots__ = ()
@@ -224,18 +302,22 @@ class DataFrameChunkData(ChunkData):
224
302
# optional field
225
303
_dtypes = SeriesField ('dtypes' )
226
304
_index_value = ReferenceField ('index_value' , IndexValue )
305
+ _columns_value = ReferenceField ('columns_value' , IndexValue )
227
306
228
307
@property
def dtypes(self):
    """Return the chunk's own ``_dtypes``, else ``dtypes`` of ``self.op``.

    The fallback is used only when ``_dtypes`` is missing or ``None``; a
    missing ``dtypes`` on the op yields ``None``.
    """
    own = getattr(self, '_dtypes', None)
    # Explicit ``is not None`` test: a stored value must never be judged by
    # its truthiness (presumably it is a pandas Series, whose truth value
    # is ambiguous -- TODO confirm).
    return own if own is not None else getattr(self.op, 'dtypes', None)
231
313
232
314
@property
def index_value(self):
    """Return the object stored in the ``_index_value`` field."""
    return self._index_value
235
317
236
318
@property
def columns(self):
    """Return the object stored in the ``_columns_value`` field."""
    return self._columns_value
239
321
240
322
241
323
class DataFrameChunk (Chunk ):
@@ -249,27 +331,33 @@ class DataFrameData(TilesableData):
249
331
# optional field
250
332
_dtypes = SeriesField ('dtypes' )
251
333
_index_value = ReferenceField ('index_value' , IndexValue )
334
+ _columns_value = ReferenceField ('columns_value' , IndexValue )
252
335
_chunks = ListField ('chunks' , ValueType .reference (DataFrameChunkData ),
253
336
on_serialize = lambda x : [it .data for it in x ] if x is not None else x ,
254
337
on_deserialize = lambda x : [DataFrameChunk (it ) for it in x ] if x is not None else x )
255
338
256
339
@property
def dtypes(self):
    """Return this object's ``_dtypes``, else ``dtypes`` of ``self.op``.

    ``self.op`` is consulted only when ``_dtypes`` is missing or ``None``;
    if the op has no ``dtypes`` attribute either, the result is ``None``.
    """
    stored = getattr(self, '_dtypes', None)
    if stored is None:
        return getattr(self.op, 'dtypes', None)
    # Identity check above, not truthiness: any non-None stored value wins,
    # even one that would be falsy.
    return stored
259
345
260
346
@property
def index_value(self):
    """Return the object stored in the ``_index_value`` field."""
    return self._index_value
263
349
264
350
@property
def columns(self):
    """Return the object stored in the ``_columns_value`` field."""
    return self._columns_value
267
353
268
354
269
355
class DataFrame(Entity):
    """Entity that lists ``DataFrameData`` as its only allowed data type."""

    __slots__ = ()
    _allow_data_type_ = (DataFrameData,)
272
358
273
359
360
# Class tuples grouping each entity with its data class (and the DataFrame
# chunk with its chunk data) -- presumably meant for ``isinstance`` checks;
# verify against callers.
INDEX_TYPE = (Index, IndexData)
SERIES_TYPE = (Series, SeriesData)
DATAFRAME_TYPE = (DataFrame, DataFrameData)
CHUNK_TYPE = (DataFrameChunk, DataFrameChunkData)
0 commit comments