Skip to content

Commit ecdfe8c

Browse files
committed
Merge branch 'bugfix/fix_fails' into feature/agg_mode
2 parents 0c2f34b + 624fb5a commit ecdfe8c

File tree

11 files changed

+33
-27
lines changed

11 files changed

+33
-27
lines changed

.github/workflows/benchmark-ci.yml

+1 -1
Original file line number | Diff line number | Diff line change
@@ -57,7 +57,7 @@ jobs:
5757
git fetch upstream
5858
git merge upstream/master
5959
asv machine --yes
60-
asv continuous -e -f 1.1 --strict upstream/master HEAD
60+
asv continuous -e -f 1.1 upstream/master HEAD
6161
if: ${{ steps.build.outcome == 'success' }}
6262

6363
- name: Publish benchmarks artifact

mars/dataframe/merge/tests/test_merge_execution.py

+19 -13
Original file line number | Diff line number | Diff line change
@@ -312,11 +312,15 @@ def test_join_on(setup):
312312
expected4.set_index("a2", inplace=True)
313313
result4.set_index("a2", inplace=True)
314314
pd.testing.assert_frame_equal(
315-
sort_dataframe_inplace(expected4, 0), sort_dataframe_inplace(result4, 0)
315+
sort_dataframe_inplace(expected4, 0, kind="mergesort"),
316+
sort_dataframe_inplace(result4, 0, kind="mergesort"),
316317
)
317318

318319

319320
def test_merge_one_chunk(setup):
321+
def sort_by_col1(df):
322+
return df.sort_values(by=df.columns[1], kind="mergesort")
323+
320324
df1 = pd.DataFrame(
321325
{"lkey": ["foo", "bar", "baz", "foo"], "value": [1, 2, 3, 5]},
322326
index=["a1", "a2", "a3", "a4"],
@@ -348,8 +352,8 @@ def test_merge_one_chunk(setup):
348352
result = jdf.execute().fetch()
349353

350354
pd.testing.assert_frame_equal(
351-
expected.sort_values(by=expected.columns[1]).reset_index(drop=True),
352-
result.sort_values(by=result.columns[1]).reset_index(drop=True),
355+
sort_by_col1(expected).reset_index(drop=True),
356+
sort_by_col1(result).reset_index(drop=True),
353357
)
354358

355359
# right have one chunk
@@ -361,8 +365,8 @@ def test_merge_one_chunk(setup):
361365
result = jdf.execute().fetch()
362366

363367
pd.testing.assert_frame_equal(
364-
expected.sort_values(by=expected.columns[1]).reset_index(drop=True),
365-
result.sort_values(by=result.columns[1]).reset_index(drop=True),
368+
sort_by_col1(expected).reset_index(drop=True),
369+
sort_by_col1(result).reset_index(drop=True),
366370
)
367371

368372
# left have one chunk and how="left", then one chunk tile
@@ -377,8 +381,8 @@ def test_merge_one_chunk(setup):
377381
result = jdf.execute().fetch()
378382

379383
pd.testing.assert_frame_equal(
380-
expected.sort_values(by=expected.columns[1]).reset_index(drop=True),
381-
result.sort_values(by=result.columns[1]).reset_index(drop=True),
384+
sort_by_col1(expected).reset_index(drop=True),
385+
sort_by_col1(result).reset_index(drop=True),
382386
)
383387

384388

@@ -418,7 +422,8 @@ def test_broadcast_merge(setup):
418422
expected.set_index("key", inplace=True)
419423
result.set_index("key", inplace=True)
420424
pd.testing.assert_frame_equal(
421-
sort_dataframe_inplace(expected, 0), sort_dataframe_inplace(result, 0)
425+
sort_dataframe_inplace(expected, 0, kind="mergesort"),
426+
sort_dataframe_inplace(result, 0, kind="mergesort"),
422427
)
423428

424429
# test broadcast right and how="left"
@@ -438,8 +443,8 @@ def test_broadcast_merge(setup):
438443
expected.set_index("key", inplace=True)
439444
result.set_index("key", inplace=True)
440445
pd.testing.assert_frame_equal(
441-
expected.sort_values(by=["key", "value_x"]),
442-
result.sort_values(by=["key", "value_x"]),
446+
expected.sort_values(by=["key", "value_x"], kind="mergesort"),
447+
result.sort_values(by=["key", "value_x"], kind="mergesort"),
443448
)
444449

445450
# test broadcast left
@@ -459,7 +464,8 @@ def test_broadcast_merge(setup):
459464
expected.set_index("key", inplace=True)
460465
result.set_index("key", inplace=True)
461466
pd.testing.assert_frame_equal(
462-
sort_dataframe_inplace(expected, 0), sort_dataframe_inplace(result, 0)
467+
sort_dataframe_inplace(expected, 0, kind="mergesort"),
468+
sort_dataframe_inplace(result, 0, kind="mergesort"),
463469
)
464470

465471
# test broadcast left and how="right"
@@ -479,8 +485,8 @@ def test_broadcast_merge(setup):
479485
expected.set_index("key", inplace=True)
480486
result.set_index("key", inplace=True)
481487
pd.testing.assert_frame_equal(
482-
expected.sort_values(by=["key", "value_x"]),
483-
result.sort_values(by=["key", "value_x"]),
488+
expected.sort_values(by=["key", "value_x"], kind="mergesort"),
489+
result.sort_values(by=["key", "value_x"], kind="mergesort"),
484490
)
485491

486492

mars/dataframe/utils.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -106,9 +106,9 @@ def hash_dtypes(dtypes, size):
106106
return [dtypes[index] for index in hashed_indexes]
107107

108108

109-
def sort_dataframe_inplace(df, *axis):
109+
def sort_dataframe_inplace(df, *axis, **kw):
110110
for ax in axis:
111-
df.sort_index(axis=ax, inplace=True)
111+
df.sort_index(axis=ax, inplace=True, **kw)
112112
return df
113113

114114

mars/learn/contrib/lightgbm/_predict.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -78,7 +78,7 @@ def __call__(self):
7878
elif hasattr(self.model, "classes_"):
7979
dtype = np.array(self.model.classes_).dtype
8080
else:
81-
dtype = getattr(self.model, "out_dtype_", np.dtype("float"))
81+
dtype = getattr(self.model, "out_dtype_", [np.dtype("float")])[0]
8282

8383
if self.output_types[0] == OutputType.tensor:
8484
# tensor

mars/learn/contrib/lightgbm/_train.py

+3 -3
Original file line number | Diff line number | Diff line change
@@ -406,11 +406,11 @@ def execute(cls, ctx, op: "LGBMTrain"):
406406
op.model_type == LGBMModelType.RANKER
407407
or op.model_type == LGBMModelType.REGRESSOR
408408
):
409-
model.set_params(out_dtype_=np.dtype("float"))
409+
model.set_params(out_dtype_=[np.dtype("float")])
410410
elif hasattr(label_val, "dtype"):
411-
model.set_params(out_dtype_=label_val.dtype)
411+
model.set_params(out_dtype_=[label_val.dtype])
412412
else:
413-
model.set_params(out_dtype_=label_val.dtypes[0])
413+
model.set_params(out_dtype_=[label_val.dtypes[0]])
414414

415415
ctx[op.outputs[0].key] = pickle.dumps(model)
416416
finally:

mars/learn/linear_model/_base.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -301,7 +301,7 @@ def fit(self, X, y, sample_weight=None):
301301
self.coef_.execute()
302302
except LinAlgError:
303303
# TODO: implement linalg.lstsq first
304-
raise NotImplementedError("Does not support sigular matrix!")
304+
raise NotImplementedError("Does not support singular matrix!")
305305

306306
if y.ndim == 1:
307307
self.coef_ = mt.ravel(self.coef_)

mars/learn/linear_model/tests/test_base.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -53,7 +53,7 @@ def test_linear_regression(setup):
5353
assert_array_almost_equal(reg.predict(X), model.predict(X))
5454

5555
# Regular model fitting, #samples <= 2, # features < 2
56-
error_msg = re.escape("Does not support sigular matrix!")
56+
error_msg = re.escape("Does not support singular matrix!")
5757

5858
X = [[1], [2]]
5959
Y = [1, 2]
@@ -69,7 +69,7 @@ def test_linear_regression(setup):
6969
assert_array_almost_equal(reg.predict(X), model.predict(X))
7070

7171
# Extra case #1: singular matrix, degenerate input
72-
error_msg = re.escape("Does not support sigular matrix!")
72+
error_msg = re.escape("Does not support singular matrix!")
7373

7474
X = [[1]]
7575
Y = [0]

mars/learn/metrics/pairwise/pairwise.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,7 @@
7272
"precomputed": None, # HACK: precomputed is always allowed, never called
7373
}
7474

75-
# These distances recquire boolean tensors, when using mars.tensor.spatial.distance
75+
# These distances require boolean tensors, when using mars.tensor.spatial.distance
7676
PAIRWISE_BOOLEAN_FUNCTIONS = [
7777
"dice",
7878
"jaccard",

mars/tensor/base/tile.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ def tile(A, reps):
3030
behavior, promote `A` to d-dimensions manually before calling this
3131
function.
3232
33-
If ``A.ndim > d``, `reps` is promoted to `A`.ndim by pre-pending 1's to it.
33+
If ``A.ndim > d``, `reps` is promoted to `A`.ndim by prepending 1's to it.
3434
Thus for an `A` of shape (2, 3, 4, 5), a `reps` of (2, 2) is treated as
3535
(1, 1, 2, 2).
3636

mars/tensor/utils.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -774,7 +774,7 @@ def fetch_corner_data(tensor, session=None):
774774
# the tensor must have been executed,
775775
# thus the size could not be NaN
776776
if tensor.size > threshold:
777-
# two edges for each exis
777+
# two edges for each axis
778778
indices_iter = list(itertools.product(*(range(2) for _ in range(tensor.ndim))))
779779
corners = np.empty(shape=(2,) * tensor.ndim, dtype=object)
780780
shape = [0 for _ in range(tensor.ndim)]

setup.cfg

+1 -1
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ install_requires =
3030
pandas>=1.0.0,<2.0.0
3131
scipy>=1.0.0
3232
scikit-learn>=0.20
33-
numexpr>=2.6.4,!=2.8.5
33+
numexpr>=2.6.4,!=2.8.5,!=2.8.6
3434
cloudpickle>=1.5.0
3535
pyyaml>=5.1
3636
psutil>=5.9.0

0 commit comments

Comments
 (0)