|
@pytest.mark.parametrize("pandas", [False, True])
def test_basic(ray_start_regular_shared, pandas):
    """Exercise get_async/multiget on a random access dataset keyed by "key".

    The dataset has 100 rows with columns "id", "key" (= id * 2) and
    "embedding" (= id ** 2), so only the even keys 0..198 exist. Every odd
    key, and anything outside that range, must resolve to None.

    Args:
        ray_start_regular_shared: Pytest fixture; not referenced in the body
            (presumably it provides the running Ray cluster -- confirm
            against conftest).
        pandas: When False, each batch is additionally converted to a
            pyarrow.Table via map_batches before the index is built.
    """
    ds = ray.data.range(100, override_num_blocks=10)
    ds = ds.add_column("key", lambda b: b["id"] * 2)
    ds = ds.add_column("embedding", lambda b: b["id"] ** 2)
    if not pandas:
        ds = ds.map_batches(
            lambda df: pyarrow.Table.from_pandas(df), batch_format="pandas"
        )

    # Index on "key" rather than "id" so that lookups by a value that is a
    # valid "id" but not a valid "key" (the odd numbers) must miss.
    rad = ds.to_random_access_dataset("key", num_workers=1)

    def expected(i):
        """Return the full row expected for the record whose "id" is i."""
        return {"id": i, "key": i * 2, "embedding": i**2}

    # Test get.
    assert ray.get(rad.get_async(-1)) is None
    assert ray.get(rad.get_async(200)) is None
    for i in range(100):
        assert ray.get(rad.get_async(i * 2 + 1)) is None
        assert ray.get(rad.get_async(i * 2)) == expected(i)

    # Test multiget.
    present_keys = list(range(0, 20, 2))  # keys of the rows with id 0..9
    absent_keys = list(range(1, 21, 2))  # odd values never appear as a key
    results = rad.multiget([-1] + present_keys + absent_keys + [200])
    assert results == (
        [None] + [expected(i) for i in range(10)] + [None] * 10 + [None]
    )
31 | 33 |
|
32 | 34 |
|
33 | 35 | def test_empty_blocks(ray_start_regular_shared):
|
|
0 commit comments