Skip to content

Commit 60ed6fd

Browse files
authored
[core] Deflake test_placement_group_4 (#52552)
`remove_placement_group` is an asynchronous API, so the placement group (PG) might not have been removed yet by the time the next actor task is called. --------- Signed-off-by: Edward Oakes <ed.nmi.oakes@gmail.com>
1 parent 3a6eb90 commit 60ed6fd

File tree

1 file changed

+10
-13
lines changed

1 file changed

+10
-13
lines changed

python/ray/tests/test_placement_group_4.py

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import pytest
22
import os
33
import sys
4+
import time
45

56
import ray
67
import ray.cluster_utils
@@ -32,9 +33,7 @@ def validate(runtime_env_dict: dict) -> str:
3233

3334
@staticmethod
3435
def create(uri: str, runtime_env_dict: dict, ctx: RuntimeEnvContext) -> float:
35-
import time
36-
37-
time.sleep(15)
36+
time.sleep(60)
3837
return 0
3938

4039

@@ -142,14 +141,14 @@ def test_remove_placement_group_worker_startup_slowly(
142141

143142
@ray.remote(num_cpus=2)
144143
class A:
145-
def f(self):
146-
return 3
144+
def ready(self):
145+
return "ok"
146+
147+
def hang(self):
148+
time.sleep(60)
147149

148150
@ray.remote(num_cpus=2, max_retries=0)
149151
def long_running_task():
150-
print(os.getpid())
151-
import time
152-
153152
time.sleep(60)
154153

155154
# Schedule a long-running task that uses
@@ -165,15 +164,13 @@ def long_running_task():
165164
placement_group=placement_group
166165
)
167166
).remote()
168-
assert ray.get(a.f.remote()) == 3
167+
assert ray.get(a.ready.remote()) == "ok"
169168

169+
# Remove the PG, check that the actor and task are failed.
170170
ray.util.remove_placement_group(placement_group)
171171

172-
# Make sure the actor has been killed
173-
# because of the removal of the pg.
174-
# TODO(@clay4444): Make it throw a `ActorPlacementGroupRemoved`.
175172
with pytest.raises(ray.exceptions.RayActorError, match="actor died"):
176-
ray.get(a.f.remote(), timeout=3.0)
173+
ray.get(a.hang.remote(), timeout=10)
177174

178175
# The long-running task should still be in the state
179176
# of leasing-worker because of the worker startup delay.

0 commit comments

Comments
 (0)