Skip to content

[core] Deflake test_runtime_env_conda_and_pip_4.py #52750

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
May 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 9 additions & 7 deletions python/ray/tests/test_runtime_env_conda_and_pip.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import pytest
import sys
import platform
import time
from ray._private.test_utils import (
wait_for_condition,
chdir,
Expand All @@ -16,6 +15,7 @@
MAX_INTERNAL_PIP_FILENAME_TRIES,
)
from ray.runtime_env import RuntimeEnv
from ray.util.state import list_tasks

import yaml
import tempfile
Expand Down Expand Up @@ -116,7 +116,7 @@ class TestGC:
reason="Needs PR wheels built in CI, so only run on linux CI machines.",
)
@pytest.mark.parametrize("field", ["conda", "pip"])
@pytest.mark.parametrize("spec_format", ["file", "python_object"])
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A few minor speedups in this file: there is no need to test the GC logic against both the `file` and `python_object` spec formats, and the sleep was unneeded.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why don't you need to test against the `file` spec format?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

there is no meaningful difference in the GC implementation across the two conditions

@pytest.mark.parametrize("spec_format", ["python_object"])
def test_job_level_gc(
self, runtime_env_disable_URI_cache, start_cluster, field, spec_format, tmp_path
):
Expand All @@ -139,10 +139,12 @@ def f():

# Ensure that the runtime env has been installed.
assert ray.get(f.remote())
# Sleep some seconds before checking that we didn't GC. Otherwise this
# check may spuriously pass.
time.sleep(2)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm assuming the sleep existed to make sure some code ran after the `ray.get`. Otherwise the following assert will always pass if it runs immediately afterwards; removing the sleep means the test no longer exercises that behavior.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The check on the following line doesn't really need to be there at all IMO, except as a sanity check for the testing utils themselves. It asserts that we don't GC runtime_envs for active jobs. Note that:

  • If we did, many other test cases would fail, as this is very basic functionality.
  • This is not really a reliable way to test for the behavior: the GC can be arbitrarily delayed, so to be sure the check tests what we intend, the sleep would need to be arbitrarily long :)

Out of an abundance of caution, I updated the PR to perform the check in a more deterministic way: wait for the task to be marked FINISHED, then perform the check a few times in a loop. This should provide the same level of guarantee without the nondeterminism/delay of the sleep.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cool makes sense

assert not check_local_files_gced(cluster)

# Check that after the task is finished, the runtime_env is not GC'd
# because the job is still alive.
wait_for_condition(lambda: list_tasks()[0].state == "FINISHED")
for _ in range(5):
assert not check_local_files_gced(cluster)

ray.shutdown()

Expand All @@ -163,7 +165,7 @@ def f():
reason="Requires PR wheels built in CI, so only run on linux CI machines.",
)
@pytest.mark.parametrize("field", ["conda", "pip"])
@pytest.mark.parametrize("spec_format", ["file", "python_object"])
@pytest.mark.parametrize("spec_format", ["python_object"])
def test_detached_actor_gc(
self, runtime_env_disable_URI_cache, start_cluster, field, spec_format, tmp_path
):
Expand Down
138 changes: 45 additions & 93 deletions python/ray/tests/test_runtime_env_conda_and_pip_4.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,13 @@
os.environ["RAY_RUNTIME_ENV_LOCAL_DEV_MODE"] = "1"


def test_in_virtualenv(start_cluster):
def test_in_virtualenv(ray_start_regular_shared):
assert (
virtualenv_utils.is_in_virtualenv() is False
and "IN_VIRTUALENV" not in os.environ
) or (virtualenv_utils.is_in_virtualenv() is True and "IN_VIRTUALENV" in os.environ)
cluster, address = start_cluster
runtime_env = {"pip": ["pip-install-test==0.5"]}

ray.init(address, runtime_env=runtime_env)

@ray.remote
@ray.remote(runtime_env={"pip": ["pip-install-test==0.5"]})
def f():
import pip_install_test # noqa: F401

Expand All @@ -33,102 +29,58 @@ def f():
assert ray.get(f.remote())


def test_multiple_pip_installs(start_cluster, monkeypatch):
@pytest.mark.skipif(
sys.platform == "win32", reason="python.exe in use during deletion."
)
def test_multiple_pip_installs(ray_start_regular_shared):
"""Test that multiple pip installs don't interfere with each other."""
monkeypatch.setenv("RUNTIME_ENV_RETRY_TIMES", "0")
cluster, address = start_cluster

if sys.platform == "win32" and "ray" not in address:
pytest.skip(
"Failing on windows, as python.exe is in use during deletion attempt."
)

ray.init(
address,
runtime_env={
"pip": ["pip-install-test"],
"env_vars": {"TEST_VAR_1": "test_1"},
},
)

@ray.remote
def f():
return True

@ray.remote(
runtime_env={
"pip": ["pip-install-test"],
"env_vars": {"TEST_VAR_2": "test_2"},
}
)
def f2():
return True
return os.environ["TEST_VAR"]

assert ray.get(
[
f.options(
runtime_env={
"pip": ["pip-install-test"],
"env_vars": {"TEST_VAR": "1"},
}
).remote(),
f.options(
runtime_env={
"pip": ["pip-install-test"],
"env_vars": {"TEST_VAR": "2"},
}
).remote(),
]
) == ["1", "2"]

@ray.remote(
runtime_env={
"pip": ["pip-install-test"],
"env_vars": {"TEST_VAR_3": "test_3"},
}
)
def f3():
return True

assert all(ray.get([f.remote(), f2.remote(), f3.remote()]))
@pytest.mark.skipif(
os.environ.get("CI") and sys.platform != "linux",
reason="Requires PR wheels built in CI, so only run on linux CI machines.",
)
def test_pip_ray_is_overwritten(ray_start_regular_shared):
@ray.remote
def f():
import pip_install_test # noqa: F401

# Test an unconstrained "ray" dependency (should work).
ray.get(f.options(runtime_env={"pip": ["pip-install-test==0.5", "ray"]}).remote())

class TestGC:
@pytest.mark.skipif(
os.environ.get("CI") and sys.platform != "linux",
reason="Requires PR wheels built in CI, so only run on linux CI machines.",
# Test a constrained "ray" dependency that matches the env (should work).
ray.get(
f.options(runtime_env={"pip": ["pip-install-test==0.5", "ray>=2.0"]}).remote()
)
@pytest.mark.parametrize("field", ["pip"])
def test_pip_ray_is_overwritten(self, start_cluster, field):
cluster, address = start_cluster

# It should be OK to install packages with ray dependency.
ray.init(address, runtime_env={"pip": ["pip-install-test==0.5", "ray"]})

@ray.remote
def f():
import pip_install_test # noqa: F401

return True

# Ensure that the runtime env has been installed.
assert ray.get(f.remote())

ray.shutdown()

# It should be OK if cluster ray meets the installing ray version.
ray.init(address, runtime_env={"pip": ["pip-install-test==0.5", "ray>=1.12.0"]})

@ray.remote
def f():
import pip_install_test # noqa: F401

return True

# Ensure that the runtime env has been installed.
assert ray.get(f.remote())

ray.shutdown()

# It will raise exceptions if ray is overwritten.
with pytest.raises(Exception):
ray.init(
address, runtime_env={"pip": ["pip-install-test==0.5", "ray<=1.6.0"]}
)

@ray.remote
def f():
import pip_install_test # noqa: F401

return True

# Ensure that the runtime env has been installed.
assert ray.get(f.remote())

ray.shutdown()
# Test a constrained "ray" dependency that doesn't match the env (shouldn't work).
with pytest.raises(Exception):
ray.get(
f.options(
runtime_env={"pip": ["pip-install-test==0.5", "ray<2.0"]}
).remote()
)


# pytest-virtualenv doesn't support Python 3.12 as of now, see more details here:
Expand Down Expand Up @@ -158,7 +110,7 @@ def test_run_in_virtualenv(cloned_virtualenv):
@pytest.mark.skipif(
"IN_VIRTUALENV" in os.environ, reason="Pip option not supported in virtual env."
)
def test_runtime_env_with_pip_config(start_cluster):
def test_runtime_env_with_pip_config(ray_start_regular_shared):
@ray.remote(
runtime_env={
"pip": {"packages": ["pip-install-test==0.5"], "pip_version": "==24.1.2"}
Expand Down