Skip to content
This repository was archived by the owner on Jul 17, 2024. It is now read-only.

Commit 57ac260

Browse files
fix: Use daemon threads for SolverManager (#69)
Before, if a SolverManager creates a Solver, it creates a non-daemon thread, which can prevent the Python process from EVER exiting unless forced. Now, the SolverManager spawns only daemon threads, which allows the Python process to exit. This also allows us to remove some test configuration code that was used to force the JVM to exit. Additionally, SolverManager can take SolverConfig directly and may take a SolverManagerConfig too. Removed some solver manager tests to reduce flakiness.
1 parent b6b1fa1 commit 57ac260

File tree

5 files changed

+83
-69
lines changed

5 files changed

+83
-69
lines changed

tests/conftest.py

Lines changed: 2 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,6 @@ def pytest_addoption(parser):
1010
parser.addoption('--output-generated-classes', action='store', default='false')
1111

1212

13-
def pytest_configure(config):
14-
"""
15-
Allows plugins and conftest files to perform initial configuration.
16-
This hook is called for every plugin and initial conftest
17-
file after command line options have been parsed.
18-
"""
19-
pass
20-
21-
2213
def pytest_sessionstart(session):
2314
"""
2415
Called after the Session object has been created and
@@ -35,23 +26,5 @@ def pytest_sessionstart(session):
3526
timefold.solver.init()
3627

3728
if session.config.getoption('--output-generated-classes') != 'false':
38-
timefold.solver.set_class_output_directory(pathlib.Path('target', 'tox-generated-classes', 'python', f'{sys.version_info[0]}.{sys.version_info[1]}'))
39-
40-
41-
exit_code = 0
42-
def pytest_sessionfinish(session, exitstatus):
43-
"""
44-
Called after whole test run finished, right before
45-
returning the exit status to the system.
46-
"""
47-
global exit_code
48-
exit_code = exitstatus
49-
50-
51-
def pytest_unconfigure(config):
52-
"""
53-
Called before test process is exited.
54-
"""
55-
global exit_code
56-
from java.lang import System
57-
System.exit(exit_code)
29+
timefold.solver.set_class_output_directory(pathlib.Path('target', 'tox-generated-classes', 'python',
30+
f'{sys.version_info[0]}.{sys.version_info[1]}'))

tests/test_solver_manager.py

Lines changed: 2 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,8 @@ def assert_problem_change_solver_run(solver_manager, solver_job):
107107
assert solution.value_list[0].value == 6
108108
assert solver_manager.get_solver_status(1) == SolverStatus.NOT_SOLVING
109109

110-
with SolverManager.create(SolverFactory.create(solver_config)) as solver_manager:
110+
111+
with SolverManager.create(solver_config, SolverManagerConfig(parallel_solver_count='AUTO')) as solver_manager:
111112
lock.acquire()
112113
solver_job = solver_manager.solve(1, problem)
113114
assert_solver_run(solver_manager, solver_job)
@@ -126,12 +127,6 @@ def get_problem(problem_id):
126127
.with_problem_finder(get_problem)).run()
127128
assert_solver_run(solver_manager, solver_job)
128129

129-
lock.acquire()
130-
solver_job = (solver_manager.solve_builder()
131-
.with_problem_id(1)
132-
.with_problem_finder(get_problem)).run()
133-
assert_problem_change_solver_run(solver_manager, solver_job)
134-
135130
solution_list = []
136131
semaphore = Semaphore(0)
137132

@@ -149,17 +144,6 @@ def on_best_solution_changed(solution):
149144
assert semaphore.acquire(timeout=1)
150145
assert len(solution_list) == 1
151146

152-
solution_list = []
153-
lock.acquire()
154-
solver_job = (solver_manager.solve_builder()
155-
.with_problem_id(1)
156-
.with_problem_finder(get_problem)
157-
.with_best_solution_consumer(on_best_solution_changed)
158-
).run()
159-
assert_problem_change_solver_run(solver_manager, solver_job)
160-
assert semaphore.acquire(timeout=1)
161-
assert len(solution_list) == 1
162-
163147
solution_list = []
164148
lock.acquire()
165149
solver_job = (solver_manager.solve_builder()
@@ -169,21 +153,7 @@ def on_best_solution_changed(solution):
169153
.with_final_best_solution_consumer(on_best_solution_changed)
170154
).run()
171155
assert_solver_run(solver_manager, solver_job)
172-
# Wait for 2 acquires, one for best solution consumer,
173-
# another for final best solution consumer
174-
assert semaphore.acquire(timeout=1)
175-
assert semaphore.acquire(timeout=1)
176-
assert len(solution_list) == 2
177156

178-
solution_list = []
179-
lock.acquire()
180-
solver_job = (solver_manager.solve_builder()
181-
.with_problem_id(1)
182-
.with_problem_finder(get_problem)
183-
.with_best_solution_consumer(on_best_solution_changed)
184-
.with_final_best_solution_consumer(on_best_solution_changed)
185-
).run()
186-
assert_problem_change_solver_run(solver_manager, solver_job)
187157
# Wait for 2 acquires, one for best solution consumer,
188158
# another for final best solution consumer
189159
assert semaphore.acquire(timeout=1)
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
package ai.timefold.solver.python;
2+
3+
import java.util.concurrent.Executors;
4+
import java.util.concurrent.ThreadFactory;
5+
6+
/**
7+
* There is a Catch-22 that occurs on shutdown:
8+
* <p>
9+
* - In order for Python to free its variables, it must be terminated.
10+
* - In order for Python to be terminated, the JVM must be terminated.
11+
* - In order for the JVM to be terminated, all its non-daemon threads must be terminated.
12+
* - An Executor keeps all its threads alive until it is freed/has no more references.
13+
* - In order for the Executor to be freed/have no more references, it cannot have a reference in Python.
14+
* - To not have a reference in Python means Python must free its variables, creating the Catch-22
15+
* <p>
16+
* Thus, if non-daemon threads are used, and a {@link ai.timefold.solver.core.api.solver.SolverManager}
17+
* solves at least one problem (creating a keep-alive thread in its {@link java.util.concurrent.ThreadPoolExecutor}),
18+
* Python cannot shut down gracefully and will become unresponsive when interrupted.
19+
* <p>
20+
* This class uses {@link Executors#defaultThreadFactory()} to create a new thread, but sets the created
21+
* thread to daemon mode so Python can shut down gracefully.
22+
*/
23+
public class DaemonThreadFactory implements ThreadFactory {
24+
private static final ThreadFactory THREAD_FACTORY = Executors.defaultThreadFactory();
25+
26+
@Override
27+
public Thread newThread(Runnable runnable) {
28+
Thread out = THREAD_FACTORY.newThread(runnable);
29+
out.setDaemon(true);
30+
return out;
31+
}
32+
}

timefold-solver-python-core/src/main/python/_solver_manager.py

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from ._problem_change import ProblemChange, ProblemChangeWrapper
2-
from .config import SolverConfigOverride
2+
from .config import SolverConfig, SolverConfigOverride, SolverManagerConfig
33
from ._solver_factory import SolverFactory
44
from ._future import wrap_future
55
from ._timefold_java_interop import update_log_level
@@ -342,22 +342,38 @@ def __init__(self, delegate: '_JavaSolverManager'):
342342
self._delegate = delegate
343343

344344
@staticmethod
345-
def create(solver_factory: 'SolverFactory[Solution_]') -> 'SolverManager[Solution_, ProblemId_]':
345+
def create(solver_factory_or_config: 'SolverConfig | SolverFactory[Solution_]',
346+
solver_manager_config: 'SolverManagerConfig' = None) -> 'SolverManager[Solution_, ProblemId_]':
346347
"""
347-
Use a `SolverFactory` to build a `SolverManager`.
348+
Use a `SolverConfig` or `SolverFactory` to build a `SolverManager`.
348349
349350
Parameters
350351
----------
351-
solver_factory : SolverFactory[Solution_]
352-
The `SolverFactory` to build the `SolverManager` from.
352+
solver_factory_or_config : SolverConfig | SolverFactory[Solution_]
353+
The `SolverConfig` or `SolverFactory` to build the `SolverManager` from.
354+
355+
solver_manager_config: SolverManagerConfig, optional
356+
Additional settings that can be used to configure the `SolverManager`.
353357
354358
Returns
355359
-------
356360
SolverManager
357361
A new `SolverManager` instance.
358362
"""
359363
from ai.timefold.solver.core.api.solver import SolverManager as JavaSolverManager
360-
return SolverManager(JavaSolverManager.create(solver_factory._delegate)) # noqa
364+
from ai.timefold.solver.python import DaemonThreadFactory
365+
366+
if solver_manager_config is None:
367+
solver_manager_config = SolverManagerConfig()
368+
369+
java_solver_manager_config = solver_manager_config._to_java_solver_manager_config() # noqa
370+
java_solver_manager_config.setThreadFactoryClass(DaemonThreadFactory.class_)
371+
372+
if isinstance(solver_factory_or_config, SolverConfig):
373+
solver_factory_or_config = SolverFactory.create(solver_factory_or_config)
374+
375+
return SolverManager(JavaSolverManager.create(solver_factory_or_config._delegate, # noqa
376+
java_solver_manager_config))
361377

362378
def solve(self, problem_id: ProblemId_, problem: Solution_,
363379
final_best_solution_listener: Callable[[Solution_], None] = None) -> SolverJob[Solution_, ProblemId_]:

timefold-solver-python-core/src/main/python/config/_config.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from ..score import ConstraintFactory, Constraint, IncrementalScoreCalculator
22
from .._timefold_java_interop import is_enterprise_installed
33

4-
from typing import Any, Optional, Callable, TypeVar, Generic, TYPE_CHECKING
4+
from typing import Any, Optional, Callable, TypeVar, Generic, Literal, TYPE_CHECKING
55
from dataclasses import dataclass, field
66
from enum import Enum
77
from pathlib import Path
@@ -378,6 +378,29 @@ def _to_java_solver_config_override(self):
378378
return out
379379

380380

381+
@dataclass(kw_only=True)
382+
class SolverManagerConfig:
383+
"""
384+
Includes settings to configure a SolverManager.
385+
386+
Attributes
387+
----------
388+
parallel_solver_count: int | 'AUTO', optional
389+
If set to an integer, the number of parallel jobs that can be run
390+
simultaneously.
391+
If unset or set to 'AUTO', the number of parallel jobs is determined
392+
based on the number of CPU cores available.
393+
"""
394+
parallel_solver_count: Optional[int | Literal['AUTO']] = field(default=None)
395+
396+
def _to_java_solver_manager_config(self):
397+
from ai.timefold.solver.core.config.solver import SolverManagerConfig as JavaSolverManagerConfig
398+
out = JavaSolverManagerConfig()
399+
if self.parallel_solver_count is not None:
400+
out = out.withParallelSolverCount(str(self.parallel_solver_count))
401+
return out
402+
403+
381404
__all__ = ['Duration', 'EnvironmentMode', 'TerminationCompositionStyle',
382-
'RequiresEnterpriseError', 'MoveThreadCount',
405+
'RequiresEnterpriseError', 'MoveThreadCount', 'SolverManagerConfig',
383406
'SolverConfig', 'SolverConfigOverride', 'ScoreDirectorFactoryConfig', 'TerminationConfig']

0 commit comments

Comments
 (0)