Skip to content

Commit 11ef946

Browse files
author
bors-servo
authored
Auto merge of #142 - kennytm:stop-hardcoding-timeout, r=jdm
Allow TEST_TIMEOUT to be configured. Homu currently hard-codes the test timeout to 10 hours. This is unnecessarily long for many projects, including Rust. Recently, a bug in AppVeyor or GitHub has been causing status notifications not to be delivered, and thus the queue can be stuck for 10 hours when unattended. This PR introduces a fix that allows the timeout to be configured for each repository. ```toml [repo.NAME] # timeout after 3 hr 20 min timeout = 12000 ``` When the timeout is less than 1 hour, the current `check_timeout` loop will be too coarse, because it only checks once per hour. Therefore, I've also refactored and removed the loop in favor of a [`Timer`](https://docs.python.org/3/library/threading.html#timer-objects) object for each pending PR. <!-- Reviewable:start --> --- This change is [<img src="https://reviewable.io/review_button.svg" height="34" align="absmiddle" alt="Reviewable"/>](https://reviewable.io/reviews/servo/homu/142) <!-- Reviewable:end -->
2 parents b996b9d + e29e074 commit 11ef946

File tree

2 files changed

+39
-41
lines changed

2 files changed

+39
-41
lines changed

cfg.sample.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,10 @@ try_users = []
8080
# command.
8181
#status_based_exemption = false
8282

83+
# Maximum test duration (in seconds) allowed for testing a PR in this repository.
84+
# Defaults to 10 hours.
85+
#timeout = 36000
86+
8387
# Branch names. These settings are the defaults; it makes sense to leave these
8488
# as-is.
8589
#[repo.NAME.branch]

homu/main.py

Lines changed: 35 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from . import utils
88
from .utils import lazy_debug
99
import logging
10-
from threading import Thread, Lock
10+
from threading import Thread, Lock, Timer
1111
import time
1212
import traceback
1313
import sqlite3
@@ -34,7 +34,7 @@
3434

3535
INTERRUPTED_BY_HOMU_FMT = 'Interrupted by Homu ({})'
3636
INTERRUPTED_BY_HOMU_RE = re.compile(r'Interrupted by Homu \((.+?)\)')
37-
TEST_TIMEOUT = 3600 * 10
37+
DEFAULT_TEST_TIMEOUT = 3600 * 10
3838

3939
global_cfg = {}
4040

@@ -128,7 +128,8 @@ def __init__(self, num, head_sha, status, db, repo_label, mergeable_que,
128128
self.owner = owner
129129
self.name = name
130130
self.repos = repos
131-
self.test_started = time.time() # FIXME: Save in the local database
131+
self.timeout_timer = None
132+
self.test_started = time.time()
132133

133134
def head_advanced(self, head_sha, *, use_db=True):
134135
self.head_sha = head_sha
@@ -179,6 +180,9 @@ def add_comment(self, text):
179180

180181
def set_status(self, status):
181182
self.status = status
183+
if self.timeout_timer:
184+
self.timeout_timer.cancel()
185+
self.timeout_timer = None
182186

183187
db_query(
184188
self.db,
@@ -320,6 +324,30 @@ def blocked_by_closed_tree(self):
320324
treeclosed = self.repos[self.repo_label].treeclosed
321325
return treeclosed if self.priority < treeclosed else None
322326

327+
def start_testing(self, timeout):
328+
self.test_started = time.time() # FIXME: Save in the local database
329+
self.set_status('pending')
330+
timer = Timer(timeout, self.timed_out)
331+
timer.start()
332+
self.timeout_timer = timer
333+
334+
def timed_out(self):
335+
print('* Test timed out: {}'.format(self))
336+
337+
self.merge_sha = ''
338+
self.save()
339+
self.set_status('failure')
340+
341+
desc = 'Test timed out'
342+
utils.github_create_status(
343+
self.get_repo(),
344+
self.head_sha,
345+
'failure',
346+
'',
347+
desc,
348+
context='homu')
349+
self.add_comment(':boom: {}'.format(desc))
350+
323351

324352
def sha_cmp(short, full):
325353
return len(short) >= 4 and short == full[:len(short)]
@@ -1140,8 +1168,8 @@ def start_build(state, repo_cfgs, buildbot_slots, logger, db, git_cfg):
11401168
branch,
11411169
state.merge_sha))
11421170

1143-
state.test_started = time.time()
1144-
state.set_status('pending')
1171+
timeout = repo_cfg.get('timeout', DEFAULT_TEST_TIMEOUT)
1172+
state.start_testing(timeout)
11451173

11461174
desc = '{} commit {} with merge {}...'.format(
11471175
'Trying' if state.try_ else 'Testing',
@@ -1217,8 +1245,8 @@ def start_rebuild(state, repo_cfgs):
12171245
state.add_comment(':bomb: Failed to start rebuilding: `{}`'.format(err)) # noqa
12181246
return False
12191247

1220-
state.test_started = time.time()
1221-
state.set_status('pending')
1248+
timeout = repo_cfg.get('timeout', DEFAULT_TEST_TIMEOUT)
1249+
state.start_testing(timeout)
12221250

12231251
msg_1 = 'Previous build results'
12241252
msg_2 = ' for {}'.format(', '.join('[{}]({})'.format(builder, url) for builder, url in succ_builders)) # noqa
@@ -1326,39 +1354,6 @@ def fetch_mergeability(mergeable_que):
13261354
mergeable_que.task_done()
13271355

13281356

1329-
def check_timeout(states, queue_handler):
1330-
while True:
1331-
try:
1332-
for repo_label, repo_states in states.items():
1333-
for num, state in repo_states.items():
1334-
_timout = time.time() - state.test_started >= TEST_TIMEOUT
1335-
if state.status == 'pending' and _timout:
1336-
print('* Test timed out: {}'.format(state))
1337-
1338-
state.merge_sha = ''
1339-
state.save()
1340-
state.set_status('failure')
1341-
1342-
desc = 'Test timed out'
1343-
utils.github_create_status(
1344-
state.get_repo(),
1345-
state.head_sha,
1346-
'failure',
1347-
'',
1348-
desc,
1349-
context='homu')
1350-
state.add_comment(':boom: {}'.format(desc))
1351-
1352-
queue_handler()
1353-
1354-
except Exception:
1355-
print('* Error while checking timeout')
1356-
traceback.print_exc()
1357-
1358-
finally:
1359-
time.sleep(3600)
1360-
1361-
13621357
def synchronize(repo_label, repo_cfg, logger, gh, states, repos, db, mergeable_que, my_username, repo_labels): # noqa
13631358
logger.info('Synchronizing {}...'.format(repo_label))
13641359

@@ -1666,7 +1661,6 @@ def queue_handler():
16661661
]).start()
16671662

16681663
Thread(target=fetch_mergeability, args=[mergeable_que]).start()
1669-
Thread(target=check_timeout, args=[states, queue_handler]).start()
16701664

16711665
queue_handler()
16721666

0 commit comments

Comments
 (0)