Skip to content

Commit b9c726e

Browse files
committed
add retry limits for ssh related commands
1 parent 55b239c commit b9c726e

File tree

1 file changed

+28
-2
lines changed

1 file changed

+28
-2
lines changed

src/nixos-anywhere.sh

Lines changed: 28 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,8 @@ fi
2727
postKexecSshPort=22
2828
buildOnRemote=n
2929
envPassword=
30+
sshRetryLimit=-1
31+
rebootRetryLimit=-1
3032

3133
declare -A diskEncryptionKeys
3234
declare -a nixCopyOptions
@@ -86,6 +88,10 @@ Options:
8688
disko: first unmount and destroy all filesystems on the disks we want to format, then run the create and mount mode
8789
install: install the system
8890
reboot: reboot the machine
91+
* --ssh-retry-limit <limit>
92+
set the number of times to retry the ssh connection before giving up
93+
* --reboot-retry-limit <limit>
94+
set the number of times to wait for the reboot before giving up.
8995
USAGE
9096
}
9197

@@ -213,6 +219,14 @@ parseArgs() {
213219
--vm-test)
214220
vmTest=y
215221
;;
222+
--ssh-retry-limit)
223+
sshRetryLimit=$2
224+
shift
225+
;;
226+
--reboot-retry-limit)
227+
rebootRetryLimit=$2
228+
shift
229+
;;
216230
*)
217231
if [[ -z ${sshConnection-} ]]; then
218232
sshConnection="$1"
@@ -316,6 +330,7 @@ uploadSshKey() {
316330
fi
317331

318332
step Uploading install SSH keys
333+
local retryCount=0
319334
until
320335
if [[ -n ${envPassword} ]]; then
321336
sshpass -e \
@@ -339,7 +354,11 @@ uploadSshKey() {
339354
"$sshConnection"
340355
fi
341356
do
342-
sleep 3
357+
sleep 5
358+
retryCount=$((retryCount + 1))
359+
if [[ $sshRetryLimit -ne -1 ]] && [[ $retryCount -ge $sshRetryLimit ]]; then
360+
abort "Reached ssh retry limit of $sshRetryLimit"
361+
fi
343362
done
344363
}
345364

@@ -581,7 +600,14 @@ main() {
581600

582601
if [[ ${phases[reboot]-} == 1 ]]; then
583602
step Waiting for the machine to become unreachable due to reboot
584-
while runSshTimeout -- exit 0; do sleep 1; done
603+
retryCount=0
604+
until runSsh -o ConnectTimeout=10 -- exit 0; do
605+
sleep 5
606+
retryCount=$((retryCount + 1))
607+
if [[ $rebootRetryLimit -ne -1 ]] && [[ $retryCount -ge $rebootRetryLimit ]]; then
608+
abort "Machine didn't come online after reboot connection limit of $rebootRetryLimit retries"
609+
fi
610+
done
585611
fi
586612

587613
step "Done!"

0 commit comments

Comments
 (0)