Skip to content

Commit d1ec1e2

Browse files
DavidGOrtega, restyled-io[bot], restyled-commits
authored
graceful shutdown (#42)
* graceful shutdown
* gpu drivers only if gpu
* no gpu install if no gpu closes #43
* ExecStop no need
* Restyled by whitespace (#48)

Co-authored-by: Restyled.io <commits@restyled.io>
Co-authored-by: restyled-io[bot] <32688539+restyled-io[bot]@users.noreply.github.com>
Co-authored-by: Restyled.io <commits@restyled.io>
1 parent 8378362 commit d1ec1e2

File tree

1 file changed

+29
-6
lines changed

1 file changed

+29
-6
lines changed

iterative/resource_runner.go

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,7 @@ func resourceRunnerCreate(ctx context.Context, d *schema.ResourceData, m interfa
160160
})
161161
return diags
162162
}
163+
163164
d.Set("startup_script", startupScript)
164165

165166
if len(d.Get("cloud").(string)) == 0 {
@@ -243,7 +244,7 @@ export DEBIAN_FRONTEND=noninteractive
243244
echo "APT::Get::Assume-Yes \"true\";" | sudo tee -a /etc/apt/apt.conf.d/90assumeyes
244245
245246
sudo apt remove unattended-upgrades
246-
systemctl disable apt-daily-upgrade.service
247+
systemctl disable apt-daily-upgrade.service
247248
248249
sudo add-apt-repository universe -y
249250
sudo add-apt-repository ppa:git-core/ppa -y
@@ -260,9 +261,9 @@ curl -sL https://deb.nodesource.com/setup_12.x | sudo bash
260261
sudo apt update && sudo apt-get install -y nodejs
261262
262263
sudo apt install -y ubuntu-drivers-common git
263-
sudo ubuntu-drivers autoinstall
264+
sudo ubuntu-drivers autoinstall
264265
265-
curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
266+
curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
266267
curl -s -L https://nvidia.github.io/nvidia-docker/ubuntu18.04/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
267268
sudo apt update && sudo apt install -y nvidia-docker2
268269
@@ -273,15 +274,37 @@ sudo docker run --rm --gpus all nvidia/cuda:11.0-base nvidia-smi
273274
{{end}}
274275
275276
sudo npm install -g git+https://github.com/iterative/cml.git#cml-runner
276-
export HOME=/root
277+
278+
sudo bash -c 'cat << EOF > /usr/bin/cml.sh
279+
#!/bin/sh
280+
277281
export AWS_SECRET_ACCESS_KEY={{.AWS_SECRET_ACCESS_KEY}}
278282
export AWS_ACCESS_KEY_ID={{.AWS_ACCESS_KEY_ID}}
279283
export AZURE_CLIENT_ID={{.AZURE_CLIENT_ID}}
280284
export AZURE_CLIENT_SECRET={{.AZURE_CLIENT_SECRET}}
281285
export AZURE_SUBSCRIPTION_ID={{.AZURE_SUBSCRIPTION_ID}}
282286
export AZURE_TENANT_ID={{.AZURE_TENANT_ID}}
283-
nohup cml-runner{{if .name}} --name {{.name}}{{end}}{{if .labels}} --labels {{.labels}}{{end}}{{if .idle_timeout}} --idle-timeout {{.idle_timeout}}{{end}}{{if .driver}} --driver {{.driver}}{{end}}{{if .repo}} --repo {{.repo}}{{end}}{{if .token}} --token {{.token}}{{end}}{{if .tf_resource}} --tf_resource={{.tf_resource}}{{end}} {{if .instance_gpu}} --cloud-gpu {{.instance_gpu}}{{end}} < /dev/null > std.out 2> std.err &
284-
sleep 10
287+
288+
cml-runner{{if .name}} --name {{.name}}{{end}}{{if .labels}} --labels {{.labels}}{{end}}{{if .idle_timeout}} --idle-timeout {{.idle_timeout}}{{end}}{{if .driver}} --driver {{.driver}}{{end}}{{if .repo}} --repo {{.repo}}{{end}}{{if .token}} --token {{.token}}{{end}}{{if .tf_resource}} --tf_resource={{.tf_resource}}{{end}} {{if .instance_gpu}} --cloud-gpu {{.instance_gpu}}{{end}}
289+
EOF'
290+
sudo chmod +x /usr/bin/cml.sh
291+
292+
sudo bash -c 'cat << EOF > /etc/systemd/system/cml.service
293+
[Unit]
294+
Description=cml service
295+
296+
[Service]
297+
Type=oneshot
298+
RemainAfterExit=yes
299+
ExecStart=/usr/bin/cml.sh
300+
301+
[Install]
302+
WantedBy=multi-user.target
303+
EOF'
304+
sudo chmod +x /etc/systemd/system/cml.service
305+
306+
sudo systemctl daemon-reload
307+
sudo systemctl enable cml.service --now
285308
`)
286309
var customDataBuffer bytes.Buffer
287310
err = tmpl.Execute(&customDataBuffer, data)

0 commit comments

Comments
 (0)