Skip to content

Commit 67d9de8

Browse files
Author: Craig O'Donnell
airgap disaster recovery CI test (#632)
1 parent e08fd66 commit 67d9de8

File tree

14 files changed

+163
-65
lines changed

14 files changed

+163
-65
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@ ARCH := $(shell uname -m)
44
APP_NAME = embedded-cluster
55
ADMIN_CONSOLE_CHART_URL = oci://registry.replicated.com/library
66
ADMIN_CONSOLE_CHART_NAME = admin-console
7-
ADMIN_CONSOLE_CHART_VERSION = 1.109.4
7+
ADMIN_CONSOLE_CHART_VERSION = 1.109.4-build.1
88
ADMIN_CONSOLE_IMAGE_OVERRIDE =
99
ADMIN_CONSOLE_MIGRATIONS_IMAGE_OVERRIDE =
1010
EMBEDDED_OPERATOR_CHART_URL = oci://registry.replicated.com/library

cmd/embedded-cluster/install.go

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -278,6 +278,9 @@ func ensureK0sConfig(c *cli.Context) error {
278278
if ab := c.String("airgap-bundle"); ab != "" {
279279
opts = append(opts, addons.WithAirgapBundle(ab))
280280
}
281+
if c.Bool("proxy") {
282+
opts = append(opts, addons.WithProxyFromEnv())
283+
}
281284
if err := config.UpdateHelmConfigs(cfg, opts...); err != nil {
282285
return fmt.Errorf("unable to update helm configs: %w", err)
283286
}
@@ -445,6 +448,11 @@ var installCommand = &cli.Command{
445448
Usage: "Path to the airgap bundle. If set, the installation will be completed without internet access.",
446449
Hidden: true,
447450
},
451+
&cli.BoolFlag{
452+
Name: "proxy",
453+
Usage: "Use the system proxy settings for the install operation. These variables are currently only passed through to Velero and the Admin Console.",
454+
Hidden: true,
455+
},
448456
},
449457
Action: func(c *cli.Context) error {
450458
logrus.Debugf("checking if %s is already installed", binName)

cmd/embedded-cluster/restore.go

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -296,6 +296,9 @@ func ensureK0sConfigForRestore(c *cli.Context) error {
296296
}
297297
cfg := config.RenderK0sConfig()
298298
opts := []addons.Option{}
299+
if c.Bool("proxy") {
300+
opts = append(opts, addons.WithProxyFromEnv())
301+
}
299302
if err := config.UpdateHelmConfigsForRestore(cfg, opts...); err != nil {
300303
return fmt.Errorf("unable to update helm configs: %w", err)
301304
}
@@ -670,6 +673,11 @@ var restoreCommand = &cli.Command{
670673
Usage: "Path to the airgap bundle. If set, the restore will be completed without internet access.",
671674
Hidden: true,
672675
},
676+
&cli.BoolFlag{
677+
Name: "proxy",
678+
Usage: "Use the system proxy settings for the restore operation. These variables are currently only passed through to Velero.",
679+
Hidden: true,
680+
},
673681
},
674682
Before: func(c *cli.Context) error {
675683
if os.Getuid() != 0 {

e2e/cluster/cluster.go

Lines changed: 36 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -136,6 +136,7 @@ type Command struct {
136136
Stdout io.WriteCloser
137137
Stderr io.WriteCloser
138138
RegularUser bool
139+
Env map[string]string
139140
}
140141

141142
// Run runs a command in a node.
@@ -144,11 +145,14 @@ func Run(ctx context.Context, t *testing.T, cmd Command) error {
144145
if err != nil {
145146
t.Fatalf("Failed to connect to LXD: %v", err)
146147
}
147-
var env map[string]string
148+
env := map[string]string{}
148149
var uid uint32
149150
if cmd.RegularUser {
150151
uid = 9999
151-
env = map[string]string{"HOME": "/home/user"}
152+
env["HOME"] = "/home/user"
153+
}
154+
for k, v := range cmd.Env {
155+
env[k] = v
152156
}
153157
req := api.InstanceExecPost{
154158
Command: cmd.Line,
@@ -220,10 +224,14 @@ func NewTestCluster(in *Input) *Output {
220224
if in.CreateRegularUser {
221225
CreateRegularUser(in, out.Proxy)
222226
}
227+
NodeHasInternet(in, out.Proxy)
228+
ConfigureProxy(in)
223229
}
224230
return out
225231
}
226232

233+
const HTTPProxy = "http://10.0.0.254:3128"
234+
227235
// CreateProxy creates a node that attaches to both networks (external and internal),
228236
// once this is done we install squid and configure it to be a proxy. We also make
229237
// sure that all nodes are configured to use the proxy as default gateway. Internet
@@ -284,7 +292,6 @@ func CreateProxy(in *Input) string {
284292
in.T.Fatalf("Failed to get proxy state %s: %v", name, err)
285293
}
286294
}
287-
ConfigureProxy(in)
288295
return name
289296
}
290297

@@ -295,33 +302,32 @@ func CreateProxy(in *Input) string {
295302
func ConfigureProxy(in *Input) {
296303
// starts by installing dependencies, setting up the second network interface ip
297304
// address and configuring iptables to allow dns requests forwarding (nat).
298-
// TODO: this was flaky, so we should find an alternative when we need it.
299-
// proxyName := fmt.Sprintf("node-%s-proxy", in.id)
300-
// for _, cmd := range [][]string{
301-
// {"apt-get", "update", "-y"},
302-
// {"apt-get", "install", "-y", "iptables", "squid"},
303-
// {"ip", "addr", "add", "10.0.0.254/24", "dev", "eth1"},
304-
// {"ip", "link", "set", "eth1", "up"},
305-
// {"sysctl", "-w", "net.ipv4.ip_forward=1"},
306-
// {"iptables", "-t", "nat", "-o", "eth0", "-A", "POSTROUTING", "-p", "udp", "--dport", "53", "-j", "MASQUERADE"},
307-
// } {
308-
// RunCommandOnNode(in, cmd, proxyName)
309-
// }
310-
311-
// // create a simple squid configuration that allows for localnet access. upload it
312-
// // to the proxy in the right location. restart squid to apply the configuration.
313-
// tmpfile, err := os.CreateTemp("", "squid-config-*.conf")
314-
// if err != nil {
315-
// in.T.Fatalf("Failed to create temp file: %v", err)
316-
// }
317-
// defer os.Remove(tmpfile.Name())
318-
// if _, err = tmpfile.WriteString("http_access allow localnet\n"); err != nil {
319-
// in.T.Fatalf("Failed to write to temp file: %v", err)
320-
// }
321-
// file := File{SourcePath: tmpfile.Name(), DestPath: "/etc/squid/conf.d/ec.conf", Mode: 0644}
322-
// tmpfile.Close()
323-
// CopyFileToNode(in, proxyName, file)
324-
// RunCommandOnNode(in, []string{"systemctl", "restart", "squid"}, proxyName)
305+
proxyName := fmt.Sprintf("node-%s-proxy", in.id)
306+
for _, cmd := range [][]string{
307+
{"apt-get", "update", "-y"},
308+
{"apt-get", "install", "-y", "iptables", "squid"},
309+
{"ip", "addr", "add", "10.0.0.254/24", "dev", "eth1"},
310+
{"ip", "link", "set", "eth1", "up"},
311+
{"sysctl", "-w", "net.ipv4.ip_forward=1"},
312+
{"iptables", "-t", "nat", "-o", "eth0", "-A", "POSTROUTING", "-p", "udp", "--dport", "53", "-j", "MASQUERADE"},
313+
} {
314+
RunCommandOnNode(in, cmd, proxyName)
315+
}
316+
317+
// create a simple squid configuration that allows for localnet access. upload it
318+
// to the proxy in the right location. restart squid to apply the configuration.
319+
tmpfile, err := os.CreateTemp("", "squid-config-*.conf")
320+
if err != nil {
321+
in.T.Fatalf("Failed to create temp file: %v", err)
322+
}
323+
defer os.Remove(tmpfile.Name())
324+
if _, err = tmpfile.WriteString("http_access allow localnet\n"); err != nil {
325+
in.T.Fatalf("Failed to write to temp file: %v", err)
326+
}
327+
file := File{SourcePath: tmpfile.Name(), DestPath: "/etc/squid/conf.d/ec.conf", Mode: 0644}
328+
tmpfile.Close()
329+
CopyFileToNode(in, proxyName, file)
330+
RunCommandOnNode(in, []string{"systemctl", "restart", "squid"}, proxyName)
325331

326332
// set the default route on all other nodes to point to the proxy we just created.
327333
// this makes it easier to ensure no internet will work on them other than dns and

e2e/playwright/tests/deploy-airgap-upgrade/test.spec.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@ test('deploy airgap upgrade', async ({ page }) => {
88
await page.getByRole('button', { name: 'Deploy', exact: true }).click();
99
await expect(page.locator('.Modal-body')).toBeVisible();
1010
await page.getByRole('button', { name: 'Yes, Deploy' }).click();
11-
await expect(page.locator('#app')).toContainText('Updating cluster', { timeout: 60000 });
11+
await expect(page.locator('#app')).toContainText('Updating cluster', { timeout: 90000 });
1212
await expect(page.locator('.Modal-body')).toContainText('Cluster update in progress', { timeout: 120000 });
1313
await expect(page.locator('#app')).toContainText('Currently deployed version', { timeout: 600000 });
1414
await expect(page.locator('#app')).toContainText('Up to date', { timeout: 30000 });

e2e/restore_test.go

Lines changed: 26 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -185,7 +185,7 @@ func TestSingleNodeAirgapDisasterRecovery(t *testing.T) {
185185
T: t,
186186
Nodes: 1,
187187
Image: "ubuntu/jammy",
188-
WithProxy: false, // TODO figure out how to do some form of airgapping
188+
WithProxy: true,
189189
AirgapInstallBundlePath: airgapInstallBundlePath,
190190
AirgapUpgradeBundlePath: airgapUpgradeBundlePath,
191191
})
@@ -201,18 +201,15 @@ func TestSingleNodeAirgapDisasterRecovery(t *testing.T) {
201201
t.Fatalf("fail to prepare airgap files on node %s: %v", tc.Nodes[0], err)
202202
}
203203
t.Logf("%s: installing embedded-cluster on node 0", time.Now().Format(time.RFC3339))
204-
line = []string{"single-node-airgap-install.sh"}
205-
if _, _, err := RunCommandOnNode(t, tc, 0, line); err != nil {
204+
line = []string{"single-node-airgap-install.sh", "--proxy"}
205+
withEnv := WithEnv(map[string]string{
206+
"HTTP_PROXY": cluster.HTTPProxy,
207+
"HTTPS_PROXY": cluster.HTTPProxy,
208+
"NO_PROXY": "localhost,127.0.0.1,10.96.0.0/12,.svc,.local,.default,kubernetes,kotsadm-rqlite,kotsadm-api-node",
209+
})
210+
if _, _, err := RunCommandOnNode(t, tc, 0, line, withEnv); err != nil {
206211
t.Fatalf("fail to install embedded-cluster on node %s: %v", tc.Nodes[0], err)
207212
}
208-
t.Logf("%s: installing test dependencies on node 0", time.Now().Format(time.RFC3339))
209-
commands := [][]string{
210-
{"apt-get", "update", "-y"},
211-
{"apt-get", "install", "expect", "-y"},
212-
}
213-
if err := RunCommandsOnNode(t, tc, 0, commands); err != nil {
214-
t.Fatalf("fail to install test dependencies on node %s: %v", tc.Nodes[0], err)
215-
}
216213
if err := setupPlaywright(t, tc); err != nil {
217214
t.Fatalf("fail to setup playwright: %v", err)
218215
}
@@ -229,9 +226,26 @@ func TestSingleNodeAirgapDisasterRecovery(t *testing.T) {
229226
if _, _, err := RunCommandOnNode(t, tc, 0, line); err != nil {
230227
t.Fatalf("fail to reset the installation: %v", err)
231228
}
229+
t.Logf("%s: installing test dependencies on node 0", time.Now().Format(time.RFC3339))
230+
commands := [][]string{
231+
{"apt-get", "update", "-y"},
232+
{"apt-get", "install", "expect", "-y"},
233+
}
234+
withEnv = WithEnv(map[string]string{
235+
"http_proxy": cluster.HTTPProxy,
236+
"https_proxy": cluster.HTTPProxy,
237+
})
238+
if err := RunCommandsOnNode(t, tc, 0, commands, withEnv); err != nil {
239+
t.Fatalf("fail to install test dependencies on node %s: %v", tc.Nodes[0], err)
240+
}
232241
t.Logf("%s: restoring the installation", time.Now().Format(time.RFC3339))
233242
line = append([]string{"restore-installation-airgap.exp"}, testArgs...)
234-
if _, _, err := RunCommandOnNode(t, tc, 0, line); err != nil {
243+
withEnv = WithEnv(map[string]string{
244+
"HTTP_PROXY": cluster.HTTPProxy,
245+
"HTTPS_PROXY": cluster.HTTPProxy,
246+
"NO_PROXY": "localhost,127.0.0.1,10.96.0.0/12,.svc,.local,.default,kubernetes,kotsadm-rqlite,kotsadm-api-node",
247+
})
248+
if _, _, err := RunCommandOnNode(t, tc, 0, line, withEnv); err != nil {
235249
t.Fatalf("fail to restore the installation: %v", err)
236250
}
237251
t.Logf("%s: checking installation state after restoring app", time.Now().Format(time.RFC3339))

e2e/scripts/install-playwright.sh

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6,8 +6,7 @@ main() {
66
apt-get install -y \
77
ca-certificates \
88
curl \
9-
gnupg \
10-
socat
9+
gnupg
1110

1211
curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg
1312
NODE_MAJOR=20

e2e/scripts/restore-installation-airgap.exp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ set dr_aws_s3_prefix [lindex $argv 3]
1212
set dr_aws_access_key_id [lindex $argv 4]
1313
set dr_aws_secret_access_key [lindex $argv 5]
1414

15-
spawn embedded-cluster restore --airgap-bundle /tmp/release.airgap
15+
spawn embedded-cluster restore --airgap-bundle /tmp/release.airgap --proxy
1616

1717
expect {
1818
"Enter information to configure access to your backup storage location." {}

e2e/scripts/single-node-airgap-install.sh

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -125,7 +125,12 @@ check_airgap_pvc() {
125125
}
126126

127127
main() {
128-
if ! embedded-cluster install --no-prompt --license /tmp/license.yaml --airgap-bundle /tmp/release.airgap 2>&1 | tee /tmp/log ; then
128+
local additional_args=
129+
if [ -n "${1:-}" ]; then
130+
additional_args="$1"
131+
echo "Running install with additional args: $additional_args"
132+
fi
133+
if ! embedded-cluster install --no-prompt --license /tmp/license.yaml --airgap-bundle /tmp/release.airgap $additional_args 2>&1 | tee /tmp/log ; then
129134
echo "Failed to install embedded-cluster"
130135
exit 1
131136
fi

e2e/utils.go

Lines changed: 28 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -21,12 +21,20 @@ func (b *buffer) Close() error {
2121
return nil
2222
}
2323

24+
type RunCommandOption func(cmd *cluster.Command)
25+
26+
func WithEnv(env map[string]string) RunCommandOption {
27+
return func(cmd *cluster.Command) {
28+
cmd.Env = env
29+
}
30+
}
31+
2432
// RunCommandsOnNode runs a series of commands on a node.
25-
func RunCommandsOnNode(t *testing.T, cl *cluster.Output, node int, cmds [][]string) error {
33+
func RunCommandsOnNode(t *testing.T, cl *cluster.Output, node int, cmds [][]string, opts ...RunCommandOption) error {
2634
for _, cmd := range cmds {
2735
cmdstr := strings.Join(cmd, " ")
2836
t.Logf("running `%s` node %d", cmdstr, node)
29-
_, _, err := RunCommandOnNode(t, cl, node, cmd)
37+
_, _, err := RunCommandOnNode(t, cl, node, cmd, opts...)
3038
if err != nil {
3139
return err
3240
}
@@ -35,38 +43,44 @@ func RunCommandsOnNode(t *testing.T, cl *cluster.Output, node int, cmds [][]stri
3543
}
3644

3745
// RunRegularUserCommandOnNode runs a command on a node as a regular user (not root) with a timeout.
38-
func RunRegularUserCommandOnNode(t *testing.T, cl *cluster.Output, node int, line []string) (string, string, error) {
46+
func RunRegularUserCommandOnNode(t *testing.T, cl *cluster.Output, node int, line []string, opts ...RunCommandOption) (string, string, error) {
3947
stdout := &buffer{bytes.NewBuffer(nil)}
4048
stderr := &buffer{bytes.NewBuffer(nil)}
41-
cmd := cluster.Command{
49+
cmd := &cluster.Command{
4250
Node: cl.Nodes[node],
4351
Line: line,
4452
Stdout: stdout,
4553
Stderr: stderr,
4654
RegularUser: true,
4755
}
56+
for _, fn := range opts {
57+
fn(cmd)
58+
}
4859
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute)
4960
defer cancel()
50-
if err := cluster.Run(ctx, t, cmd); err != nil {
61+
if err := cluster.Run(ctx, t, *cmd); err != nil {
5162
t.Logf("stdout:\n%s\nstderr:%s\n", stdout.String(), stderr.String())
5263
return stdout.String(), stderr.String(), err
5364
}
5465
return stdout.String(), stderr.String(), nil
5566
}
5667

5768
// RunCommandOnNode runs a command on a node with a timeout.
58-
func RunCommandOnNode(t *testing.T, cl *cluster.Output, node int, line []string) (string, string, error) {
69+
func RunCommandOnNode(t *testing.T, cl *cluster.Output, node int, line []string, opts ...RunCommandOption) (string, string, error) {
5970
stdout := &buffer{bytes.NewBuffer(nil)}
6071
stderr := &buffer{bytes.NewBuffer(nil)}
61-
cmd := cluster.Command{
72+
cmd := &cluster.Command{
6273
Node: cl.Nodes[node],
6374
Line: line,
6475
Stdout: stdout,
6576
Stderr: stderr,
6677
}
78+
for _, fn := range opts {
79+
fn(cmd)
80+
}
6781
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute)
6882
defer cancel()
69-
if err := cluster.Run(ctx, t, cmd); err != nil {
83+
if err := cluster.Run(ctx, t, *cmd); err != nil {
7084
t.Logf("stdout:\n%s", stdout.String())
7185
t.Logf("stderr:\n%s", stderr.String())
7286
return stdout.String(), stderr.String(), err
@@ -75,22 +89,25 @@ func RunCommandOnNode(t *testing.T, cl *cluster.Output, node int, line []string)
7589
}
7690

7791
// RunCommandOnProxyNode runs a command on the proxy node with a timeout.
78-
func RunCommandOnProxyNode(t *testing.T, cl *cluster.Output, line []string) (string, string, error) {
92+
func RunCommandOnProxyNode(t *testing.T, cl *cluster.Output, line []string, opts ...RunCommandOption) (string, string, error) {
7993
if cl.Proxy == "" {
8094
return "", "", fmt.Errorf("no proxy node found")
8195
}
8296

8397
stdout := &buffer{bytes.NewBuffer(nil)}
8498
stderr := &buffer{bytes.NewBuffer(nil)}
85-
cmd := cluster.Command{
99+
cmd := &cluster.Command{
86100
Node: cl.Proxy,
87101
Line: line,
88102
Stdout: stdout,
89103
Stderr: stderr,
90104
}
105+
for _, fn := range opts {
106+
fn(cmd)
107+
}
91108
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute)
92109
defer cancel()
93-
if err := cluster.Run(ctx, t, cmd); err != nil {
110+
if err := cluster.Run(ctx, t, *cmd); err != nil {
94111
t.Logf("stdout:\n%s", stdout.String())
95112
t.Logf("stderr:\n%s", stderr.String())
96113
return stdout.String(), stderr.String(), err

0 commit comments

Comments
 (0)