Skip to content

Commit 0661b74

Browse files
authored
Wait for SSH to be available on CMX node (#2251)
1 parent 492b101 commit 0661b74

File tree

1 file changed

+27
-6
lines changed

1 file changed

+27
-6
lines changed

e2e/cluster/cmx/cluster.go

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,10 @@ func NewNodes(in *ClusterInput) ([]Node, error) {
111111
}
112112
nodes[i].sshEndpoint = sshEndpoint
113113

114+
if err := waitForSSH(nodes[i]); err != nil {
115+
return nil, fmt.Errorf("wait for ssh to be available on node %d: %v", i, err)
116+
}
117+
114118
privateIP, err := discoverPrivateIP(nodes[i])
115119
if err != nil {
116120
return nil, fmt.Errorf("discover node private IP: %v", err)
@@ -206,6 +210,25 @@ func getSSHEndpoint(nodeID string) (string, error) {
206210
return strings.TrimSpace(string(output)), nil
207211
}
208212

213+
func waitForSSH(node Node) error {
214+
timeout := time.After(5 * time.Minute)
215+
tick := time.Tick(5 * time.Second)
216+
var lastErr error
217+
218+
for {
219+
select {
220+
case <-timeout:
221+
return fmt.Errorf("timed out after 5 minutes: last error: %w", lastErr)
222+
case <-tick:
223+
_, _, err := runCommandOnNode(node, []string{"uptime"})
224+
if err == nil {
225+
return nil
226+
}
227+
lastErr = err
228+
}
229+
}
230+
}
231+
209232
func (c *Cluster) Airgap() error {
210233
// Update network policy to airgap
211234
output, err := exec.Command("replicated", "network", "update", "policy", "--id", c.network.ID, "--policy=airgap").CombinedOutput()
@@ -245,17 +268,15 @@ func (c *Cluster) waitUntilAirgapped(node int) error {
245268
func (c *Cluster) WaitForReboot() {
246269
time.Sleep(30 * time.Second)
247270
for i := range c.Nodes {
248-
c.refreshSSHEndpoint(i)
271+
c.waitForSSH(i)
249272
c.waitForClockSync(i)
250273
}
251274
}
252275

253-
func (c *Cluster) refreshSSHEndpoint(node int) {
254-
sshEndpoint, err := getSSHEndpoint(c.Nodes[node].ID)
255-
if err != nil {
256-
c.t.Fatalf("failed to refresh ssh endpoint for node %d: %v", node, err)
276+
func (c *Cluster) waitForSSH(node int) {
277+
if err := waitForSSH(c.Nodes[node]); err != nil {
278+
c.t.Fatalf("failed to wait for ssh to be available on node %d: %v", node, err)
257279
}
258-
c.Nodes[node].sshEndpoint = sshEndpoint
259280
}
260281

261282
func (c *Cluster) waitForClockSync(node int) {

0 commit comments

Comments
 (0)