Skip to content

Commit 5a5461d

Browse files
authored
feat: wait for coredns and metrics-server, mk2 (#624)
* Revert "Revert "feat: wait for coredns and metrics-server" (#623)"
  This reverts commit 965a58b.
* only wait for metrics server, not coredns
* watch for k8s infra status in the background
* improve error messages when coredns or metrics server fail to become healthy
* fix fmt
* move k8s health warnings prints to defer func
* store both errors in response channel at once
1 parent ccea8de commit 5a5461d

File tree

3 files changed

+74
-42
lines changed

3 files changed

+74
-42
lines changed

cmd/embedded-cluster/restore.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -684,6 +684,19 @@ var restoreCommand = &cli.Command{
684684
if err := waitForK0s(); err != nil {
685685
return fmt.Errorf("unable to wait for node: %w", err)
686686
}
687+
688+
kcli, err := kubeutils.KubeClient()
689+
if err != nil {
690+
return fmt.Errorf("unable to create kube client: %w", err)
691+
}
692+
errCh := kubeutils.WaitForKubernetes(c.Context, kcli)
693+
defer func() {
694+
for len(errCh) > 0 {
695+
err := <-errCh
696+
logrus.Error(fmt.Errorf("the Kubernetes Infrastructure failed to become ready: %w", err))
697+
}
698+
}()
699+
687700
logrus.Debugf("running outro")
688701
if err := runOutroForRestore(c); err != nil {
689702
return fmt.Errorf("unable to run outro: %w", err)

pkg/addons/applier.go

Lines changed: 9 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,13 @@ package addons
66
import (
77
"context"
88
"fmt"
9-
"time"
10-
119
"github.com/k0sproject/k0s/pkg/apis/k0s/v1beta1"
1210
k0sconfig "github.com/k0sproject/k0s/pkg/apis/k0s/v1beta1"
1311
embeddedclusterv1beta1 "github.com/replicatedhq/embedded-cluster-kinds/apis/v1beta1"
1412
"github.com/replicatedhq/embedded-cluster-kinds/types"
1513
kotsv1beta1 "github.com/replicatedhq/kotskinds/apis/kots/v1beta1"
1614
"github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2"
1715
"github.com/sirupsen/logrus"
18-
corev1 "k8s.io/api/core/v1"
1916
"sigs.k8s.io/controller-runtime/pkg/client"
2017

2118
"github.com/replicatedhq/embedded-cluster/pkg/addons/adminconsole"
@@ -62,6 +59,15 @@ func (a *Applier) Outro(ctx context.Context) error {
6259
if err != nil {
6360
return fmt.Errorf("unable to load addons: %w", err)
6461
}
62+
63+
errCh := kubeutils.WaitForKubernetes(ctx, kcli)
64+
defer func() {
65+
for len(errCh) > 0 {
66+
err := <-errCh
67+
logrus.Error(fmt.Errorf("the Kubernetes Infrastructure failed to become ready: %w", err))
68+
}
69+
}()
70+
6571
for _, addon := range addons {
6672
if err := addon.Outro(ctx, kcli); err != nil {
6773
return err
@@ -319,39 +325,6 @@ func (a *Applier) Versions(additionalCharts []v1beta1.Chart) (map[string]string,
319325
return versions, nil
320326
}
321327

322-
// waitForKubernetes waits until we manage to make a successful connection to the
323-
// Kubernetes API server.
324-
func (a *Applier) waitForKubernetes(ctx context.Context) error {
325-
loading := spinner.Start()
326-
defer func() {
327-
loading.Closef("Kubernetes API server is ready")
328-
}()
329-
kcli, err := kubeutils.KubeClient()
330-
if err != nil {
331-
return fmt.Errorf("unable to create kubernetes client: %w", err)
332-
}
333-
ticker := time.NewTicker(3 * time.Second)
334-
defer ticker.Stop()
335-
counter := 1
336-
loading.Infof("1/n Waiting for Kubernetes API server to be ready")
337-
for {
338-
select {
339-
case <-ticker.C:
340-
case <-ctx.Done():
341-
return ctx.Err()
342-
}
343-
counter++
344-
if err := kcli.List(ctx, &corev1.NamespaceList{}); err != nil {
345-
loading.Infof(
346-
"%d/n Waiting for Kubernetes API server to be ready.",
347-
counter,
348-
)
349-
continue
350-
}
351-
return nil
352-
}
353-
}
354-
355328
func spinForInstallation(ctx context.Context, cli client.Client) error {
356329
installSpin := spinner.Start()
357330
installSpin.Infof("Waiting for additional components to be ready")

pkg/kubeutils/kubeutils.go

Lines changed: 52 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,11 @@ func WaitForNamespace(ctx context.Context, cli client.Client, ns string) error {
4040
return ready, nil
4141
},
4242
); err != nil {
43-
return fmt.Errorf("timed out waiting for namespace %s: %v", ns, lasterr)
43+
if lasterr != nil {
44+
return fmt.Errorf("timed out waiting for namespace %s: %v", ns, lasterr)
45+
} else {
46+
return fmt.Errorf("timed out waiting for namespace %s", ns)
47+
}
4448
}
4549
return nil
4650

@@ -60,7 +64,11 @@ func WaitForDeployment(ctx context.Context, cli client.Client, ns, name string)
6064
return ready, nil
6165
},
6266
); err != nil {
63-
return fmt.Errorf("timed out waiting for %s to deploy: %v", name, lasterr)
67+
if lasterr != nil {
68+
return fmt.Errorf("timed out waiting for %s to deploy: %v", name, lasterr)
69+
} else {
70+
return fmt.Errorf("timed out waiting for %s to deploy", name)
71+
}
6472
}
6573
return nil
6674
}
@@ -79,7 +87,11 @@ func WaitForDaemonset(ctx context.Context, cli client.Client, ns, name string) e
7987
return ready, nil
8088
},
8189
); err != nil {
82-
return fmt.Errorf("timed out waiting for %s to deploy: %v", name, lasterr)
90+
if lasterr != nil {
91+
return fmt.Errorf("timed out waiting for %s to deploy: %v", name, lasterr)
92+
} else {
93+
return fmt.Errorf("timed out waiting for %s to deploy", name)
94+
}
8395
}
8496
return nil
8597
}
@@ -98,7 +110,11 @@ func WaitForService(ctx context.Context, cli client.Client, ns, name string) err
98110
return svc.Spec.ClusterIP != "", nil
99111
},
100112
); err != nil {
101-
return fmt.Errorf("timed out waiting for service %s to have an IP: %v", name, lasterr)
113+
if lasterr != nil {
114+
return fmt.Errorf("timed out waiting for service %s to have an IP: %v", name, lasterr)
115+
} else {
116+
return fmt.Errorf("timed out waiting for service %s to have an IP", name)
117+
}
102118
}
103119
return nil
104120
}
@@ -153,7 +169,11 @@ func WaitForInstallation(ctx context.Context, cli client.Client, writer *spinner
153169
},
154170
); err != nil {
155171
if wait.Interrupted(err) {
156-
return fmt.Errorf("timed out waiting for the installation to finish: %v", lasterr)
172+
if lasterr != nil {
173+
return fmt.Errorf("timed out waiting for the installation to finish: %v", lasterr)
174+
} else {
175+
return fmt.Errorf("timed out waiting for the installation to finish")
176+
}
157177
}
158178
return fmt.Errorf("error waiting for installation: %v", err)
159179
}
@@ -211,7 +231,11 @@ func WaitForNodes(ctx context.Context, cli client.Client) error {
211231
return readynodes == len(nodes.Items), nil
212232
},
213233
); err != nil {
214-
return fmt.Errorf("timed out waiting for nodes to be ready: %v", lasterr)
234+
if lasterr != nil {
235+
return fmt.Errorf("timed out waiting for nodes to be ready: %v", lasterr)
236+
} else {
237+
return fmt.Errorf("timed out waiting for nodes to be ready")
238+
}
215239
}
216240
return nil
217241
}
@@ -262,3 +286,25 @@ func IsDaemonsetReady(ctx context.Context, cli client.Client, ns, name string) (
262286
}
263287
return false, nil
264288
}
289+
290+
// WaitForKubernetes starts two background goroutines that wait for the coredns and
// metrics-server deployments in kube-system, and immediately returns an error channel.
291+
// If either deployment fails to become healthy, a wrapped error is sent on the channel.
// This function never closes the channel, so ranging over it would block forever;
// drain it with a non-blocking check such as `for len(ch) > 0 { err := <-ch }`.
292+
func WaitForKubernetes(ctx context.Context, cli client.Client) <-chan error {
293+
// Capacity 2 gives each goroutine its own slot, so a send never blocks even if
// nobody is reading from the channel.
errch := make(chan error, 2)
294+
295+
go func() {
296+
err := WaitForDeployment(ctx, cli, "kube-system", "coredns")
297+
if err != nil {
298+
errch <- fmt.Errorf("CoreDNS failed to become healthy: %w", err)
299+
}
300+
}()
301+
302+
go func() {
303+
err := WaitForDeployment(ctx, cli, "kube-system", "metrics-server")
304+
if err != nil {
305+
errch <- fmt.Errorf("Metrics Server failed to become healthy: %w", err)
306+
}
307+
}()
308+
309+
// Return right away; the goroutines above keep running in the background.
return errch
310+
}

0 commit comments

Comments
 (0)