Skip to content

Commit 6d3cc96

Browse files
CR-8961 network test before runtime installation (#250)
* network test first draft * wip * added skip-network-test flag * handle env vars * small fix * generated docs * small refactor of testNetwork * move functions to util * delete pod when test finished * bump * handle defer errors * extracted logic to checkPodLastState * some improvements * small improve * gen docs * refactor reporter * indentation Co-authored-by: Noam Gal <noam.gal@codefresh.io>
1 parent 06b7a47 commit 6d3cc96

File tree

9 files changed

+226
-5
lines changed

9 files changed

+226
-5
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
VERSION=v0.0.227
1+
VERSION=v0.0.228
22

33
OUT_DIR=dist
44
YEAR?=$(shell date +"%Y")

cmd/commands/runtime.go

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ type (
7777
IngressController string
7878
Insecure bool
7979
InstallDemoResources bool
80+
SkipClusterChecks bool
8081
Version *semver.Version
8182
GsCloneOpts *git.CloneOptions
8283
InsCloneOpts *git.CloneOptions
@@ -218,6 +219,7 @@ func NewRuntimeInstallCommand() *cobra.Command {
218219
cmd.Flags().StringVar(&installationOpts.GitIntegrationRegistrationOpts.Token, "personal-git-token", "", "The Personal git token for your user")
219220
cmd.Flags().StringVar(&installationOpts.versionStr, "version", "", "The runtime version to install (default: latest)")
220221
cmd.Flags().BoolVar(&installationOpts.InstallDemoResources, "demo-resources", true, "Installs demo resources (default: true)")
222+
cmd.Flags().BoolVar(&installationOpts.SkipClusterChecks, "skip-cluster-checks", false, "Skips the cluster's checks")
221223
cmd.Flags().DurationVar(&store.Get().WaitTimeout, "wait-timeout", store.Get().WaitTimeout, "How long to wait for the runtime components to be ready")
222224
cmd.Flags().StringVar(&gitIntegrationCreationOpts.APIURL, "provider-api-url", "", "Git provider API url")
223225
cmd.Flags().BoolVar(&store.Get().BypassIngressClassCheck, "bypass-ingress-class-check", false, "Disables the ingress class check during pre-installation")
@@ -227,7 +229,7 @@ func NewRuntimeInstallCommand() *cobra.Command {
227229
})
228230

229231
installationOpts.GsCloneOpts = &git.CloneOptions{
230-
FS: fs.Create(memfs.New()),
232+
FS: fs.Create(memfs.New()),
231233
CreateIfNotExist: true,
232234
}
233235

@@ -783,6 +785,17 @@ func preInstallationChecks(ctx context.Context, opts *RuntimeInstallOptions) err
783785
return fmt.Errorf("existing runtime check failed: %w", err)
784786
}
785787

788+
if !opts.SkipClusterChecks {
789+
err = util.RunNetworkTest(ctx, opts.KubeFactory, cfConfig.GetCurrentContext().URL)
790+
if err != nil {
791+
log.G(ctx).Info("Network test finished successfully")
792+
}
793+
}
794+
handleCliStep(reporter.InstallStepRunPreCheckClusterChecks, "Running cluster checks", err, false)
795+
if err != nil {
796+
return fmt.Errorf(fmt.Sprintf("cluster network tests failed: %v ", err))
797+
}
798+
786799
err = kubeutil.EnsureClusterRequirements(ctx, opts.KubeFactory, opts.RuntimeName)
787800
handleCliStep(reporter.InstallStepRunPreCheckValidateClusterRequirements, "Ensuring cluster requirements", err, false)
788801
if err != nil {

docs/commands/cli-v2_runtime_install.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ cli-v2 runtime install [runtime_name] [flags]
4141
--provider string The git provider, one of: gitea|github|gitlab
4242
--provider-api-url string Git provider API url
4343
--repo string Repository URL [GIT_REPO]
44+
--skip-cluster-checks Skips the cluster's checks
4445
--version string The runtime version to install (default: latest)
4546
--wait-timeout duration How long to wait for the runtime components to be ready (default 8m0s)
4647
```

docs/releases/release_notes.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ cf version
2323

2424
```bash
2525
# download and extract the binary
26-
curl -L --output - https://github.com/codefresh-io/cli-v2/releases/download/v0.0.227/cf-linux-amd64.tar.gz | tar zx
26+
curl -L --output - https://github.com/codefresh-io/cli-v2/releases/download/v0.0.228/cf-linux-amd64.tar.gz | tar zx
2727

2828
# move the binary to your $PATH
2929
mv ./cf-linux-amd64 /usr/local/bin/cf
@@ -36,7 +36,7 @@ cf version
3636

3737
```bash
3838
# download and extract the binary
39-
curl -L --output - https://github.com/codefresh-io/cli-v2/releases/download/v0.0.227/cf-darwin-amd64.tar.gz | tar zx
39+
curl -L --output - https://github.com/codefresh-io/cli-v2/releases/download/v0.0.228/cf-darwin-amd64.tar.gz | tar zx
4040

4141
# move the binary to your $PATH
4242
mv ./cf-darwin-amd64 /usr/local/bin/cf

manifests/runtime.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ metadata:
55
namespace: "{{ namespace }}"
66
spec:
77
defVersion: 1.0.0
8-
version: 0.0.227
8+
version: 0.0.228
99
bootstrapSpecifier: github.com/codefresh-io/cli-v2/manifests/argo-cd
1010
components:
1111
- name: events

pkg/reporter/reporter.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ const (
7575
InstallStepRunPreCheckEnsureCliVersion CliStep = "install.run.pre-check.step.ensure-cli-version"
7676
InstallStepRunPreCheckRuntimeCollision CliStep = "install.run.pre-check.step.runtime-collision"
7777
InstallStepRunPreCheckExisitingRuntimes CliStep = "install.run.pre-check.step.existing-runtimes"
78+
InstallStepRunPreCheckClusterChecks CliStep = "install.run.pre-check.step.cluster-checks"
7879
InstallStepRunPreCheckValidateClusterRequirements CliStep = "install.run.pre-check.step.validate-cluster-requirements"
7980
InstallPhaseRunPreCheckFinish CliStep = "install.run.pre-check.phase.finish"
8081
InstallPhaseStart CliStep = "install.run.phase.start"

pkg/store/store.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,10 @@ type Store struct {
130130
RolloutResourceName string
131131
RolloutReporterServiceAccount string
132132
SegmentWriteKey string
133+
DefaultNamespace string
134+
NetworkTesterName string
135+
NetworkTesterGenerateName string
136+
NetworkTesterImage string
133137
}
134138

135139
// Get returns the global store
@@ -217,6 +221,11 @@ func init() {
217221
s.SegmentWriteKey = segmentWriteKey
218222
s.RequirementsLink = "https://codefresh.io/csdp-docs/docs/runtime/requirements/"
219223
s.DownloadCliLink = "https://codefresh.io/csdp-docs/docs/clients/csdp-cli/"
224+
s.DefaultNamespace = "default"
225+
s.NetworkTesterName = "cf-network-tester"
226+
s.NetworkTesterGenerateName = "cf-network-tester-"
227+
s.NetworkTesterImage = "codefresh/cf-venona-network-tester:latest"
228+
220229
initVersion()
221230
}
222231

pkg/util/kube/kube.go

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"github.com/argoproj-labs/argocd-autopilot/pkg/kube"
2323
"github.com/codefresh-io/cli-v2/pkg/store"
2424
authv1 "k8s.io/api/authorization/v1"
25+
batchv1 "k8s.io/api/batch/v1"
2526
v1 "k8s.io/api/core/v1"
2627
"k8s.io/apimachinery/pkg/api/resource"
2728
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -41,6 +42,16 @@ type (
4142
memorySize string
4243
rbac []rbacValidation
4344
}
45+
46+
LaunchJobOptions struct {
47+
Client kubernetes.Interface
48+
Namespace string
49+
JobName *string
50+
Image *string
51+
Env []v1.EnvVar
52+
RestartPolicy v1.RestartPolicy
53+
BackOffLimit int32
54+
}
4455
)
4556

4657
func EnsureClusterRequirements(ctx context.Context, kubeFactory kube.Factory, namespace string) error {
@@ -202,3 +213,36 @@ func testNode(n v1.Node, req validationRequest) []string {
202213

203214
return result
204215
}
216+
217+
func LaunchJob(ctx context.Context, opts LaunchJobOptions) error {
218+
jobs := opts.Client.BatchV1().Jobs(opts.Namespace)
219+
220+
jobSpec := &batchv1.Job{
221+
ObjectMeta: metav1.ObjectMeta{
222+
Name: *opts.JobName,
223+
Namespace: opts.Namespace,
224+
},
225+
Spec: batchv1.JobSpec{
226+
Template: v1.PodTemplateSpec{
227+
Spec: v1.PodSpec{
228+
Containers: []v1.Container{
229+
{
230+
Name: *opts.JobName,
231+
Image: *opts.Image,
232+
Env: opts.Env,
233+
},
234+
},
235+
RestartPolicy: opts.RestartPolicy,
236+
},
237+
},
238+
BackoffLimit: &opts.BackOffLimit,
239+
},
240+
}
241+
242+
_, err := jobs.Create(ctx, jobSpec, metav1.CreateOptions{})
243+
if err != nil {
244+
return fmt.Errorf("failed to create K8s job '%s' : %w", *opts.JobName, err)
245+
}
246+
247+
return nil
248+
}

pkg/util/util.go

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,20 +15,28 @@
1515
package util
1616

1717
import (
18+
"bytes"
1819
"context"
1920
"fmt"
21+
"io"
2022
"os"
2123
"os/signal"
2224
"regexp"
2325
"strings"
2426
"sync"
2527
"time"
2628

29+
"github.com/argoproj-labs/argocd-autopilot/pkg/kube"
2730
"github.com/briandowns/spinner"
2831
"github.com/codefresh-io/cli-v2/pkg/log"
2932
"github.com/codefresh-io/cli-v2/pkg/reporter"
3033
"github.com/codefresh-io/cli-v2/pkg/store"
34+
kubeutil "github.com/codefresh-io/cli-v2/pkg/util/kube"
35+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3136

37+
v1 "k8s.io/api/core/v1"
38+
kerrors "k8s.io/apimachinery/pkg/api/errors"
39+
"k8s.io/client-go/kubernetes"
3240
"k8s.io/client-go/tools/clientcmd"
3341
)
3442

@@ -188,3 +196,148 @@ func reportCancel(status reporter.CliStepStatus) {
188196
})
189197
}
190198

199+
func RunNetworkTest(ctx context.Context, kubeFactory kube.Factory, urls ...string) error {
200+
const networkTestsTimeout = 120 * time.Second
201+
var testerPodName string
202+
203+
envVars := map[string]string{
204+
"URLS": strings.Join(urls, ","),
205+
"IN_CLUSTER": "1",
206+
}
207+
env := prepareEnvVars(envVars)
208+
209+
client, err := kubeFactory.KubernetesClientSet()
210+
if err != nil {
211+
return fmt.Errorf("failed to create kubernetes client: %w", err)
212+
}
213+
214+
err = kubeutil.LaunchJob(ctx, kubeutil.LaunchJobOptions{
215+
Client: client,
216+
Namespace: store.Get().DefaultNamespace,
217+
JobName: &store.Get().NetworkTesterName,
218+
Image: &store.Get().NetworkTesterImage,
219+
Env: env,
220+
RestartPolicy: v1.RestartPolicyNever,
221+
BackOffLimit: 0,
222+
})
223+
if err != nil {
224+
return err
225+
}
226+
227+
defer func() {
228+
deferErr := client.BatchV1().Jobs(store.Get().DefaultNamespace).Delete(ctx, store.Get().NetworkTesterName, metav1.DeleteOptions{})
229+
if deferErr != nil {
230+
log.G(ctx).Error("fail to delete job resource '%s': %s", store.Get().NetworkTesterName, deferErr.Error())
231+
}
232+
}()
233+
234+
log.G(ctx).Info("Running network test...")
235+
236+
ticker := time.NewTicker(5 * time.Second)
237+
defer ticker.Stop()
238+
var podLastState *v1.Pod
239+
timeoutChan := time.After(networkTestsTimeout)
240+
241+
Loop:
242+
for {
243+
select {
244+
case <-ticker.C:
245+
log.G(ctx).Debug("Waiting for network tester to finish")
246+
247+
if testerPodName == "" {
248+
testerPodName, err = getTesterPodName(ctx, client)
249+
if err != nil {
250+
return err
251+
}
252+
}
253+
254+
pod, err := client.CoreV1().Pods(store.Get().DefaultNamespace).Get(ctx, testerPodName, metav1.GetOptions{})
255+
if err != nil {
256+
if statusError, errIsStatusError := err.(*kerrors.StatusError); errIsStatusError {
257+
if statusError.ErrStatus.Reason == metav1.StatusReasonNotFound {
258+
log.G(ctx).Debug("Network tester pod not found")
259+
}
260+
}
261+
}
262+
if len(pod.Status.ContainerStatuses) == 0 {
263+
log.G(ctx).Debug("Network tester pod: creating container")
264+
continue
265+
}
266+
if pod.Status.ContainerStatuses[0].State.Running != nil {
267+
log.G(ctx).Debug("Network tester pod: running")
268+
}
269+
if pod.Status.ContainerStatuses[0].State.Waiting != nil {
270+
log.G(ctx).Debug("Network tester pod: waiting")
271+
}
272+
if pod.Status.ContainerStatuses[0].State.Terminated != nil {
273+
log.G(ctx).Debug("Network tester pod: terminated")
274+
podLastState = pod
275+
break Loop
276+
}
277+
case <-timeoutChan:
278+
return fmt.Errorf("network test timeout reached!")
279+
}
280+
}
281+
282+
defer func() {
283+
deferErr := client.CoreV1().Pods(store.Get().DefaultNamespace).Delete(ctx, testerPodName, metav1.DeleteOptions{})
284+
if deferErr != nil {
285+
log.G(ctx).Error("fail to delete tester pod '%s': %s", testerPodName, deferErr.Error())
286+
}
287+
}()
288+
289+
return checkPodLastState(ctx, client, testerPodName,podLastState)
290+
}
291+
292+
func prepareEnvVars(vars map[string]string) []v1.EnvVar {
293+
var env []v1.EnvVar
294+
295+
for key, value := range vars {
296+
env = append(env, v1.EnvVar{
297+
Name: key,
298+
Value: value,
299+
})
300+
}
301+
302+
return env
303+
}
304+
305+
func getTesterPodName(ctx context.Context, client kubernetes.Interface) (string, error) {
306+
pods, err := client.CoreV1().Pods(store.Get().DefaultNamespace).List(ctx, metav1.ListOptions{})
307+
if err != nil {
308+
return "", fmt.Errorf("failed to get pods from cluster: %w", err)
309+
}
310+
311+
for _, pod := range pods.Items {
312+
if pod.ObjectMeta.GenerateName == store.Get().NetworkTesterGenerateName {
313+
return pod.ObjectMeta.Name, nil
314+
}
315+
}
316+
317+
return "", nil
318+
}
319+
320+
func checkPodLastState(ctx context.Context, client kubernetes.Interface, name string, podLastState *v1.Pod) error {
321+
req := client.CoreV1().Pods(store.Get().DefaultNamespace).GetLogs(name, &v1.PodLogOptions{})
322+
podLogs, err := req.Stream(ctx)
323+
if err != nil {
324+
return fmt.Errorf("Failed to get network-tester pod logs: %w", err)
325+
}
326+
defer podLogs.Close()
327+
328+
logsBuf := new(bytes.Buffer)
329+
_, err = io.Copy(logsBuf, podLogs)
330+
if err != nil {
331+
return fmt.Errorf("Failed to read network-tester pod logs: %w", err)
332+
}
333+
logs := strings.Trim(logsBuf.String(), "\n")
334+
log.G(ctx).Debug(logs)
335+
336+
if podLastState.Status.ContainerStatuses[0].State.Terminated.ExitCode != 0 {
337+
terminationMessage := strings.Trim(podLastState.Status.ContainerStatuses[0].State.Terminated.Message, "\n")
338+
return fmt.Errorf("Network test failed with: %s", terminationMessage)
339+
}
340+
341+
return nil
342+
}
343+

0 commit comments

Comments
 (0)