diff --git a/.dockerignore b/.dockerignore
index fbb7b5aa8..00d597129 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,4 +1,7 @@
+.github/
+.idea/
+artifacts/
 build/
 converter/
 dashboards/
diff --git a/.github/workflows/istio-tests.yaml b/.github/workflows/istio-tests.yaml
index 64d373f96..529d77328 100644
--- a/.github/workflows/istio-tests.yaml
+++ b/.github/workflows/istio-tests.yaml
@@ -130,6 +130,7 @@ jobs:
           make deploy ISTIO_VERSION=${{ matrix.istioVersion }}
           make install-istio
           make e2e-client-test
+          make e2e-test
           make undeploy ISTIO_VERSION=${{ matrix.istioVersion }}
           make uninstall-istio
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index ecce390d3..ec628eab1 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -97,6 +97,17 @@ jobs:
           asset_name: coherence-operator.yaml
           asset_content_type: text/plain
 
+      - name: Upload Restricted Release Yaml
+        id: upload-release-yaml
+        uses: actions/upload-release-asset@v1
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          upload_url: ${{ github.event.release.upload_url }}
+          asset_path: /tmp/coherence-operator/_output/coherence-operator-restricted.yaml
+          asset_name: coherence-operator-restricted.yaml
+          asset_content_type: text/plain
+
       - name: Upload Release CRD
         id: upload-release-crd
         uses: actions/upload-release-asset@v1
diff --git a/.gitignore b/.gitignore
index bbec88481..e5ec9676e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,6 +26,10 @@ bin/
 bundle/
 temp/
 
+# OpenShift pre-flight
+artifacts/
+preflight.log
+
 # licensed
 .licenses/
 meta/
diff --git a/Makefile b/Makefile
index ae08d7c9a..34ecc1180 100644
--- a/Makefile
+++ b/Makefile
@@ -773,6 +773,7 @@ copyright: ## Check copyright headers
 	@java -cp hack/glassfish-copyright-maven-plugin-2.1.jar \
 		org.glassfish.copyright.Copyright -C hack/copyright.txt \
 		-X .adoc \
+		-X artifacts/ \
 		-X bin/ \
 		-X build/ \
 		-X clientset/ \
@@ -812,6 +813,7 @@ copyright: ## Check copyright headers
 		-X mvnw \
 		-X mvnw.cmd \
 		-X .png \
+		-X preflight.log \
 		-X PROJECT \
 		-X .sh \
 		-X tanzu/package/package.yml \
@@ -1612,6 +1614,9 @@ $(BUILD_MANIFESTS_PKG): $(TOOLS_BIN)/kustomize $(TOOLS_BIN)/yq
 	cp config/namespace/namespace.yaml $(BUILD_OUTPUT)/coherence-operator.yaml
 	$(KUSTOMIZE) build $(BUILD_DEPLOY)/default >> $(BUILD_OUTPUT)/coherence-operator.yaml
 	$(SED) -e 's/name: coherence-operator-env-vars-.*/name: coherence-operator-env-vars/g' $(BUILD_OUTPUT)/coherence-operator.yaml
+	$(KUSTOMIZE) build $(BUILD_DEPLOY)/overlays/restricted >> $(BUILD_OUTPUT)/coherence-operator-restricted.yaml
+	$(SED) -e 's/name: coherence-operator-env-vars-.*/name: coherence-operator-env-vars/g' $(BUILD_OUTPUT)/coherence-operator-restricted.yaml
+	$(SED) -e 's/ClusterRole/Role/g' $(BUILD_OUTPUT)/coherence-operator-restricted.yaml
 	cd $(BUILD_MANIFESTS)/crd && $(TOOLS_BIN)/yq --no-doc -s '.metadata.name + ".yaml"' temp.yaml
 	rm $(BUILD_MANIFESTS)/crd/temp.yaml
 	mv $(BUILD_MANIFESTS)/crd/coherence.coherence.oracle.com.yaml $(BUILD_MANIFESTS)/crd/coherence.oracle.com_coherence.yaml
@@ -2404,15 +2409,23 @@ uninstall-metallb: ## Uninstall MetalLB
 # ----------------------------------------------------------------------------------------------------------------------
 .PHONY: install-istio
 install-istio: delete-istio-config get-istio ## Install the latest version of Istio into k8s (or override the version using the ISTIO_VERSION env var)
+ifeq (true,$(ISTIO_USE_CONFIG))
 	$(ISTIO_HOME)/bin/istioctl install -f $(BUILD_OUTPUT)/istio-config.yaml -y
 	kubectl -n istio-system wait --for condition=available deployment.apps/istiod-$(ISTIO_REVISION)
+else
+	$(ISTIO_HOME)/bin/istioctl install --set profile=demo -y
+	kubectl -n istio-system wait --for condition=available deployment.apps/istiod
+endif
 	kubectl -n istio-system wait --for condition=available deployment.apps/istio-ingressgateway
 	kubectl -n istio-system wait --for condition=available deployment.apps/istio-egressgateway
 	kubectl apply -f $(SCRIPTS_DIR)/istio-strict.yaml
 	kubectl -n $(OPERATOR_NAMESPACE) apply -f $(SCRIPTS_DIR)/istio-operator.yaml
 	kubectl label namespace $(OPERATOR_NAMESPACE) istio-injection=enabled --overwrite=true
+	kubectl label namespace $(OPERATOR_NAMESPACE) istio.io/rev=$(ISTIO_REVISION) --overwrite=true
 	kubectl label namespace $(OPERATOR_NAMESPACE_CLIENT) istio-injection=enabled --overwrite=true
+	kubectl label namespace $(OPERATOR_NAMESPACE_CLIENT) istio.io/rev=$(ISTIO_REVISION) --overwrite=true
 	kubectl label namespace $(CLUSTER_NAMESPACE) istio-injection=enabled --overwrite=true
+	kubectl label namespace $(CLUSTER_NAMESPACE) istio.io/rev=$(ISTIO_REVISION) --overwrite=true
 	kubectl apply -f $(ISTIO_HOME)/samples/addons
 
 # ----------------------------------------------------------------------------------------------------------------------
diff --git a/api/v1/coherenceresourcespec_types.go b/api/v1/coherenceresourcespec_types.go
index edf815d7a..c4a811824 100644
--- a/api/v1/coherenceresourcespec_types.go
+++ b/api/v1/coherenceresourcespec_types.go
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, 2024, Oracle and/or its affiliates.
+ * Copyright (c) 2020, 2025, Oracle and/or its affiliates.
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
@@ -671,20 +671,16 @@ func (in *CoherenceResourceSpec) CreatePodTemplateSpec(deployment CoherenceResou
 		podLabels[k] = v
 	}
 
-	var annotations map[string]string
+	annotations := make(map[string]string)
+	// Add the default Istio config annotation.
+	// Adding this first allows it to be overridden in the Coherence or CoherenceJob spec.
+	annotations[AnnotationIstioConfig] = DefaultIstioConfigAnnotationValue
+
 	globalAnnotations := deployment.CreateGlobalAnnotations()
-	if globalAnnotations != nil {
-		if annotations == nil {
-			annotations = make(map[string]string)
-		}
-		for k, v := range globalAnnotations {
-			annotations[k] = v
-		}
+	for k, v := range globalAnnotations {
+		annotations[k] = v
 	}
 	if in.Annotations != nil {
-		if annotations == nil {
-			annotations = make(map[string]string)
-		}
 		for k, v := range in.Annotations {
 			annotations[k] = v
 		}
diff --git a/api/v1/common_test.go b/api/v1/common_test.go
index bbd10700e..42f01447c 100644
--- a/api/v1/common_test.go
+++ b/api/v1/common_test.go
@@ -450,9 +450,13 @@ func createMinimalExpectedPodSpec(deployment coh.CoherenceResource) corev1.PodTe
 		initContainer.Image = *operatorImage
 	}
 
+	annotations := make(map[string]string)
+	annotations[coh.AnnotationIstioConfig] = coh.DefaultIstioConfigAnnotationValue
+
 	podTemplate := corev1.PodTemplateSpec{
 		ObjectMeta: metav1.ObjectMeta{
-			Labels: podLabels,
+			Labels:      podLabels,
+			Annotations: annotations,
 		},
 		Spec: corev1.PodSpec{
 			InitContainers: []corev1.Container{initContainer},
diff --git a/api/v1/constants.go b/api/v1/constants.go
index e1d2f7d74..37ce40ef4 100644
--- a/api/v1/constants.go
+++ b/api/v1/constants.go
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, 2024, Oracle and/or its affiliates.
+ * Copyright (c) 2020, 2025, Oracle and/or its affiliates.
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
@@ -64,6 +64,12 @@ const (
 	AnnotationFeatureSuspend = "com.oracle.coherence.operator/feature.suspend"
 	// AnnotationOperatorVersion is the Operator version annotations
 	AnnotationOperatorVersion = "com.oracle.coherence.operator/version"
+	// AnnotationIstioConfig is the Istio config annotation applied to Pods.
+	AnnotationIstioConfig = "proxy.istio.io/config"
+	// DefaultIstioConfigAnnotationValue is the default for the Istio config annotation.
+	// This makes the Istio sidecar the first container in the Pod, allowing it to be
+	// started before the Coherence container.
+	DefaultIstioConfigAnnotationValue = "{ \"holdApplicationUntilProxyStarts\": true }"
 
 	// DefaultServiceAccount is the default k8s service account name.
 	DefaultServiceAccount = "default"
diff --git a/config/manager/no-jobs-patch.yaml b/config/manager/no-jobs-patch.yaml
new file mode 100644
index 000000000..29b507ea5
--- /dev/null
+++ b/config/manager/no-jobs-patch.yaml
@@ -0,0 +1,3 @@
+- op: add
+  path: /spec/template/spec/containers/0/args/-
+  value: --install-job-crd=false
diff --git a/config/overlays/restricted/cluster_role.yaml b/config/overlays/restricted/cluster_role.yaml
new file mode 100644
index 000000000..c8ef6b4c8
--- /dev/null
+++ b/config/overlays/restricted/cluster_role.yaml
@@ -0,0 +1,9 @@
+# -------------------------------------------------------------
+# This is the ClusterRole required by the Coherence Operator
+# to self-manage its CRDs and web-hooks.
+# -------------------------------------------------------------
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: crd-webhook-install-role
+$patch: delete
diff --git a/config/overlays/restricted/cluster_role_binding.yaml b/config/overlays/restricted/cluster_role_binding.yaml
new file mode 100644
index 000000000..08c14b10b
--- /dev/null
+++ b/config/overlays/restricted/cluster_role_binding.yaml
@@ -0,0 +1,5 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: crd-webhook-install-rolebinding
+$patch: delete
diff --git a/config/overlays/restricted/kustomization.yaml b/config/overlays/restricted/kustomization.yaml
new file mode 100644
index 000000000..6317684a4
--- /dev/null
+++ b/config/overlays/restricted/kustomization.yaml
@@ -0,0 +1,15 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+resources:
+  - ../../default
+
+patches:
+  - path: single-namespace-patch.yaml
+    target:
+      kind: Deployment
+      name: controller-manager
+  - path: node-viewer-role.yaml
+  - path: node_viewer_role_binding.yaml
+  - path: cluster_role.yaml
+  - path: cluster_role_binding.yaml
diff --git a/config/overlays/restricted/node-viewer-role.yaml b/config/overlays/restricted/node-viewer-role.yaml
new file mode 100644
index 000000000..4dfb96767
--- /dev/null
+++ b/config/overlays/restricted/node-viewer-role.yaml
@@ -0,0 +1,5 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: node-viewer-role
+$patch: delete
diff --git a/config/overlays/restricted/node_viewer_role_binding.yaml b/config/overlays/restricted/node_viewer_role_binding.yaml
new file mode 100644
index 000000000..9e035b494
--- /dev/null
+++ b/config/overlays/restricted/node_viewer_role_binding.yaml
@@ -0,0 +1,5 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: node-viewer-rolebinding
+$patch: delete
diff --git a/config/overlays/restricted/single-namespace-patch.yaml b/config/overlays/restricted/single-namespace-patch.yaml
new file mode 100644
index 000000000..9d58ad9a9
--- /dev/null
+++ b/config/overlays/restricted/single-namespace-patch.yaml
@@ -0,0 +1,16 @@
+- op: add
+  path: /spec/template/spec/containers/0/env/-
+  value:
+    name: WATCH_NAMESPACE
+    valueFrom:
+      fieldRef:
+        fieldPath: metadata.namespace
+- op: add
+  path: /spec/template/spec/containers/0/args/-
+  value: --enable-webhook=false
+- op: add
+  path: /spec/template/spec/containers/0/args/-
+  value: --install-crd=false
+- op: add
+  path: /spec/template/spec/containers/0/args/-
+  value: --node-lookup-enabled=false
diff --git a/controllers/job/job_controller.go b/controllers/job/job_controller.go
index 0a65d575b..c6b128063 100644
--- a/controllers/job/job_controller.go
+++ b/controllers/job/job_controller.go
@@ -430,7 +430,7 @@ func (in *ReconcileJob) maybeExecuteProbe(ctx context.Context, job *batchv1.Job,
 	probeStatus := status.FindJobProbeStatus(name)
 	podCondition := in.findPodReadyCondition(pod)
 	if in.shouldExecuteProbe(probeStatus, podCondition) {
-		_, err := p.RunProbe(ctx, pod, &action.Probe)
+		_, err := p.RunProbe(ctx, pod, deployment.GetWkaServiceName(), &action.Probe)
 		if err == nil {
 			logger.Info(fmt.Sprintf("Executed probe using pod %s", name), "Error", "nil")
 			probeStatus.Success = ptr.To(true)
diff --git a/controllers/statefulset/statefulset_controller.go b/controllers/statefulset/statefulset_controller.go
index 45f9b595f..b0212f792 100644
--- a/controllers/statefulset/statefulset_controller.go
+++ b/controllers/statefulset/statefulset_controller.go
@@ -209,7 +209,7 @@ func (in *ReconcileStatefulSet) execActions(ctx context.Context, sts *appsv1.Sta
 
 	for _, action := range spec.Actions {
 		if action.Probe != nil {
-			if ok := coherenceProbe.ExecuteProbe(ctx, sts, action.Probe); !ok {
+			if ok := coherenceProbe.ExecuteProbe(ctx, sts, deployment.GetWkaServiceName(), action.Probe); !ok {
 				log.Info("Action probe execution failed.", "probe", action.Probe)
 			}
 		}
@@ -367,7 +367,7 @@ func (in *ReconcileStatefulSet) patchStatefulSet(ctx context.Context, deployment
 			return reconcile.Result{}, nil
 		}
 
-		return strategy.RollingUpgrade(ctx, current, in.GetClientSet().KubeClient)
+		return strategy.RollingUpgrade(ctx, current, deployment.GetWkaServiceName(), in.GetClientSet().KubeClient)
 	}
 }
diff --git a/controllers/statefulset/upgrade_strategy.go b/controllers/statefulset/upgrade_strategy.go
index 3ce45501b..344dbc4b9 100644
--- a/controllers/statefulset/upgrade_strategy.go
+++ b/controllers/statefulset/upgrade_strategy.go
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, 2024, Oracle and/or its affiliates.
+ * Copyright (c) 2020, 2025, Oracle and/or its affiliates.
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
@@ -21,8 +21,15 @@ import (
 )
 
 type UpgradeStrategy interface {
+	// IsOperatorManaged returns true if this strategy requires the Operator to manage the upgrade
 	IsOperatorManaged() bool
-	RollingUpgrade(context.Context, *appsv1.StatefulSet, kubernetes.Interface) (reconcile.Result, error)
+	// RollingUpgrade performs the rolling upgrade
+	// Parameters:
+	//   context.Context - the Go context to use
+	//   *appsv1.StatefulSet - a pointer to the StatefulSet to upgrade
+	//   string - the name of the WKA service
+	//   kubernetes.Interface - the K8s client
+	RollingUpgrade(context.Context, *appsv1.StatefulSet, string, kubernetes.Interface) (reconcile.Result, error)
 }
 
 func GetUpgradeStrategy(c coh.CoherenceResource, p probe.CoherenceProbe) UpgradeStrategy {
@@ -66,7 +73,7 @@ var _ UpgradeStrategy = ByPodUpgradeStrategy{}
 type ByPodUpgradeStrategy struct {
 }
 
-func (in ByPodUpgradeStrategy) RollingUpgrade(context.Context, *appsv1.StatefulSet, kubernetes.Interface) (reconcile.Result, error) {
+func (in ByPodUpgradeStrategy) RollingUpgrade(context.Context, *appsv1.StatefulSet, string, kubernetes.Interface) (reconcile.Result, error) {
 	return reconcile.Result{}, nil
 }
 
@@ -81,7 +88,7 @@ var _ UpgradeStrategy = ManualUpgradeStrategy{}
 type ManualUpgradeStrategy struct {
 }
 
-func (in ManualUpgradeStrategy) RollingUpgrade(context.Context, *appsv1.StatefulSet, kubernetes.Interface) (reconcile.Result, error) {
+func (in ManualUpgradeStrategy) RollingUpgrade(context.Context, *appsv1.StatefulSet, string, kubernetes.Interface) (reconcile.Result, error) {
 	return reconcile.Result{}, nil
 }
 
@@ -98,8 +105,8 @@ type ByNodeUpgradeStrategy struct {
 	scalingProbe *coh.Probe
 }
 
-func (in ByNodeUpgradeStrategy) RollingUpgrade(ctx context.Context, sts *appsv1.StatefulSet, c kubernetes.Interface) (reconcile.Result, error) {
-	return rollingUpgrade(in.cp, in.scalingProbe, &PodNodeName{}, "NodeName", ctx, sts, c)
+func (in ByNodeUpgradeStrategy) RollingUpgrade(ctx context.Context, sts *appsv1.StatefulSet, svc string, c kubernetes.Interface) (reconcile.Result, error) {
+	return rollingUpgrade(in.cp, in.scalingProbe, &PodNodeName{}, "NodeName", ctx, sts, svc, c)
 }
 
 func (in ByNodeUpgradeStrategy) IsOperatorManaged() bool {
@@ -116,8 +123,8 @@ type ByNodeLabelUpgradeStrategy struct {
 	label        string
 }
 
-func (in ByNodeLabelUpgradeStrategy) RollingUpgrade(ctx context.Context, sts *appsv1.StatefulSet, c kubernetes.Interface) (reconcile.Result, error) {
-	return rollingUpgrade(in.cp, in.scalingProbe, &PodNodeLabel{Label: in.label}, in.label, ctx, sts, c)
+func (in ByNodeLabelUpgradeStrategy) RollingUpgrade(ctx context.Context, sts *appsv1.StatefulSet, svc string, c kubernetes.Interface) (reconcile.Result, error) {
+	return rollingUpgrade(in.cp, in.scalingProbe, &PodNodeLabel{Label: in.label}, in.label, ctx, sts, svc, c)
 }
 
 func (in ByNodeLabelUpgradeStrategy) IsOperatorManaged() bool {
@@ -170,7 +177,7 @@ func (p *PodNodeLabel) GetNodeId(ctx context.Context, c kubernetes.Interface, po
 
 // ----- helper methods ----------------------------------------------------------------------------
 
-func rollingUpgrade(cp probe.CoherenceProbe, scalingProbe *coh.Probe, fn PodNodeIdSupplier, idName string, ctx context.Context, sts *appsv1.StatefulSet, c kubernetes.Interface) (reconcile.Result, error) {
+func rollingUpgrade(cp probe.CoherenceProbe, scalingProbe *coh.Probe, fn PodNodeIdSupplier, idName string, ctx context.Context, sts *appsv1.StatefulSet, svc string, c kubernetes.Interface) (reconcile.Result, error) {
 	var err error
 	var replicas int32
 
@@ -263,7 +270,7 @@ func rollingUpgrade(cp probe.CoherenceProbe, scalingProbe *coh.Probe, fn PodNode
 		// We have Pods to be upgraded
 		nodeId, _ := fn.GetNodeId(ctx, c, pods.Items[0])
 		// Check Pods are "safe"
-		if cp.ExecuteProbeForSubSetOfPods(ctx, sts, scalingProbe, pods, podsToUpdate) {
+		if cp.ExecuteProbeForSubSetOfPods(ctx, sts, svc, scalingProbe, pods, podsToUpdate) {
 			// delete the Pods
 			log.Info("Upgrading all Pods for Node identifier", "Namespace", sts.Namespace, "Name", sts.Name, "NodeId", idName, "IdValue", nodeId, "Count", len(podsToUpdate.Items))
 			err = deletePods(ctx, podsToUpdate, c)
diff --git a/docs/installation/01_installation.adoc b/docs/installation/01_installation.adoc
index 0a3bcfc47..ceb7e854e 100644
--- a/docs/installation/01_installation.adoc
+++ b/docs/installation/01_installation.adoc
@@ -21,6 +21,8 @@ easily be installed into a Kubernetes cluster.
 * <>
 * Installation Options
 ** <>
+*** <<manifest-restrict,Installing Without Cluster Roles>>
+*** <<manual-crd,Manually Install the CRDs>>
 ** <>
 *** <>
 *** <>
@@ -174,9 +176,56 @@ Then download with:
 kubectl apply -f https://github.com/oracle/coherence-operator/releases/download/${VERSION}/coherence-operator.yaml
 ----
 
+[#manifest-restrict]
+=== Installing Without Cluster Roles
+
+The default install for the Operator is to have one Operator deployment that manages all Coherence resources across
+all the namespaces in a Kubernetes cluster. This requires the Operator to have cluster role RBAC permissions
+to manage and monitor all the resources.
+
+Sometimes this is not desirable, for example for security reasons, or in a shared Kubernetes cluster.
+The Operator can therefore be installed with plain namespace-scoped roles and role bindings.
+The Operator release includes a single yaml file named `coherence-operator-restricted.yaml` that may be used to install
+the Operator into a single namespace without any cluster roles.
+
+The Operator installed with this yaml:
+
+* will not install the CRDs
+* will not use WebHooks
+* will not look up Node labels for Coherence site and rack configurations
+
+[source,bash]
+----
+kubectl apply -f https://github.com/oracle/coherence-operator/releases/download/v3.4.2/coherence-operator-restricted.yaml
+----
+
+[IMPORTANT]
+====
+When installing the Operator in restricted mode, the CRDs must have already been manually installed into
+the Kubernetes cluster.
+====
+
+[#manual-crd]
+=== Manually Install the CRDs
+
+Although by default the Operator will install its CRDs, they can be manually installed into Kubernetes.
+This may be required where the Operator is running with restricted permissions as described above.
+
+The Operator release artifacts include small versions of the two CRDs which can be installed with the following commands:
+
+[source,bash]
+----
+kubectl apply -f https://github.com/oracle/coherence-operator/releases/download/v3.4.2/coherence.oracle.com_coherence_small.yaml
+kubectl apply -f https://github.com/oracle/coherence-operator/releases/download/v3.4.2/coherence.oracle.com_coherencejob_small.yaml
+----
+
+The small versions of the CRDs are identical to the full versions but have a cut-down OpenAPI spec with a lot of comments
+removed, so that the CRDs are small enough to be installed with `kubectl apply`.
+
 === Change the Operator Replica Count
 
-When installing with single manifest yaml file, the replica count can be changed by editing the yaml file itself to change the occurrence of `replicas: 3` in the manifest yaml to `replicas: 1`
+When installing with the single manifest yaml file, the replica count can be changed by editing the yaml file itself
+to change the occurrence of `replicas: 3` in the manifest yaml to `replicas: 1`
 
 For example, this could be done using `sed`
 [source,bash]
diff --git a/hack/label-nodes.sh b/hack/label-nodes.sh
new file mode 100755
index 000000000..c8f3ddcfe
--- /dev/null
+++ b/hack/label-nodes.sh
@@ -0,0 +1,15 @@
+#!/usr/bin/env bash
+
+#
+# Copyright (c) 2020, 2025, Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at
+# http://oss.oracle.com/licenses/upl.
+#
+
+NODES=$(kubectl get nodes -o name)
+for NODE in $NODES; do
+  kubectl label $NODE topology.kubernetes.io/zone=twilight-zone --overwrite
+  kubectl label $NODE topology.kubernetes.io/region=AYT --overwrite
+  kubectl label $NODE oci.oraclecloud.com/fault-domain=fd-one --overwrite
+  kubectl label $NODE coherence.oracle.com/test=test-one --overwrite
+done
diff --git a/pkg/probe/http.go b/pkg/probe/http.go
index 8e00286b8..c4f5e3eb7 100644
--- a/pkg/probe/http.go
+++ b/pkg/probe/http.go
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019, 2023, Oracle and/or its affiliates.
+ * Copyright (c) 2020, 2025, Oracle and/or its affiliates.
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
@@ -69,6 +69,7 @@ func DoHTTPProbe(url *url.URL, headers http.Header, client GetHTTPInterface) (Re
 	if headers.Get("Host") != "" {
 		req.Host = headers.Get("Host")
 	}
+	log.Info("Executing HTTP Probe", "URL", url.String(), "Headers", headers)
 	res, err := client.Do(req)
 	if err != nil {
 		// Convert errors into failures to catch timeouts.
diff --git a/pkg/probe/probe.go b/pkg/probe/probe.go
index 91fba0ac1..852c85ba0 100644
--- a/pkg/probe/probe.go
+++ b/pkg/probe/probe.go
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, 2024, Oracle and/or its affiliates.
+ * Copyright (c) 2020, 2025, Oracle and/or its affiliates.
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
@@ -53,7 +53,7 @@ func (in *CoherenceProbe) SetGetPodHostName(fn func(pod corev1.Pod) string) {
 	in.getPodHostName = fn
 }
 
-func (in *CoherenceProbe) GetPodHostName(pod corev1.Pod) string {
+func (in *CoherenceProbe) GetPodIpOrHostName(pod corev1.Pod) string {
 	hostName, found := pod.Labels[operator.LabelTestHostName]
 	if found {
 		return hostName
@@ -67,6 +67,17 @@
 	return in.getPodHostName(pod)
 }
 
+func (in *CoherenceProbe) GetPodHostName(pod corev1.Pod, svc string) string {
+	hostName, found := pod.Labels[operator.LabelTestHostName]
+	if found {
+		return hostName
+	}
+	if in.getPodHostName != nil {
+		return in.getPodHostName(pod)
+	}
+	return fmt.Sprintf("%s.%s.%s", pod.Name, svc, pod.Namespace)
+}
+
 func (in *CoherenceProbe) SetTranslatePort(fn func(name string, port int) int) {
 	if in == nil {
 		return
@@ -92,7 +103,7 @@ func (in *CoherenceProbe) IsStatusHA(ctx context.Context, deployment coh.Coheren
 	spec, found := deployment.GetStatefulSetSpec()
 	if found {
 		p := spec.GetScalingProbe()
-		return in.ExecuteProbe(ctx, sts, p)
+		return in.ExecuteProbe(ctx, sts, deployment.GetWkaServiceName(), p)
 	}
 	return true
 }
@@ -145,7 +156,7 @@
 	}
 
 	log.Info("Suspending Coherence services in StatefulSet "+sts.Name, "Namespace", ns, "Name", name)
-	if in.ExecuteProbe(ctx, sts, stsSpec.GetSuspendProbe()) {
+	if in.ExecuteProbe(ctx, sts, deployment.GetWkaServiceName(), stsSpec.GetSuspendProbe()) {
 		return ServiceSuspendSuccessful
 	}
 	return ServiceSuspendFailed
@@ -161,16 +172,16 @@
 	return pods, err
 }
 
-func (in *CoherenceProbe) ExecuteProbe(ctx context.Context, sts *appsv1.StatefulSet, probe *coh.Probe) bool {
+func (in *CoherenceProbe) ExecuteProbe(ctx context.Context, sts *appsv1.StatefulSet, svc string, probe *coh.Probe) bool {
 	pods, err := in.GetPodsForStatefulSet(ctx, sts)
 	if err != nil {
 		log.Error(err, "Error getting list of Pods for StatefulSet "+sts.Name)
 		return false
 	}
-	return in.ExecuteProbeForSubSetOfPods(ctx, sts, probe, pods, pods)
+	return in.ExecuteProbeForSubSetOfPods(ctx, sts, svc, probe, pods, pods)
 }
 
-func (in *CoherenceProbe) ExecuteProbeForSubSetOfPods(ctx context.Context, sts *appsv1.StatefulSet, probe *coh.Probe, stsPods, pods corev1.PodList) bool {
+func (in *CoherenceProbe) ExecuteProbeForSubSetOfPods(ctx context.Context, sts *appsv1.StatefulSet, svc string, probe *coh.Probe, stsPods, pods corev1.PodList) bool {
 	logger := log.WithValues("Namespace", sts.GetNamespace(), "Name", sts.GetName())
 
 	// All Pods must be in the Running Phase
@@ -200,7 +211,7 @@
 			log.Info("Using pod " + pod.Name + " to execute probe")
 		}
 
-		ha, err := in.RunProbe(ctx, pod, probe)
+		ha, err := in.RunProbe(ctx, pod, svc, probe)
 		if err == nil {
 			log.Info(fmt.Sprintf("Executed probe using pod %s result=%t", pod.Name, ha))
 			return ha
@@ -228,12 +239,12 @@
 	return false, string(pod.Status.Phase)
 }
 
-func (in *CoherenceProbe) RunProbe(ctx context.Context, pod corev1.Pod, handler *coh.Probe) (bool, error) {
+func (in *CoherenceProbe) RunProbe(ctx context.Context, pod corev1.Pod, svc string, handler *coh.Probe) (bool, error) {
 	switch {
 	case handler.Exec != nil:
 		return in.ProbeUsingExec(ctx, pod, handler)
 	case handler.HTTPGet != nil:
-		return in.ProbeUsingHTTP(pod, handler)
+		return in.ProbeUsingHTTP(pod, svc, handler)
 	case handler.TCPSocket != nil:
 		return in.ProbeUsingTCP(pod, handler)
 	default:
@@ -262,12 +273,12 @@
 	return exitCode == 0, nil
 }
 
-func (in *CoherenceProbe) ProbeUsingHTTP(pod corev1.Pod, handler *coh.Probe) (bool, error) {
+func (in *CoherenceProbe) ProbeUsingHTTP(pod corev1.Pod, svc string, handler *coh.Probe) (bool, error) {
 	var (
-		scheme corev1.URIScheme
-		host   string
-		port   int
-		path   string
+		scheme   corev1.URIScheme
+		hostOrIP string
+		port     int
+		path     string
 	)
 
 	action := handler.HTTPGet
@@ -279,9 +290,9 @@
 	}
 
 	if action.Host == "" {
-		host = in.GetPodHostName(pod)
+		hostOrIP = in.GetPodIpOrHostName(pod)
 	} else {
-		host = action.Host
+		hostOrIP = action.Host
 	}
 
 	port, err := in.findPort(pod, action.Port)
@@ -295,12 +306,14 @@
 		path = action.Path
 	}
 
-	u, err := url.Parse(fmt.Sprintf("%s://%s:%d/%s", scheme, host, port, path))
+	u, err := url.Parse(fmt.Sprintf("%s://%s:%d/%s", scheme, hostOrIP, port, path))
 	if err != nil {
 		return false, err
 	}
 
 	header := http.Header{}
+	header.Set("Host", in.GetPodHostName(pod, svc))
+
 	if action.HTTPHeaders != nil {
 		for _, h := range action.HTTPHeaders {
 			hh, found := header[h.Name]
@@ -315,7 +328,7 @@
 	p := NewHTTPProbe()
 	result, s, err := p.Probe(u, header, handler.GetTimeout())
-	log.Info(fmt.Sprintf("HTTP Probe URL: %s result=%v msg=%s error=%v", u.String(), result, s, err))
+	log.Info("Executed HTTP Probe", "URL", u, "Result", fmt.Sprintf("%v", result), "Msg", s, "Error", err)
 
 	return result == Success, err
 }
@@ -329,7 +342,7 @@
 	action := handler.TCPSocket
 
 	if action.Host == "" {
-		host = in.GetPodHostName(pod)
+		host = in.GetPodIpOrHostName(pod)
 	} else {
 		host = action.Host
 	}
diff --git a/pkg/runner/runner.go b/pkg/runner/runner.go
index 392aba4c1..0a8da4101 100644
--- a/pkg/runner/runner.go
+++ b/pkg/runner/runner.go
@@ -21,6 +21,7 @@ import (
 	"io"
 	"k8s.io/apimachinery/pkg/api/resource"
 	"net/http"
+	"net/url"
 	"os"
 	"os/exec"
 	"path/filepath"
@@ -905,19 +906,36 @@ func httpGetWithBackoff(url string, details *RunDetails) string {
 
 // Do a http get for the specified url and return the response body for
 // a 200 response or empty string for a non-200 response or error.
-func httpGet(url string, client http.Client) (string, int, error) {
-	log.Info("Performing http get", "url", url)
+func httpGet(urlString string, client http.Client) (string, int, error) {
+	log.Info("Performing http get", "url", urlString)
 
-	resp, err := client.Get(url)
+	u, err := url.Parse(urlString)
 	if err != nil {
-		return "", http.StatusInternalServerError, errors.Wrapf(err, "failed to get URL %s", url)
+		return "", http.StatusInternalServerError, errors.Wrapf(err, "failed to parse URL %s", urlString)
+	}
+
+	req, err := http.NewRequest("GET", urlString, nil)
+	if err != nil {
+		return "", http.StatusInternalServerError, errors.Wrapf(err, "failed to create request for URL %s", urlString)
+	}
+
+	req.Host = u.Host
+
+	h := http.Header{}
+	h.Set("Host", u.Host)
+	h.Set("User-Agent", fmt.Sprintf("coherence-operator-runner/%s", operator.GetVersion()))
+	req.Header = h
+
+	resp, err := client.Do(req)
+	if err != nil {
+		return "", http.StatusInternalServerError, errors.Wrapf(err, "failed to get URL %s", urlString)
 	}
 	//noinspection GoUnhandledErrorResult
 	defer resp.Body.Close()
 
 	body, err := io.ReadAll(resp.Body)
 	if err != nil {
-		return "", resp.StatusCode, errors.Wrapf(err, "failed to read response body from URL %s", url)
+		return "", resp.StatusCode, errors.Wrapf(err, "failed to read response body from URL %s", urlString)
 	}
 	s := string(body)
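
The probe changes above share one idea: the HTTP connection is still made to the Pod's IP address (`GetPodIpOrHostName`), but the request carries a `Host` header built from the Pod name, the WKA service name, and the namespace (`GetPodHostName`), so that a service mesh sidecar such as Istio can associate the request with a known service. Below is a minimal, self-contained sketch of that pattern; the pod, service, and namespace values are illustrative only, not taken from the Operator.

```go
package main

import (
	"fmt"
	"net/http"
	"time"
)

// probePod dials the Pod directly by IP but presents the Pod's stable
// headless-service DNS name (pod-name.service-name.namespace) as the
// HTTP Host. In Go's HTTP client, setting req.Host overrides the host
// derived from the URL when the request is written to the wire.
func probePod(podIP string, port int, podName, svcName, namespace string) (bool, error) {
	req, err := http.NewRequest(http.MethodGet, fmt.Sprintf("http://%s:%d/ready", podIP, port), nil)
	if err != nil {
		return false, err
	}
	req.Host = fmt.Sprintf("%s.%s.%s", podName, svcName, namespace)

	client := http.Client{Timeout: 10 * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		return false, err
	}
	defer resp.Body.Close()
	return resp.StatusCode == http.StatusOK, nil
}

func main() {
	// Illustrative values only.
	ok, err := probePod("10.1.2.3", 6676, "storage-0", "storage-wka", "coherence-test")
	fmt.Println(ok, err)
}
```

This is also why `DoHTTPProbe` copies a non-empty `Host` header into `req.Host`: in Go's `net/http` client a `Host` value set only via `req.Header` is ignored when the request is sent.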
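Separately, the `CreatePodTemplateSpec` change seeds the Pod annotations with the default `proxy.istio.io/config` value before merging global and then spec-level annotations, so user-supplied values win on a key clash. A small sketch of that precedence follows; the merge helper is a stand-in for illustration, not the Operator's API.

```go
package main

import "fmt"

const (
	// Values mirrored from the diff's api/v1/constants.go additions.
	AnnotationIstioConfig             = "proxy.istio.io/config"
	DefaultIstioConfigAnnotationValue = `{ "holdApplicationUntilProxyStarts": true }`
)

// mergeAnnotations seeds the map with the default Istio config annotation,
// then applies global and spec annotations in order, so later sources win.
func mergeAnnotations(global, spec map[string]string) map[string]string {
	annotations := map[string]string{
		AnnotationIstioConfig: DefaultIstioConfigAnnotationValue,
	}
	for k, v := range global {
		annotations[k] = v
	}
	for k, v := range spec {
		annotations[k] = v
	}
	return annotations
}

func main() {
	spec := map[string]string{AnnotationIstioConfig: `{ "holdApplicationUntilProxyStarts": false }`}
	merged := mergeAnnotations(nil, spec)
	fmt.Println(merged[AnnotationIstioConfig]) // the spec-level value wins
}
```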