From d1ed0d4ccfa741d5215ac0c14e97499861fea691 Mon Sep 17 00:00:00 2001 From: Tim O'Keefe Date: Fri, 11 Jul 2025 11:28:49 -0400 Subject: [PATCH] OSSM-4815: Document HA for a mesh --- ...ssm-installing-openshift-service-mesh.adoc | 14 +++- .../ossm-about-istio-high-availability.adoc | 14 ++++ ...ossm-api-settings-mesh-ha-autoscaling.adoc | 30 ++++++++ ...ssm-api-settings-mesh-ha-replicacount.adoc | 20 ++++++ ...ossm-configuring-istio-ha-autoscaling.adoc | 72 +++++++++++++++++++ ...ssm-configuring-istio-ha-replicacount.adoc | 68 ++++++++++++++++++ 6 files changed, 216 insertions(+), 2 deletions(-) create mode 100644 modules/ossm-about-istio-high-availability.adoc create mode 100644 modules/ossm-api-settings-mesh-ha-autoscaling.adoc create mode 100644 modules/ossm-api-settings-mesh-ha-replicacount.adoc create mode 100644 modules/ossm-configuring-istio-ha-autoscaling.adoc create mode 100644 modules/ossm-configuring-istio-ha-replicacount.adoc diff --git a/install/ossm-installing-openshift-service-mesh.adoc b/install/ossm-installing-openshift-service-mesh.adoc index da19b04002ce..f1c73a18c2b1 100644 --- a/install/ossm-installing-openshift-service-mesh.adoc +++ b/install/ossm-installing-openshift-service-mesh.adoc @@ -18,7 +18,7 @@ include::modules/ossm-about-istio-control-plane-update-strategies.adoc [leveloffset=+2] include::modules/ossm-installing-operator.adoc[leveloffset=+1] -[role="_additional-resources"] +[role="_additional-resources-multiple-meshes"] .Additional resources * xref:../install/ossm-deploying-multiple-service-meshes-on-single-cluster.adoc#ossm-deploying-multiple-service-meshes-on-single-cluster[Deploying multiple service meshes on a single cluster] @@ -41,8 +41,18 @@ include::modules/ossm-about-accessing-bookinfo-application-using-gateway.adoc[le include::modules/ossm-accessing-bookinfo-application-using-istio-gateway-injection.adoc[leveloffset=+2] include::modules/ossm-accessing-bookinfo-application-using-gateway-api.adoc[leveloffset=+2] 
-[role="_additional-resources"] +[role="_additional-resources-gateway-api"] .Additional resources * link:https://docs.redhat.com/en/documentation/openshift_container_platform/latest/html/networking/configuring-ingress-cluster-traffic[Configuring ingress cluster traffic] include::modules/ossm-customizing-istio-configuration.adoc[leveloffset=+1] + +include::modules/ossm-about-istio-high-availability.adoc[leveloffset=+1] +include::modules/ossm-configuring-istio-ha-autoscaling.adoc[leveloffset=+2] +include::modules/ossm-api-settings-mesh-ha-autoscaling.adoc[leveloffset=+3] + +[role="_additional-resources-pod-scaling"] +.Additional resources +* link:https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/[Horizontal Pod Autoscaling] + +include::modules/ossm-configuring-istio-ha-replicacount.adoc[leveloffset=+2] \ No newline at end of file diff --git a/modules/ossm-about-istio-high-availability.adoc b/modules/ossm-about-istio-high-availability.adoc new file mode 100644 index 000000000000..c4e528b2ada2 --- /dev/null +++ b/modules/ossm-about-istio-high-availability.adoc @@ -0,0 +1,14 @@ +// This module is used in the following assemblies: +// * service-mesh-docs-main/install/ossm-installing-openshift-service-mesh.adoc + +:_mod-docs-content-type: CONCEPT +[id="ossm-about-istio-high-availability_{context}"] += About Istio High Availability + +Running the {istio} control plane in High Availability (HA) mode prevents single points of failure, and ensures continuous mesh operation even if an `istiod` pod fails. By using HA, if one `istiod` pod becomes unavailable, another one continues to manage and configure the mesh, preventing service outages or disruptions. HA provides scalability by distributing the control plane workload, enables graceful upgrades, supports disaster recovery operations, and protects against zone-wide mesh outages. 
+ +There are two ways for a system administrator to configure HA for the {istio} deployment: + +* Defining a static replica count: This approach involves setting a fixed number of `istiod` pods, providing a consistent level of redundancy. + +* Using autoscaling: This approach dynamically adjusts the number of `istiod` pods based on resource utilization or custom metrics, providing more efficient resource consumption for fluctuating workloads. \ No newline at end of file diff --git a/modules/ossm-api-settings-mesh-ha-autoscaling.adoc b/modules/ossm-api-settings-mesh-ha-autoscaling.adoc new file mode 100644 index 000000000000..c765836e2cba --- /dev/null +++ b/modules/ossm-api-settings-mesh-ha-autoscaling.adoc @@ -0,0 +1,30 @@ +// This module is used in the following assemblies: +// * service-mesh-docs-main/install/ossm-installing-openshift-service-mesh.adoc + +:_mod-docs-content-type: REFERENCE +[id="ossm-api-settings-mesh-ha-autoscaling_{context}"] += API settings for Service Mesh HA autoscaling mode + +Use the following `istio` custom resource definition (CRD) parameters when you configure a service mesh for High Availability (HA) by using autoscaling. + +.HA API parameters +[cols="1,1"] +|=== +|Parameter |Description + +|`autoscaleMin` | Defines the minimum number of `istiod` pods for an {istio} deployment. Each pod contains one instance of the {istio} control plane. + +{ocp-short-name} only uses this parameter when the {istio} deployment uses the Horizontal Pod Autoscaler (HPA) configuration. +|`autoscaleMax` | Defines the maximum number of `istiod` pods for an {istio} deployment. Each pod contains one instance of the {istio} control plane. + +For {ocp-short-name} to automatically scale the number of `istiod` pods based on load, you must set this parameter to a value that is greater than the value that you defined for the `autoscaleMin` parameter. + +You must also configure metrics for autoscaling to work properly. 
If no metrics are configured, the autoscaler does not scale up or down. + +{ocp-short-name} only uses this parameter when the {istio} deployment uses the HPA configuration. +|`cpu.targetAverageUtilization` | Defines the target CPU utilization for the `istiod` pod. If the average CPU usage exceeds the threshold that this parameter defines, the HPA automatically increases the number of replica pods. +|`memory.targetAverageUtilization` | Defines the target memory utilization for the `istiod` pod. If the average memory usage exceeds the threshold that this parameter defines, the HPA automatically increases the number of replica pods. +|`behavior` | You can use the `behavior` field to define additional policies that {ocp-short-name} uses to scale {istio} resources up or down. + +For more information, see link:https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/#configurable-scaling-behavior[Configurable Scaling Behavior]. +|=== diff --git a/modules/ossm-api-settings-mesh-ha-replicacount.adoc b/modules/ossm-api-settings-mesh-ha-replicacount.adoc new file mode 100644 index 000000000000..d03d832fc6ce --- /dev/null +++ b/modules/ossm-api-settings-mesh-ha-replicacount.adoc @@ -0,0 +1,20 @@ +// This module is used in the following assemblies: +// * service-mesh-docs-main/install/ossm-installing-openshift-service-mesh.adoc + +:_mod-docs-content-type: REFERENCE +[id="ossm-api-settings-mesh-ha-replicacount_{context}"] += API settings for Service Mesh HA replica count mode + +Use the following `istio` custom resource definition (CRD) parameters when you configure a service mesh for High Availability (HA) by using replica count. + +.HA API parameters +[cols="1,1"] +|=== +|Parameter |Description + +|`replicaCount` | Defines the number of `istiod` pods for an {istio} deployment. Each pod contains one instance of the {istio} control plane. The default setting is `1`. + +You must set `replicaCount` to a value of `2` or greater to support HA. 
+ +{ocp-short-name} only uses this parameter when the {istio} deployment does not use the Horizontal Pod Autoscaler (HPA) configuration. +|=== \ No newline at end of file diff --git a/modules/ossm-configuring-istio-ha-autoscaling.adoc b/modules/ossm-configuring-istio-ha-autoscaling.adoc new file mode 100644 index 000000000000..45fc12af7a05 --- /dev/null +++ b/modules/ossm-configuring-istio-ha-autoscaling.adoc @@ -0,0 +1,72 @@ +// This procedure is used in the following assembly: +// * service-mesh-docs-main/install/ossm-installing-openshift-service-mesh.adoc + +:_mod-docs-content-type: PROCEDURE +[id="ossm-configuring-istio-ha-autoscaling_{context}"] += Configuring Istio HA by using autoscaling + +Configure the {istio} control plane in High Availability (HA) mode to prevent a single point of failure, and ensure continuous mesh operation even if one of the `istiod` pods fails. Autoscaling defines the minimum and maximum number of {istio} control plane pods that can operate. {ocp-product-title} uses these values to scale the number of control plane pods in operation based on resource utilization, such as CPU or memory, to efficiently respond to the varying number of workloads and overall traffic patterns within the mesh. + +.Prerequisites + +* You are logged in to the {ocp-product-title} web console as a user with the `cluster-admin` role. + +* You installed the {SMProductName} Operator. + +* You deployed the {istio} resource. + +.Procedure + +. In the {ocp-product-title} web console, click *Installed Operators*. + +. Click *{SMProductName} 3 Operator*. + +. Click *Istio*. + +. Click the name of the {istio} installation. For example, `default`. + +. Click *YAML*. + +. Modify the {istio} custom resource. 
++ +.Example configuration +[source,yaml,subs="attributes,verbatim"] +---- +apiVersion: sailoperator.io/v1 +kind: Istio +metadata: + name: default +spec: + namespace: istio-system + values: + pilot: + autoscaleMin: 2 # <1> + autoscaleMax: 5 # <2> + cpu: + targetAverageUtilization: 80 # <3> + memory: + targetAverageUtilization: 80 # <4> +---- +<1> Defines the minimum number of {istio} control plane replicas that always run. To support HA, there must be at least two replicas. +<2> Defines the maximum number of {istio} control plane replicas, allowing for scaling based on load. +<3> Sets the target CPU utilization for autoscaling to 80%. If the average CPU usage exceeds this threshold, the Horizontal Pod Autoscaler (HPA) automatically increases the number of replicas. +<4> Sets the target memory utilization for autoscaling to 80%. If the average memory usage exceeds this threshold, the HPA automatically increases the number of replicas. + +.Verification + +. Verify the status of the {istio} control plane pods by running the following command: ++ +[source,terminal] +---- +$ oc get pods -n istio-system -l app=istiod +---- ++ +.Example output +[source,terminal] +---- +NAME READY STATUS RESTARTS AGE +istiod-7c7b6564c9-nwhsg 1/1 Running 0 70s +istiod-7c7b6564c9-xkmsl 1/1 Running 0 85s +---- ++ +Two `istiod` pods are running, which is the minimum requirement for an HA {istio} control plane and indicates that a basic HA setup is in place. 
\ No newline at end of file diff --git a/modules/ossm-configuring-istio-ha-replicacount.adoc b/modules/ossm-configuring-istio-ha-replicacount.adoc new file mode 100644 index 000000000000..683cab11b831 --- /dev/null +++ b/modules/ossm-configuring-istio-ha-replicacount.adoc @@ -0,0 +1,68 @@ +// This procedure is used in the following assembly: +// * service-mesh-docs-main/install/ossm-installing-openshift-service-mesh.adoc + +:_mod-docs-content-type: PROCEDURE +[id="ossm-configuring-istio-ha-replicacount_{context}"] += Configuring Istio HA by using replica count + +Configure the {istio} control plane in High Availability (HA) mode to prevent a single point of failure, and ensure continuous mesh operation even if one of the `istiod` pods fails. The replica count defines a fixed number of {istio} control plane pods that can operate. Use replica count for mesh environments where the control plane workload is relatively stable or predictable, or when you prefer to manually scale the `istiod` pod. + +.Prerequisites + +* You are logged in to the {ocp-product-title} web console as a user with the `cluster-admin` role. + +* You installed the {SMProductName} Operator. + +* You deployed the {istio} resource. + +.Procedure + +. Obtain the name of the {istio} resource by running the following command: ++ +[source,terminal] +---- +$ oc get istio -n istio-system +---- ++ +.Example output +[source,terminal] +---- +NAME REVISIONS READY IN USE ACTIVE REVISION STATUS VERSION AGE +default 1 1 0 default Healthy v1.24.6 24m +---- ++ +The name of the {istio} resource is `default`. + +. Update the {istio} custom resource by adding the `autoscaleEnabled` and `replicaCount` parameters by running the following command: ++ +[source,terminal] +---- +$ oc patch istio default -n istio-system --type merge -p ' +spec: + values: + pilot: + autoscaleEnabled: false <1> + replicaCount: 2 <2> +' +---- +<1> Disables autoscaling and ensures that the number of replicas remains fixed. 
+<2> Defines the number of {istio} control plane replicas. To support HA, there must be at least two replicas. + +.Verification + +. Verify the status of the {istio} control plane pods by running the following command: ++ +[source,terminal] +---- +$ oc get pods -n istio-system -l app=istiod +---- ++ +.Example output +[source,terminal] +---- +NAME READY STATUS RESTARTS AGE +istiod-7c7b6564c9-nwhsg 1/1 Running 0 70s +istiod-7c7b6564c9-xkmsl 1/1 Running 0 85s +---- ++ +Two `istiod` pods are running, which is the minimum requirement for an HA {istio} control plane and indicates that a basic HA setup is in place. \ No newline at end of file