Skip to content

Commit b3eae63

Browse files
committed
K8SPS-73 - Add operator-self-healing test
1 parent 2251903 commit b3eae63

31 files changed

+738
-21
lines changed

e2e-tests/conf/chaos-network-loss.yml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
apiVersion: chaos-mesh.org/v1alpha1
2+
kind: NetworkChaos
3+
metadata:
4+
name: network-loss-example
5+
spec:
6+
action: loss
7+
mode: one
8+
selector:
9+
pods:
10+
test-namespace:
11+
- pod-name
12+
loss:
13+
loss: "100"
14+
correlation: "100"
15+
duration: "60s"

e2e-tests/conf/chaos-pod-failure.yml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
apiVersion: chaos-mesh.org/v1alpha1
2+
kind: PodChaos
3+
metadata:
4+
name: pod-failure-example
5+
spec:
6+
action: pod-failure
7+
mode: one
8+
value: ""
9+
duration: "60s"
10+
selector:
11+
pods:
12+
test-namespace:
13+
- pod-name

e2e-tests/conf/chaos-pod-kill.yml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
apiVersion: chaos-mesh.org/v1alpha1
2+
kind: PodChaos
3+
metadata:
4+
name: pod-kill-example
5+
spec:
6+
action: pod-kill
7+
mode: one
8+
selector:
9+
pods:
10+
test-namespace:
11+
- pod-name

e2e-tests/functions

Lines changed: 116 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ deploy_pmm_server() {
5050
--set platform="${platform}" \
5151
"https://percona-charts.storage.googleapis.com/pmm-server-${PMM_SERVER_VERSION}.tgz"
5252
fi
53-
SERVICE="postgres"
53+
local SERVICE="postgres"
5454
until kubectl -n "${NAMESPACE}" exec monitoring-0 -- bash -c "pgrep -x $SERVICE >/dev/null"; do
5555
echo "Retry $retry"
5656
sleep 5
@@ -63,13 +63,13 @@ deploy_pmm_server() {
6363
}
6464

6565
get_pmm_api_key() {
66-
ADMIN_PASSWORD=$(kubectl -n "${NAMESPACE}" exec monitoring-0 -- bash -c "printenv | grep ADMIN_PASSWORD | cut -d '=' -f2")
66+
local ADMIN_PASSWORD=$(kubectl -n "${NAMESPACE}" exec monitoring-0 -- bash -c "printenv | grep ADMIN_PASSWORD | cut -d '=' -f2")
6767
echo $(curl --insecure -X POST -H "Content-Type: application/json" -d '{"name":"operator", "role": "Admin"}' "https://admin:$ADMIN_PASSWORD@"$(get_service_ip monitoring-service)"/graph/api/auth/keys" | jq .key)
6868
}
6969

7070
deploy_minio() {
71-
accessKey="$(kubectl -n "${NAMESPACE}" get secret minio-secret -o jsonpath='{.data.AWS_ACCESS_KEY_ID}' | base64 -d)"
72-
secretKey="$(kubectl -n "${NAMESPACE}" get secret minio-secret -o jsonpath='{.data.AWS_SECRET_ACCESS_KEY}' | base64 -d)"
71+
local accessKey="$(kubectl -n "${NAMESPACE}" get secret minio-secret -o jsonpath='{.data.AWS_ACCESS_KEY_ID}' | base64 -d)"
72+
local secretKey="$(kubectl -n "${NAMESPACE}" get secret minio-secret -o jsonpath='{.data.AWS_SECRET_ACCESS_KEY}' | base64 -d)"
7373

7474
helm uninstall -n "${NAMESPACE}" minio-service || :
7575
helm repo remove minio || :
@@ -299,6 +299,7 @@ get_mysql_users() {
299299

300300
get_service_ip() {
301301
local service=$1
302+
302303
while (kubectl get service/$service -n "${NAMESPACE}" -o 'jsonpath={.spec.type}' 2>&1 || :) | grep -q NotFound; do
303304
sleep 1
304305
done
@@ -379,16 +380,43 @@ wait_pod() {
379380
set -o xtrace
380381
}
381382

383+
#######################################
# Wait until a deployment reports all desired replicas ready.
# Arguments: $1 - deployment name
#            $2 - namespace (optional; defaults to $namespace)
# Outputs:   progress dots to stdout; operator log tail on timeout
# Exits:     1 after 360 retries (~6 minutes)
#######################################
wait_deployment() {
	local name=$1
	local target_namespace=${2:-"$namespace"}
	local retry=0
	local replicas ready

	sleep 10
	set +o xtrace
	echo -n "$name"
	while :; do
		# Read each status field once per iteration and compare the cached
		# values, instead of issuing two independent kubectl calls inside a
		# single [ ... -a ... ] test (deprecated operator, and racy: status
		# could change between the two reads).
		replicas=$(kubectl -n "${target_namespace}" get deployment "$name" -o jsonpath='{.status.replicas}')
		ready=$(kubectl -n "${target_namespace}" get deployment "$name" -o jsonpath='{.status.readyReplicas}')
		if [[ -n $replicas && $replicas == "$ready" ]]; then
			break
		fi
		sleep 1
		echo -n .
		retry=$((retry + 1))
		if [ $retry -ge 360 ]; then
			kubectl logs "$(get_operator_pod)" -c operator \
				| grep -v 'level=info' \
				| grep -v 'level=debug' \
				| tail -100
			echo "max retry count $retry reached. something went wrong with operator or kubernetes cluster"
			exit 1
		fi
	done
	echo
	set -o xtrace
}
409+
382410
check_auto_tuning() {
383-
RAM_SIZE=$1
384-
RDS_MEM_INSTANCE=12582880
385-
CUSTOM_INNODB_SIZE=$2
386-
CUSTOM_CONNECTIONS=$3
411+
local RAM_SIZE=$1
412+
local RDS_MEM_INSTANCE=12582880
413+
local CUSTOM_INNODB_SIZE=$2
414+
local CUSTOM_CONNECTIONS=$3
387415

388-
INNODB_SIZE=$(run_mysql \
416+
local INNODB_SIZE=$(run_mysql \
389417
'SELECT @@innodb_buffer_pool_size;' \
390418
"-h $(get_haproxy_svc "$(get_cluster_name)") -uroot -proot_password")
391-
CONNECTIONS=$(run_mysql \
419+
local CONNECTIONS=$(run_mysql \
392420
'SELECT @@max_connections;' \
393421
"-h $(get_haproxy_svc "$(get_cluster_name)") -uroot -proot_password")
394422

@@ -451,9 +479,8 @@ get_primary_from_haproxy() {
451479
verify_certificate_sans() {
452480
local certificate=$1
453481
local expected_sans=$2
454-
455-
have=$(mktemp)
456-
want=$(mktemp)
482+
local have=$(mktemp)
483+
local want=$(mktemp)
457484

458485
kubectl -n "${NAMESPACE}" get certificate "${certificate}" -o jsonpath='{.spec.dnsNames}' | jq '.' >"${have}"
459486
echo "${expected_sans}" | jq '.' >"${want}"
@@ -462,21 +489,19 @@ verify_certificate_sans() {
462489
}
463490

464491
check_passwords_leak() {
465-
466-
secrets=$(kubectl get secrets -o json | jq -r '.items[].data | to_entries | .[] | select(.key | (endswith(".crt") or endswith(".key") or endswith(".pub") or endswith(".pem") or endswith(".p12")) | not) | .value')
467-
468-
passwords="$(for i in $secrets; do base64 -d <<< $i; echo; done) $secrets"
469-
pods=$(kubectl -n "${NAMESPACE}" get pods -o name | awk -F "/" '{print $2}')
492+
local secrets=$(kubectl get secrets -o json | jq -r '.items[].data | to_entries | .[] | select(.key | (endswith(".crt") or endswith(".key") or endswith(".pub") or endswith(".pem") or endswith(".p12")) | not) | .value')
493+
local passwords="$(for i in $secrets; do base64 -d <<< $i; echo; done) $secrets"
494+
local pods=$(kubectl -n "${NAMESPACE}" get pods -o name | awk -F "/" '{print $2}')
470495

471496
collect_logs() {
472497
NS=$1
473498
for p in $pods; do
474-
containers=$(kubectl -n "$NS" get pod $p -o jsonpath='{.spec.containers[*].name}')
499+
local containers=$(kubectl -n "$NS" get pod $p -o jsonpath='{.spec.containers[*].name}')
475500
for c in $containers; do
476501
kubectl -n "$NS" logs $p -c $c >${TEMP_DIR}/logs_output-$p-$c.txt
477502
echo logs saved in: ${TEMP_DIR}/logs_output-$p-$c.txt
478503
for pass in $passwords; do
479-
count=$(grep -c --fixed-strings -- "$pass" ${TEMP_DIR}/logs_output-$p-$c.txt || :)
504+
local count=$(grep -c --fixed-strings -- "$pass" ${TEMP_DIR}/logs_output-$p-$c.txt || :)
480505
if [[ $count != 0 ]]; then
481506
echo leaked passwords are found in log ${TEMP_DIR}/logs_output-$p-$c.txt
482507
false
@@ -489,7 +514,77 @@ check_passwords_leak() {
489514

490515
collect_logs $NAMESPACE
491516
if [ -n "$OPERATOR_NS" ]; then
492-
pods=$(kubectl -n "${OPERATOR_NS}" get pods -o name | awk -F "/" '{print $2}')
517+
local pods=$(kubectl -n "${OPERATOR_NS}" get pods -o name | awk -F "/" '{print $2}')
493518
collect_logs $OPERATOR_NS
494519
fi
495520
}
521+
522+
#######################################
# Install chaos-mesh 2.5.1 into $NAMESPACE (containerd runtime, dashboard
# disabled), tearing down any previous installation first.
# Globals:   NAMESPACE
#######################################
deploy_chaos_mesh() {
	destroy_chaos_mesh

	helm repo add chaos-mesh https://charts.chaos-mesh.org
	helm install chaos-mesh chaos-mesh/chaos-mesh \
		--namespace=${NAMESPACE} \
		--set chaosDaemon.runtime=containerd \
		--set chaosDaemon.socketPath=/run/containerd/containerd.sock \
		--set dashboard.create=false \
		--version 2.5.1
	sleep 10
}
529+
530+
#######################################
# Remove chaos-mesh and all of its cluster-scoped leftovers: chaos custom
# resources, the helm release, CRDs, RBAC objects, and webhook configurations.
# Every delete is best-effort ('|| :') so cleanup never aborts the test.
#######################################
destroy_chaos_mesh() {
	local release_ns
	release_ns=$(helm list --all-namespaces --filter chaos-mesh | tail -n1 | awk -F' ' '{print $2}' | sed 's/NAMESPACE//')

	# Delete every chaos-mesh custom resource in every namespace first, so
	# finalizers are processed while the controller may still be running.
	local resource
	for resource in $(kubectl api-resources | grep chaos-mesh | awk '{print $1}'); do
		timeout 30 kubectl delete ${resource} --all --all-namespaces || :
	done

	if [ -n "${release_ns}" ]; then
		helm uninstall chaos-mesh --namespace ${release_ns} || :
	fi

	timeout 30 kubectl delete crd $(kubectl get crd | grep 'chaos-mesh.org' | awk '{print $1}') || :

	# Cluster-scoped RBAC and webhook objects, in the same order as before.
	local kind
	for kind in clusterrolebinding clusterrole MutatingWebhookConfiguration ValidatingWebhookConfiguration; do
		timeout 30 kubectl delete ${kind} $(kubectl get ${kind} | grep 'chaos-mesh' | awk '{print $1}') || :
	done
	timeout 30 kubectl delete ValidatingWebhookConfiguration $(kubectl get ValidatingWebhookConfiguration | grep 'validate-auth' | awk '{print $1}') || :
}
544+
545+
#######################################
# Apply a PodChaos pod-kill experiment.
# Arguments: $1 - namespace to target
#            $2 - selector mode: "pod" (single pod by name) or "label"
#                 (all pods matching a label)
#            $3 - pod name (mode "pod") or label key (mode "label")
#            $4 - label value (mode "label" only)
# Globals:   TESTS_CONFIG_DIR, RANDOM
#######################################
kill_pods() {
	local ns=$1
	local selector=$2
	local pod_label=$3
	local label_value=$4

	case "${selector}" in
		pod)
			yq eval '
				.metadata.name = "chaos-pod-kill-'${RANDOM}'" |
				del(.spec.selector.pods.test-namespace) |
				.spec.selector.pods.'${ns}'[0] = "'${pod_label}'"' ${TESTS_CONFIG_DIR}/chaos-pod-kill.yml \
				| kubectl apply --namespace ${ns} -f -
			;;
		label)
			yq eval '
				.metadata.name = "chaos-kill-label-'${RANDOM}'" |
				.spec.mode = "all" |
				del(.spec.selector.pods) |
				.spec.selector.labelSelectors."'${pod_label}'" = "'${label_value}'"' ${TESTS_CONFIG_DIR}/chaos-pod-kill.yml \
				| kubectl apply --namespace ${ns} -f -
			;;
	esac
	sleep 5
}
567+
568+
#######################################
# Apply a PodChaos pod-failure experiment against a single pod.
# Arguments: $1 - namespace, $2 - pod name
# Globals:   TESTS_CONFIG_DIR, RANDOM
#######################################
failure_pod() {
	local chaos_ns=$1
	local target_pod=$2

	yq eval '
		.metadata.name = "chaos-pod-failure-'${RANDOM}'" |
		del(.spec.selector.pods.test-namespace) |
		.spec.selector.pods.'${chaos_ns}'[0] = "'${target_pod}'"' ${TESTS_CONFIG_DIR}/chaos-pod-failure.yml \
		| kubectl apply --namespace ${chaos_ns} -f -
	sleep 5
}
579+
580+
#######################################
# Apply a NetworkChaos 100% packet-loss experiment against a single pod.
# Arguments: $1 - namespace, $2 - pod name
# Globals:   TESTS_CONFIG_DIR, RANDOM
#######################################
network_loss() {
	local chaos_ns=$1
	local target_pod=$2

	yq eval '
		.metadata.name = "chaos-pod-network-loss-'${RANDOM}'" |
		del(.spec.selector.pods.test-namespace) |
		.spec.selector.pods.'${chaos_ns}'[0] = "'${target_pod}'"' ${TESTS_CONFIG_DIR}/chaos-network-loss.yml \
		| kubectl apply --namespace ${chaos_ns} -f -
	sleep 5
}

e2e-tests/run-distro.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ haproxy
1212
init-deploy
1313
monitoring
1414
one-pod
15+
operator-self-healing
1516
scaling
1617
service-per-pod
1718
sidecars

e2e-tests/run-minikube.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ gr-tls-cert-manager
1111
haproxy
1212
init-deploy
1313
one-pod
14+
operator-self-healing
1415
sidecars
1516
tls-cert-manager
1617
users

e2e-tests/run-pr.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ init-deploy
1616
limits
1717
monitoring
1818
one-pod
19+
operator-self-healing
1920
scaling
2021
service-per-pod
2122
sidecars

e2e-tests/run-release.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ init-deploy
1515
limits
1616
monitoring
1717
one-pod
18+
operator-self-healing
1819
scaling
1920
service-per-pod
2021
sidecars
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
apiVersion: kuttl.dev/v1beta1
2+
kind: TestAssert
3+
timeout: 120
4+
---
5+
apiVersion: apiextensions.k8s.io/v1
6+
kind: CustomResourceDefinition
7+
metadata:
8+
name: perconaservermysqls.ps.percona.com
9+
spec:
10+
group: ps.percona.com
11+
names:
12+
kind: PerconaServerMySQL
13+
listKind: PerconaServerMySQLList
14+
plural: perconaservermysqls
15+
shortNames:
16+
- ps
17+
singular: perconaservermysql
18+
scope: Namespaced
19+
---
20+
apiVersion: kuttl.dev/v1beta1
21+
kind: TestAssert
22+
metadata:
23+
name: check-operator-deploy-status
24+
timeout: 120
25+
commands:
26+
- script: kubectl assert exist-enhanced deployment percona-server-mysql-operator -n ${OPERATOR_NS:-$NAMESPACE} --field-selector status.readyReplicas=1
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
apiVersion: kuttl.dev/v1beta1
2+
kind: TestStep
3+
timeout: 10
4+
commands:
5+
- script: |-
6+
set -o errexit
7+
set -o xtrace
8+
9+
source ../../functions
10+
11+
deploy_operator
12+
deploy_non_tls_cluster_secrets
13+
deploy_tls_cluster_secrets
14+
deploy_client

0 commit comments

Comments
 (0)