@@ -100,7 +100,8 @@ deploy_tls_cluster_secrets() {
 }

 deploy_client() {
-    kubectl -n "${NAMESPACE}" apply -f "${TESTS_CONFIG_DIR}/client.yaml"
+    yq eval "$(printf '.spec.containers[0].image="%s"' "${IMAGE_MYSQL}")" "${TESTS_CONFIG_DIR}/client.yaml" | \
+        kubectl -n "${NAMESPACE}" apply -f -
 }

 apply_s3_storage_secrets() {
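
Note: the printf-built filter above is just an image override. With a hypothetical IMAGE_MYSQL value, the expression handed to yq expands to:

    IMAGE_MYSQL="percona/percona-server:8.0"    # hypothetical tag
    printf '.spec.containers[0].image="%s"' "${IMAGE_MYSQL}"
    # -> .spec.containers[0].image="percona/percona-server:8.0"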
@@ -385,10 +386,29 @@ run_mysqlsh() {
 	wait_pod $client_pod 1>&2

 	kubectl -n "${NAMESPACE}" exec "${pod:-mysql-client}" -- \
-		bash -c "printf '%s\n' \"${command}\" | mysqlsh --sql --quiet-start=2 $uri" 2>&1 \
+		bash -c "printf '%s\n' \"${command}\" | mysqlsh --sql --quiet-start=2 $uri" 2>/dev/null \
 		| tail -n +2
 }

+get_innodb_cluster_status() {
+	local uri="$1"
+
+	client_pod=$(get_client_pod)
+	wait_pod $client_pod 1>&2
+
+	kubectl -n "${NAMESPACE}" exec "${client_pod}" -- mysqlsh --js --quiet-start=2 --uri ${uri} -- cluster status
+}
+
+wait_until_innodb_ok() {
+	local uri="$1"
+
+	local retry=0
+	until [[ $(get_innodb_cluster_status ${uri} | jq -r .defaultReplicaSet.status) == "OK" ]]; do
+		sleep 5
+		retry=$((retry + 1))
+	done
+}
+
 run_curl() {
 	kubectl -n "${NAMESPACE}" exec mysql-client -- bash -c "curl -s -k $*"
 }
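
A minimal usage sketch for the two new helpers (URI and names hypothetical). wait_until_innodb_ok polls every five seconds until mysqlsh reports the replica set as OK; note that its retry counter is incremented but never checked, so the loop has no upper bound:

    uri="root:root_password@cluster1-mysql-0.cluster1-mysql.test-ns"    # hypothetical
    wait_until_innodb_ok "${uri}"
    get_innodb_cluster_status "${uri}" | jq -r .defaultReplicaSet.status    # expect "OK"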
@@ -397,6 +417,13 @@ get_innodb_cluster_name() {
 	echo $(get_cluster_name) | tr -cd '[^a-zA-Z0-9_]+'
 }

+get_mysqlsh_uri_for_pod() {
+	local pod=$1
+
+	echo "root:root_password@${pod}.$(get_cluster_name)-mysql.${NAMESPACE}"
+}
+
 get_mysqlsh_uri() {
 	local idx=${1:-0}
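
With hypothetical cluster and namespace names, the helper echoes a ready-to-use mysqlsh URI:

    get_mysqlsh_uri_for_pod cluster1-mysql-0
    # -> root:root_password@cluster1-mysql-0.cluster1-mysql.test-ns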
@@ -409,7 +436,7 @@ get_gr_status() {
 	client_pod=$(get_client_pod)

-	kubectl -n "${NAMESPACE}" exec "${pod:-mysql-client}" -- mysqlsh --uri $uri --cluster --result-format json -- cluster status \
+	kubectl -n "${NAMESPACE}" exec "${pod:-mysql-client}" -- mysqlsh --js --uri $uri --cluster --result-format json -- cluster status \
 		| sed -e 's/mysql: //' \
 		| (grep -v 'Using a password on the command line interface can be insecure.' || :)
 }
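
The jq paths used in this file (.defaultReplicaSet.status, .defaultReplicaSet.topology[].status) assume the usual shape of mysqlsh cluster status output; an abridged sketch with hypothetical member addresses:

    {
      "defaultReplicaSet": {
        "status": "OK",
        "topology": {
          "cluster1-mysql-0:3306": {"status": "ONLINE"},
          "cluster1-mysql-1:3306": {"status": "ONLINE"}
        }
      }
    }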
@@ -525,7 +552,7 @@ get_router_pods() {
 get_mysql_users() {
 	local args=$1

-	run_mysql "SELECT user FROM mysql.user" "${args}" | grep -vE "mysql|root"
+	run_mysql "SELECT user FROM mysql.user" "${args}" | grep -vE "mysql|root|percona.telemetry"
 }

 get_service_ip() {
@@ -780,19 +807,14 @@ deploy_chaos_mesh() {
 	helm repo add chaos-mesh https://charts.chaos-mesh.org
 	if [ -n "${MINIKUBE}" ]; then
-		helm install chaos-mesh chaos-mesh/chaos-mesh --namespace=${NAMESPACE} --set chaosDaemon.runtime=docker --set dashboard.create=false --version ${CHAOS_MESH_VER} --wait
+		helm install chaos-mesh chaos-mesh/chaos-mesh --namespace=${NAMESPACE} --set chaosDaemon.runtime=docker --set dashboard.create=false --version ${CHAOS_MESH_VER}
 	else
 		helm install chaos-mesh chaos-mesh/chaos-mesh --namespace=${NAMESPACE} --set chaosDaemon.runtime=containerd --set chaosDaemon.socketPath=/run/containerd/containerd.sock --set dashboard.create=false --version ${CHAOS_MESH_VER}
 	fi
 	if [[ -n $OPENSHIFT ]]; then
 		oc adm policy add-scc-to-user privileged -z chaos-daemon --namespace=${NAMESPACE}
 	fi
-
-	echo "Waiting for chaos-mesh DaemonSet to be ready..."
-	until [ "$(kubectl get daemonset chaos-daemon -n ${NAMESPACE} -o jsonpath='{.status.numberReady}')" = "$(kubectl get daemonset chaos-daemon -n ${NAMESPACE} -o jsonpath='{.status.desiredNumberScheduled}')" ]; do
-		echo "Waiting for DaemonSet chaos-daemon..."
-		sleep 5
-	done
+	sleep 10
 }

 destroy_chaos_mesh() {
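
The fixed sleep assumes the chaos-daemon pods become ready within ten seconds. Where a deterministic wait is preferred, one option (a sketch, not part of the helper) is:

    kubectl rollout status daemonset/chaos-daemon -n "${NAMESPACE}" --timeout=300s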
@@ -824,17 +846,17 @@ kill_pods() {
 	local selector=$2
 	local pod_label=$3
 	local label_value=$4
-	local chaos_suffix=$5
+	local chaos_name=$5

 	if [ "${selector}" == "pod" ]; then
 		yq eval '
-			.metadata.name = "chaos-pod-kill-'${chaos_suffix}'" |
+			.metadata.name = "'${chaos_name}'" |
 			del(.spec.selector.pods.test-namespace) |
 			.spec.selector.pods.'${ns}'[0] = "'${pod_label}'"' ${TESTS_CONFIG_DIR}/chaos-pod-kill.yml \
 			| kubectl apply --namespace ${ns} -f -
 	elif [ "${selector}" == "label" ]; then
 		yq eval '
-			.metadata.name = "chaos-kill-label-'${chaos_suffix}'" |
+			.metadata.name = "'${chaos_name}'" |
 			.spec.mode = "all" |
 			del(.spec.selector.pods) |
 			.spec.selector.labelSelectors."'${pod_label}'" = "'${label_value}'"' ${TESTS_CONFIG_DIR}/chaos-pod-kill.yml \
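
Callers now pass the complete chaos object name instead of a suffix; check_primary_chaos below, for example, invokes:

    kill_pods "${NAMESPACE}" "pod" "${primary_before_failure}" "" "chaos-pod-kill-primary"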
@@ -846,10 +868,10 @@ kill_pods() {
 failure_pod() {
 	local ns=$1
 	local pod=$2
-	local chaos_suffix=$3
+	local chaos_name=$3

 	yq eval '
-		.metadata.name = "chaos-pod-failure-'${chaos_suffix}'" |
+		.metadata.name = "'${chaos_name}'" |
 		del(.spec.selector.pods.test-namespace) |
 		.spec.selector.pods.'${ns}'[0] = "'${pod}'"' ${TESTS_CONFIG_DIR}/chaos-pod-failure.yml \
 		| kubectl apply --namespace ${ns} -f -
@@ -859,16 +881,133 @@ failure_pod() {
 network_loss() {
 	local ns=$1
 	local pod=$2
-	local chaos_suffix=$3
+	local chaos_name=$3

 	yq eval '
-		.metadata.name = "chaos-pod-network-loss-'${chaos_suffix}'" |
+		.metadata.name = "'${chaos_name}'" |
 		del(.spec.selector.pods.test-namespace) |
 		.spec.selector.pods.'${ns}'[0] = "'${pod}'"' ${TESTS_CONFIG_DIR}/chaos-network-loss.yml \
 		| kubectl apply --namespace ${ns} -f -
 	sleep 5
 }

+wait_until_chaos_applied() {
+	local chaos_type=$1
+	local chaos_name=$2
+
+	local resource
+	case ${chaos_type} in
+		"kill" | "failure" | "full-cluster-crash")
+			resource=podchaos/${chaos_name}
+			;;
+		"network")
+			resource=networkchaos/${chaos_name}
+			;;
+	esac
+
+	local retry=0
+	until [[ ${retry} == 30 ]]; do
+		sleep 10
+		retry=$((retry + 1))
+
+		succeeded=$(kubectl -n ${NAMESPACE} get ${resource} -o yaml \
+			| yq '.status.experiment.containerRecords[].events[]
+			| select(.operation == "Apply" and .type == "Succeeded")')
+
+		if [[ -n ${succeeded} ]]; then
+			return
+		fi
+	done
+
+	echo "Timeout (300s) exceeded while waiting for chaos to be applied"
+	exit 1
+}
+
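
The yq selector matches the event records Chaos Mesh writes into the resource status once an experiment is injected; abridged, the shape it expects looks like:

    status:
      experiment:
        containerRecords:
        - events:
          - operation: Apply
            type: Succeeded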
+wait_until_chaos_recovered() {
+	local chaos_type=$1
+	local chaos_name=$2
+
+	local resource
+	case ${chaos_type} in
+		"kill" | "failure")
+			resource=podchaos/${chaos_name}
+			;;
+		"network")
+			resource=networkchaos/${chaos_name}
+			;;
+	esac
+
+	local retry=0
+	until [[ ${retry} == 30 ]]; do
+		sleep 10
+		retry=$((retry + 1))
+
+		succeeded=$(kubectl -n ${NAMESPACE} get ${resource} -o yaml \
+			| yq '.status.experiment.containerRecords[].events[]
+			| select(.operation == "Recover" and .type == "Succeeded")')
+
+		if [[ -n ${succeeded} ]]; then
+			return
+		fi
+	done
+
+	echo "Timeout (300s) exceeded while waiting for chaos to be recovered"
+	exit 1
+}
+
+check_primary_chaos() {
+	local chaos_type=$1
+	local ns=$2
+	local primary_before_failure=$3
+
+	local chaos_name
+	case ${chaos_type} in
+		"kill")
+			chaos_name="chaos-pod-kill-primary"
+			kill_pods "${ns}" "pod" "${primary_before_failure}" "" "${chaos_name}"
+			;;
+		"full-cluster-crash")
+			chaos_name="chaos-kill-label-cluster-crash"
+			kill_pods "${ns}" "label" "app.kubernetes.io/instance" "gr-self-healing" "${chaos_name}"
+			;;
+		"failure")
+			chaos_name="chaos-pod-failure-primary"
+			failure_pod "${ns}" "${primary_before_failure}" "${chaos_name}"
+			;;
+		"network")
+			chaos_name="chaos-pod-network-loss-primary"
+			network_loss "${ns}" "${primary_before_failure}" "${chaos_name}"
+			;;
+	esac
+
+	wait_until_chaos_applied ${chaos_type} ${chaos_name}
+	if [[ ${chaos_type} == "failure" || ${chaos_type} == "network" ]]; then
+		wait_until_chaos_recovered ${chaos_type} ${chaos_name}
+	fi
+
+	wait_cluster_consistency_gr "$(get_cluster_name)" 3 3
+
+	primary_after_failure=$(get_primary_from_group_replication)
+	uri=$(get_mysqlsh_uri_for_pod ${primary_after_failure})
+	wait_until_innodb_ok ${uri}
+
+	if [[ "${primary_before_failure}" == "${primary_after_failure}" ]]; then
+		echo "primary pod was not killed! something went wrong."
+		exit 1
+	fi
+
+	uri=$(get_mysqlsh_uri_for_pod $(get_primary_from_group_replication))
+	online_members=$(get_innodb_cluster_status ${uri} \
+		| jq .defaultReplicaSet.topology[].status \
+		| grep ONLINE \
+		| wc -l)
+
+	if [[ ${online_members} != 3 ]]; then
+		echo "expected 3 online members, got ${online_members}"
+		exit 1
+	fi
+}
+
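
Putting it together, a self-healing test can drive one scenario per chaos type with a single call, e.g.:

    check_primary_chaos "kill" "${NAMESPACE}" "$(get_primary_from_group_replication)"
    check_primary_chaos "network" "${NAMESPACE}" "$(get_primary_from_group_replication)"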
 renew_certificate() {
 	certificate="$1"