Skip to content

Commit b560d47

Browse files
eleo007 and egegunes
authored and committed
K8SPSMDB-1265: clean up basmdb-backup and use kubectl wait for wait_restore (#1911)
Use kubectl wait instead of regular loop in `wait_restore()` Add retry for `demand-backup-sharded` test backup presence in minio storage Delete backups during test cleanup before removing finalizers from objects.
1 parent 82ae079 commit b560d47

File tree

4 files changed

+55
-15
lines changed
  • e2e-tests

4 files changed

+55
-15
lines changed

e2e-tests/demand-backup-incremental-sharded/run

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ run_recovery_check() {
5656
fi
5757
echo
5858

59-
wait_cluster_consistency ${cluster} 42
59+
wait_cluster_consistency ${cluster} 60
6060
wait_for_pbm_operations ${cluster}
6161

6262
if [[ $base == true ]]; then

e2e-tests/demand-backup-sharded/run

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -148,10 +148,18 @@ fi
148148

149149
desc 'check backup and restore -- minio'
150150
backup_dest_minio=$(get_backup_dest "$backup_name_minio")
151-
kubectl_bin run -i --rm aws-cli --image=perconalab/awscli --restart=Never -- \
151+
retry=0
152+
until kubectl_bin run -i --rm aws-cli --image=perconalab/awscli --restart=Never -- \
152153
/usr/bin/env AWS_ACCESS_KEY_ID=some-access-key AWS_SECRET_ACCESS_KEY=some-secret-key AWS_DEFAULT_REGION=us-east-1 \
153154
/usr/bin/aws --endpoint-url http://minio-service:9000 s3 ls "s3://${backup_dest_minio}/rs0/" \
154-
| grep "myApp.test.gz"
155+
| grep "myApp.test.gz"; do
156+
sleep 1
157+
let retry+=1
158+
if [ $retry -ge 60 ]; then
159+
echo "Max retry count $retry reached. Something went wrong with writing backup"
160+
exit 1
161+
fi
162+
done
155163
insert_data_mongos "100501" "myApp" "" "$custom_port"
156164
insert_data_mongos "100501" "myApp1" "" "$custom_port"
157165
insert_data_mongos "100501" "myApp2" "" "$custom_port"

e2e-tests/functions

Lines changed: 42 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -233,14 +233,15 @@ wait_backup() {
233233
echo -n .
234234
let retry+=1
235235
current_status=$(kubectl_bin get psmdb-backup $backup_name -o jsonpath='{.status.state}')
236-
if [[ $retry -ge 360 || ${current_status} == 'error' ]]; then
236+
if [[ $retry -ge 600 || ${current_status} == 'error' ]]; then
237237
kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \
238238
| grep -v 'level=info' \
239239
| grep -v 'level=debug' \
240240
| grep -v 'Getting tasks for pod' \
241241
| grep -v 'Getting pods from source' \
242-
| tail -100
243-
echo "Backup object psmdb-backup/${backup_name} is in ${current_state} state."
242+
| tail -200
243+
kubectl_bin get psmdb-backup
244+
echo "Backup object psmdb-backup/${backup_name} is in ${current_status} state."
244245
echo something went wrong with operator or kubernetes cluster
245246
exit 1
246247
fi
@@ -379,35 +380,54 @@ wait_restore() {
379380
local ok_if_ready=${6:-0}
380381

381382
set +o xtrace
383+
# We need to run wait till object is created, otherwise wait fails at once
384+
echo -n "Waiting for the psmdb-restore/restore-$backup_name object to be created"
385+
retry_object=0
386+
until kubectl_bin get psmdb-restore restore-$backup_name >/dev/null 2>&1; do
387+
echo -n .
388+
let retry_object+=1
389+
if [[ ${retry_object} -ge 60 ]]; then
390+
echo "psmdb-restore/restore-$backup_name object was not created."
391+
exit 1
392+
fi
393+
sleep 1
394+
done
395+
echo "OK"
396+
397+
echo -n "Waiting psmdb-restore/restore-${backup_name} to reach state \"${target_state}\" "
382398
retry=0
383-
echo -n "waiting psmdb-restore/restore-${backup_name} to reach ${target_state} state"
384-
local current_state=
385-
until [[ ${current_state} == ${target_state} ]]; do
386-
sleep 0.5
399+
retry_count=$((wait_time / 60))
400+
until kubectl wait psmdb-restore restore-${backup_name} --for=jsonpath='{.status.state}'=${target_state} --timeout=60s >/dev/null 2>&1; do
387401
echo -n .
388402
let retry+=1
389403
current_state=$(kubectl_bin get psmdb-restore restore-$backup_name -o jsonpath='{.status.state}')
390404
if [[ ${ok_if_ready} == 1 && ${current_state} == 'ready' ]]; then
391405
echo "OK"
392406
break
393407
fi
394-
if [[ $retry -ge $wait_time || ${current_state} == 'error' ]]; then
395-
desc "operator logs:"
396-
kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) | tail -100
408+
if [[ ${retry} -ge ${retry_count} || ${current_state} == 'error' ]]; then
409+
kubectl_bin logs ${OPERATOR_NS:+-n $OPERATOR_NS} $(get_operator_pod) \
410+
| grep -v 'level=info' \
411+
| grep -v 'level=debug' \
412+
| grep -v 'Getting tasks for pod' \
413+
| grep -v 'Getting pods from source' \
414+
| tail -100
397415

398416
if is_physical_backup ${backup_name}; then
399417
collect_physical_restore_logs
400418
fi
401419

402420
kubectl_bin get psmdb-restore restore-${backup_name} -o yaml
403-
log "Restore object restore-${backup_name} is in ${current_state} state."
421+
422+
echo "Restore object restore-${backup_name} is in ${current_state} state."
423+
echo something went wrong with operator or kubernetes cluster
404424
exit 1
405425
fi
406426
done
407427
echo "OK"
408428
set_debug
409429

410-
if [ $wait_cluster_consistency -eq 1 ]; then
430+
if [[ $wait_cluster_consistency -eq 1 ]]; then
411431
wait_cluster_consistency "${cluster_name}"
412432
fi
413433
}
@@ -1054,6 +1074,14 @@ delete_crd() {
10541074
kubectl_bin delete -f "${src_dir}/deploy/$rbac_yaml" --ignore-not-found || true
10551075
}
10561076

1077+
delete_backups() {
1078+
desc 'Delete psmdb-backup'
1079+
if [ $(kubectl_bin get psmdb-backup --no-headers | wc -l) != 0 ]; then
1080+
kubectl_bin get psmdb-backup
1081+
kubectl_bin delete psmdb-backup --all
1082+
fi
1083+
}
1084+
10571085
destroy() {
10581086
local namespace="$1"
10591087
local ignore_logs="${2:-true}"
@@ -1074,6 +1102,8 @@ destroy() {
10741102
#TODO: maybe will be enabled later
10751103
#diff $test_dir/compare/operator.log $tmp_dir/operator.log
10761104

1105+
delete_backups
1106+
10771107
delete_crd
10781108

10791109
destroy_cert_manager || true

e2e-tests/pitr-physical/run

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,8 @@ main() {
7878
sleep 10
7979
done
8080

81+
sleep 10
82+
8183
check_recovery $backup_name_minio-2 date "${last_chunk}" "-2nd" "$cluster"
8284

8385
run_backup $backup_name_minio 3 physical

0 commit comments

Comments (0)