
Commit 028e8c3

Authored by sergelogvinov, hors, nmarukovich, and inelpandzic
K8SPSMDB-1003: Kubernetes node zone/region tag (#1360)
* K8SPSMDB-1003 - Kubernetes node tags zone/region: add the Kubernetes node zone/region tags to the mongo nodes.
* Remove the warning message when the special permission is not granted.
* Fix test
* Fix cross-site test
* Fix image
* Update test
* Delete unused code
* Update cross-site test
* Fix PR comments

Co-authored-by: Viacheslav Sarzhan <slava.sarzhan@percona.com>
Co-authored-by: Natalia Marukovich <nmarukovich@gmail.com>
Co-authored-by: Natalia Marukovich <natalia.marukovich@percona.com>
Co-authored-by: Inel Pandzic <inel.pandzic@percona.com>
1 parent 282394a commit 028e8c3
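In effect, the operator now copies the node's topology labels into each replica set member's tags alongside the existing podName/serviceName tags. Purely as an illustration (cluster, pod, and node names below are hypothetical, not taken from this commit), a "mongod" or "cfg" member's tags end up shaped roughly like this:

// Illustrative tag set for a "mongod"/"cfg" replica set member after this
// change (cluster, pod, and node names are hypothetical).
var exampleTags = map[string]string{
	"nodeName":    "worker-1",         // pod.Spec.NodeName
	"podName":     "my-cluster-rs0-0", // pod.Name
	"serviceName": "my-cluster",       // cr.Name
	"region":      "us-central1",      // node label topology.kubernetes.io/region
	"zone":        "us-central1-a",    // node label topology.kubernetes.io/zone
}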

6 files changed: +88 / -14 lines changed

deploy/cw-bundle.yaml

Lines changed: 8 additions & 0 deletions

@@ -18336,6 +18336,14 @@ rules:
       - update
       - patch
       - delete
+  - apiGroups:
+      - ""
+    resources:
+      - nodes
+    verbs:
+      - get
+      - list
+      - watch
   - apiGroups:
       - ""
     resources:

deploy/cw-rbac.yaml

Lines changed: 8 additions & 0 deletions

@@ -35,6 +35,14 @@ rules:
       - update
       - patch
       - delete
+  - apiGroups:
+      - ""
+    resources:
+      - nodes
+    verbs:
+      - get
+      - list
+      - watch
   - apiGroups:
       - ""
     resources:

e2e-tests/cross-site-sharded/run

Lines changed: 32 additions & 4 deletions

@@ -13,6 +13,26 @@ unset OPERATOR_NS
 main_cluster="cross-site-sharded-main"
 replica_cluster="cross-site-sharded-replica"
 
+wait_for_members() {
+	local endpoint="$1"
+	local rsName="$2"
+	local nodes_amount=0
+	until [[ ${nodes_amount} == 6 ]]; do
+		nodes_amount=$(run_mongos 'rs.conf().members.length' "clusterAdmin:clusterAdmin123456@$endpoint" "mongodb" ":27017" \
+			| egrep -v 'I NETWORK|W NETWORK|Error saving history file|Percona Server for MongoDB|connecting to:|Unable to reach primary for set|Implicit session:|versions do not match|Error saving history file:|bye' \
+			| $sed -re 's/ObjectId\("[0-9a-f]+"\)//; s/-[0-9]+.svc/-xxx.svc/')
+
+		echo "waiting for all members to be configured in ${rsName}"
+		let retry+=1
+		if [ $retry -ge 15 ]; then
+			echo "Max retry count $retry reached. something went wrong with mongo cluster. Config for endpoint $endpoint has $nodes_amount but expected 6."
+			exit 1
+		fi
+		echo -n .
+		sleep 10
+	done
+}
+
 desc "create main cluster"
 create_infra "$namespace"

@@ -118,7 +138,10 @@ sleep 30
 
 desc "create replica PSMDB cluster $cluster"
 apply_cluster "$test_dir/conf/${replica_cluster}.yml"
-sleep 300
+
+wait_for_running $replica_cluster-rs0 3 "false"
+wait_for_running $replica_cluster-rs1 3 "false"
+wait_for_running $replica_cluster-cfg 3 "false"
 
 replica_cfg_0_endpoint=$(get_service_ip cross-site-sharded-replica-cfg-0 'cfg')
 replica_cfg_1_endpoint=$(get_service_ip cross-site-sharded-replica-cfg-1 'cfg')

@@ -141,7 +164,10 @@ kubectl_bin patch psmdb ${main_cluster} --type=merge --patch '{
 	}
 }'
 
-sleep 60
+wait_for_members $replica_cfg_0_endpoint cfg
+wait_for_members $replica_rs0_0_endpoint rs0
+wait_for_members $replica_rs1_0_endpoint rs1
+
 kubectl_bin config set-context $(kubectl_bin config current-context) --namespace="$replica_namespace"
 
 desc 'check if all 3 Pods started'

@@ -165,8 +191,8 @@ compare_mongos_cmd "find" "myApp:myPass@$main_cluster-mongos.$namespace"
 
 desc 'test failover'
 kubectl_bin config set-context $(kubectl_bin config current-context) --namespace="$namespace"
+
 kubectl_bin delete psmdb $main_cluster
-sleep 60
 
 desc 'run disaster recovery script for replset: cfg'
 run_script_mongos "${test_dir}/disaster_recovery.js" "clusterAdmin:clusterAdmin123456@$replica_cfg_0_endpoint" "mongodb" ":27017"

@@ -180,7 +206,9 @@ run_script_mongos "${test_dir}/disaster_recovery.js" "clusterAdmin:clusterAdmin1
 desc 'make replica cluster managed'
 kubectl_bin config set-context $(kubectl_bin config current-context) --namespace="$replica_namespace"
 kubectl_bin patch psmdb ${replica_cluster} --type=merge --patch '{"spec":{"unmanaged": false}}'
-sleep 120
+
+wait_for_running $replica_cluster-rs0 3
+wait_for_running $replica_cluster-cfg 3
 
 desc "check failover status"
 compare_mongos_cmd "find" "myApp:myPass@$replica_cluster-mongos.$replica_namespace"

e2e-tests/serviceless-external-nodes/run

Lines changed: 2 additions & 0 deletions

@@ -46,6 +46,8 @@ cat $tmp_dir/psmdb.yaml \
 
 wait_cluster_consistency ${cluster}
 
+# wait until the config is ready
+sleep 30
 run_mongo 'rs.status().members.forEach(function(z){printjson(z.name);printjson(z.stateStr); })' "clusterAdmin:clusterAdmin123456@${cluster}-rs0-0.${cluster}-rs0.${namespace}" "mongodb" | egrep -v 'I NETWORK|W NETWORK|Error saving history file|Percona Server for MongoDB|connecting to:|Unable to reach primary for set|Implicit session:|versions do not match|Error saving history file:|bye' >"$tmp_dir/rs.txt"
 
 cat "${test_dir}/compare/rs.txt" \

pkg/controller/perconaservermongodb/mgo.go

Lines changed: 20 additions & 10 deletions

@@ -19,6 +19,7 @@ import (
 	api "github.com/percona/percona-server-mongodb-operator/pkg/apis/psmdb/v1"
 	"github.com/percona/percona-server-mongodb-operator/pkg/psmdb"
 	"github.com/percona/percona-server-mongodb-operator/pkg/psmdb/mongo"
+	"github.com/percona/percona-server-mongodb-operator/pkg/util"
 )
 
 var errReplsetLimit = fmt.Errorf("maximum replset member (%d) count reached", mongo.MaxMembers)

@@ -267,6 +268,20 @@ func (r *ReconcilePerconaServerMongoDB) updateConfigMembers(ctx context.Context,
 			return 0, fmt.Errorf("get host for pod %s: %v", pod.Name, err)
 		}
 
+		nodeLabels := mongo.ReplsetTags{
+			"nodeName":    pod.Spec.NodeName,
+			"podName":     pod.Name,
+			"serviceName": cr.Name,
+		}
+
+		labels, err := psmdb.GetNodeLabels(ctx, r.client, cr, pod)
+		if err == nil {
+			nodeLabels = util.MapMerge(nodeLabels, mongo.ReplsetTags{
+				"region": labels[corev1.LabelTopologyRegion],
+				"zone":   labels[corev1.LabelTopologyZone],
+			})
+		}
+
 		member := mongo.ConfigMember{
 			ID:   key,
 			Host: host,

@@ -293,16 +308,11 @@ func (r *ReconcilePerconaServerMongoDB) updateConfigMembers(ctx context.Context,
 			member.ArbiterOnly = true
 			member.Priority = 0
 		case "mongod", "cfg":
-			member.Tags = mongo.ReplsetTags{
-				"podName":     pod.Name,
-				"serviceName": cr.Name,
-			}
+			member.Tags = nodeLabels
 		case "nonVoting":
-			member.Tags = mongo.ReplsetTags{
-				"podName":     pod.Name,
-				"serviceName": cr.Name,
-				"nonVoting":   "true",
-			}
+			member.Tags = util.MapMerge(mongo.ReplsetTags{
+				"nonVoting": "true",
+			}, nodeLabels)
 			member.Priority = 0
 			member.Votes = 0
 		}

@@ -597,7 +607,7 @@ func (r *ReconcilePerconaServerMongoDB) handleReplsetInit(ctx context.Context, c
 		"sh", "-c",
 		fmt.Sprintf(
 			`
-			cat <<-EOF | %s
+		cat <<-EOF | %s
 			rs.initiate(
 				{
 					_id: '%s',
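The diff above relies on util.MapMerge from the operator's new pkg/util import to overlay the topology tags onto the base tags, but the helper's body is not part of this commit. As a rough, hypothetical sketch only (the real helper in pkg/util may differ in name constraints and signature), a merge function of this shape would produce the behavior used above, with keys from later maps overriding earlier ones:

// Sketch of a MapMerge-style helper (illustrative, not the operator's actual code):
// copies entries from each map in order, so later maps override earlier keys.
func MapMerge[K comparable, V any](maps ...map[K]V) map[K]V {
	out := make(map[K]V)
	for _, m := range maps {
		for k, v := range m {
			out[k] = v
		}
	}
	return out
}

Note that GetNodeLabels errors are deliberately swallowed here: if the operator lacks permission to read Node objects, members simply keep the nodeName/podName/serviceName tags without region/zone.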

pkg/psmdb/getters.go

Lines changed: 18 additions & 0 deletions

@@ -3,6 +3,7 @@ package psmdb
 import (
 	"context"
 	"sort"
+	"time"
 
 	appsv1 "k8s.io/api/apps/v1"
 	corev1 "k8s.io/api/core/v1"

@@ -165,3 +166,20 @@ func GetExportedServices(ctx context.Context, cl client.Client, cr *api.PerconaS
 
 	return seList, nil
 }
+
+func GetNodeLabels(ctx context.Context, cl client.Client, cr *api.PerconaServerMongoDB, pod corev1.Pod) (map[string]string, error) {
+	// Set a timeout for the request, to avoid hanging forever
+	ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
+	defer cancel()
+
+	node := &corev1.Node{}
+
+	err := cl.Get(ctx, client.ObjectKey{
+		Name: pod.Spec.NodeName,
+	}, node)
+	if err != nil {
+		return nil, errors.Wrapf(err, "failed to get node %s", pod.Spec.NodeName)
+	}
+
+	return node.Labels, nil
+}
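For context on where the region/zone values come from: corev1.LabelTopologyRegion and corev1.LabelTopologyZone are the well-known Kubernetes labels topology.kubernetes.io/region and topology.kubernetes.io/zone that cloud providers set on Node objects, and they are what the RBAC additions above let the operator read. A self-contained, test-style sketch of the lookup pattern GetNodeLabels uses (controller-runtime fake client; node name and label values are made up):

package main

import (
	"context"
	"fmt"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/client/fake"
)

func main() {
	// A fake Node carrying the standard topology labels (values are hypothetical).
	node := &corev1.Node{
		ObjectMeta: metav1.ObjectMeta{
			Name: "worker-1",
			Labels: map[string]string{
				corev1.LabelTopologyRegion: "us-central1",   // topology.kubernetes.io/region
				corev1.LabelTopologyZone:   "us-central1-a", // topology.kubernetes.io/zone
			},
		},
	}
	cl := fake.NewClientBuilder().WithObjects(node).Build()

	// Same pattern as GetNodeLabels: Get the Node by name, then read its labels.
	got := &corev1.Node{}
	if err := cl.Get(context.TODO(), client.ObjectKey{Name: "worker-1"}, got); err != nil {
		panic(err)
	}
	fmt.Println(got.Labels[corev1.LabelTopologyRegion], got.Labels[corev1.LabelTopologyZone])
}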
