Skip to content

Commit 6baecee

Browse files
committed
Prevent switching failover mode to 'eventual' when a new replica set has been added to a cluster
1 parent 3dc926d commit 6baecee

File tree

3 files changed

+234
-17
lines changed

3 files changed

+234
-17
lines changed

controllers/cluster_controller.go

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -437,16 +437,24 @@ func (r *ClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
437437
if stsAnnotations["tarantool.io/failoverEnabled"] == "1" {
438438
reqLogger.Info("failover is enabled, not retrying")
439439
} else {
440-
if err := topologyClient.SetFailover(true); err != nil {
441-
reqLogger.Error(err, "failed to enable cluster failover")
442-
} else {
443-
reqLogger.Info("enabled failover")
440+
enabled, err := topologyClient.GetFailover()
441+
if err != nil {
442+
reqLogger.Error(err, "failed to get failover status")
443+
continue
444+
}
444445

445-
stsAnnotations["tarantool.io/failoverEnabled"] = "1"
446-
sts.SetAnnotations(stsAnnotations)
447-
if err := r.Update(context.TODO(), &sts); err != nil {
448-
reqLogger.Error(err, "failed to set failover enabled annotation")
446+
if !enabled {
447+
if err := topologyClient.SetFailover(true); err != nil {
448+
reqLogger.Error(err, "failed to enable cluster failover")
449+
continue
449450
}
451+
reqLogger.Info("enabled failover")
452+
}
453+
454+
stsAnnotations["tarantool.io/failoverEnabled"] = "1"
455+
sts.SetAnnotations(stsAnnotations)
456+
if err := r.Update(context.TODO(), &sts); err != nil {
457+
reqLogger.Error(err, "failed to set failover enabled annotation")
450458
}
451459
}
452460
}

controllers/topology/builtin.go

Lines changed: 49 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -53,14 +53,15 @@ type BootstrapVshardResponse struct {
5353
Errors []*ResponseError `json:"errors,omitempty"`
5454
}
5555

56-
// FailoverData Structure of data for changing failover status
57-
type FailoverData struct {
56+
// FailoverParams carries the failover mode string, decoded from the "mode"
// JSON field of a GraphQL response.
type FailoverParams struct {
57+
Mode string `json:"mode"`
5858
}
5959

60-
// FailoverResponse type struct for returning on failovers
61-
type FailoverResponse struct {
62-
Data *FailoverData
63-
Errors []*ResponseError
60+
// ClusterData wraps the failover parameters found under the
// "failover_params" JSON key.
type ClusterData struct {
61+
FailoverParams *FailoverParams `json:"failover_params"`
62+
}
63+
// FailoverResponseData is the decode target for the failover GraphQL
// requests; the payload of interest sits under the "cluster" JSON key.
type FailoverResponseData struct {
64+
Cluster *ClusterData `json:"cluster"`
6465
}
6566

6667
// BuiltInTopologyService .
@@ -171,6 +172,22 @@ var getServerStatQuery = `query serverList {
171172
}
172173
}`
173174

175+
// getFailoverStateQuery is the GraphQL query that reads
// cluster.failover_params.mode.
var getFailoverStateQuery = `query {
176+
cluster {
177+
failover_params {
178+
mode
179+
}
180+
}
181+
}`
182+
183+
// setFailoverStateQuery is the GraphQL mutation that passes the $mode
// variable to cluster.failover_params and selects the resulting mode.
var setFailoverStateQuery = `mutation setFailoverMode($mode: String) {
184+
cluster {
185+
failover_params(mode: $mode) {
186+
mode
187+
}
188+
}
189+
}`
190+
174191
// An interface describing an object with accessor methods for labels and annotations
175192
type ObjectWithMeta interface {
176193
GetLabels() map[string]string
@@ -290,11 +307,15 @@ func (s *BuiltInTopologyService) Join(pod *corev1.Pod) error {
290307
// SetFailover enables cluster failover
291308
func (s *BuiltInTopologyService) SetFailover(enabled bool) error {
292309
client := graphql.NewClient(s.serviceHost, graphql.WithHTTPClient(&http.Client{Timeout: time.Duration(time.Second * 5)}))
293-
req := graphql.NewRequest(`mutation changeFailover($enabled: Boolean!) { cluster { failover(enabled: $enabled) }}`)
310+
req := graphql.NewRequest(setFailoverStateQuery)
294311

295-
req.Var("enabled", enabled)
312+
mode := "eventual"
313+
if !enabled {
314+
mode = "disabled"
315+
}
316+
req.Var("mode", mode)
296317

297-
resp := &FailoverData{}
318+
resp := &FailoverResponseData{}
298319
if err := client.Run(context.TODO(), req, resp); err != nil {
299320
log.Error(err, "failoverError")
300321
return errors.New("failed to enable cluster failover")
@@ -303,6 +324,25 @@ func (s *BuiltInTopologyService) SetFailover(enabled bool) error {
303324
return nil
304325
}
305326

327+
// GetFailover runs getFailoverStateQuery against s.serviceHost (with a
// 5-second HTTP client timeout) and reports whether failover is enabled.
//
// It returns true for any reported mode other than "disabled". It returns an
// error when the GraphQL request fails, or when the decoded response lacks a
// non-empty cluster.failover_params.mode value.
func (s *BuiltInTopologyService) GetFailover() (bool, error) {
328+
client := graphql.NewClient(s.serviceHost, graphql.WithHTTPClient(&http.Client{Timeout: time.Duration(time.Second * 5)}))
329+
req := graphql.NewRequest(getFailoverStateQuery)
330+
331+
resp := &FailoverResponseData{}
332+
// The underlying error is only logged; the caller receives a fixed message.
if err := client.Run(context.TODO(), req, resp); err != nil {
333+
log.Error(err, "failoverError")
334+
return false, errors.New("failed to get info about failover")
335+
}
336+
337+
// Reject responses with any missing level of the expected structure.
// NOTE(review): resp was assigned a non-nil pointer above, so the
// resp == nil term can never be true here.
if resp == nil || resp.Cluster == nil || resp.Cluster.FailoverParams == nil || resp.Cluster.FailoverParams.Mode == "" {
338+
return false, errors.New("failed to get info about failover: broken gql response")
339+
}
340+
341+
mode := resp.Cluster.FailoverParams.Mode
342+
343+
// Every mode except "disabled" counts as enabled.
return mode != "disabled", nil
344+
}
345+
306346
// Expel removes an instance from the replicaset
307347
func (s *BuiltInTopologyService) Expel(pod *corev1.Pod) error {
308348
req := fmt.Sprintf("mutation {expel_instance:expel_server(uuid:\\\"%s\\\")}", pod.GetAnnotations()["tarantool.io/instance_uuid"])

controllers/topology/builtin_test.go

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
package topology
22

33
import (
4+
"encoding/json"
5+
"io"
6+
"io/ioutil"
7+
"net/http"
8+
"net/http/httptest"
49
"strings"
510
"testing"
611

@@ -149,3 +154,167 @@ func TestGetRoles_ParseRolesFromAnnotations(t *testing.T) {
149154
}
150155
}
151156
}
157+
158+
// FailoverVariables mirrors the "variables" object of a failover GraphQL
// request as seen by the test server; only the "mode" field is decoded.
type FailoverVariables struct {
159+
Mode string `json:"mode"`
160+
}
161+
162+
// FailoverQuery is the JSON request body the test server decodes: the
// GraphQL query text plus its variables.
type FailoverQuery struct {
163+
Query string `json:"query"`
164+
Variables FailoverVariables `json:"variables"`
165+
}
166+
167+
// setFailoverGQL is the exact mutation text TestSetFailover expects to
// receive in the request's "query" field.
var setFailoverGQL = `mutation setFailoverMode($mode: String) {
168+
cluster {
169+
failover_params(mode: $mode) {
170+
mode
171+
}
172+
}
173+
}`
174+
175+
// TestSetFailover checks that SetFailover(true) sends the setFailoverGQL
// mutation with the mode variable set to "eventual". A stub HTTP server
// validates the request body and answers with a canned success response.
//
// NOTE(review): t.Fatalf is called inside the HTTP handler; the testing
// package documents that FailNow must be called from the goroutine running
// the test, so these checks should probably use t.Errorf — confirm.
func TestSetFailover(t *testing.T) {
176+
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
177+
b, err := ioutil.ReadAll(r.Body)
178+
if err != nil {
179+
t.Fatalf("%s", err)
180+
}
181+
query := FailoverQuery{}
182+
// NOTE(review): "qeury" in the message below is a typo for "query".
if err = json.Unmarshal(b, &query); err != nil {
183+
t.Fatalf("Wrong qeury: %s", err)
184+
}
185+
186+
// The query text must match the expected mutation exactly.
if query.Query != setFailoverGQL {
187+
t.Fatalf("Wrong query: %s", query.Query)
188+
}
189+
190+
// Enabling failover is expected to request the "eventual" mode.
if query.Variables.Mode != "eventual" {
191+
t.Fatalf("Wrong failover type: %s", query.Variables.Mode)
192+
}
193+
194+
_, _ = io.WriteString(w, `{
195+
"data": {
196+
"cluster": {
197+
"failover_params": {
198+
"mode": "eventual"
199+
}
200+
}
201+
}
202+
}`)
203+
}))
204+
205+
defer srv.Close()
206+
207+
// Point the service under test at the stub server.
topology := BuiltInTopologyService{
208+
serviceHost: srv.URL,
209+
clusterID: "uuid",
210+
}
211+
212+
err := topology.SetFailover(true)
213+
if err != nil {
214+
t.Fatalf("%s", err)
215+
}
216+
}
217+
218+
// getFailoverGQL is the exact query text TestGetFailover expects to receive
// in the request's "query" field.
var getFailoverGQL = `query {
219+
cluster {
220+
failover_params {
221+
mode
222+
}
223+
}
224+
}`
225+
226+
// TestGetFailover checks how GetFailover interprets the reported failover
// mode. A stub HTTP server verifies the incoming query text and replies with
// whatever JSON is currently stored in failoverMode. The test then walks
// through four responses: "eventual" and "stateful" (both expected to read as
// enabled), "disabled" (expected to read as disabled), and a response with no
// failover_params (expected to produce an error).
//
// NOTE(review): t.Fatalf is called inside the HTTP handler; the testing
// package documents that FailNow must be called from the goroutine running
// the test, so these checks should probably use t.Errorf — confirm.
func TestGetFailover(t *testing.T) {
227+
// Response body served by the stub; reassigned before each GetFailover call.
var failoverMode string
228+
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
229+
b, err := ioutil.ReadAll(r.Body)
230+
if err != nil {
231+
t.Fatalf("%s", err)
232+
}
233+
query := FailoverQuery{}
234+
if err = json.Unmarshal(b, &query); err != nil {
235+
t.Fatalf("Wrong qeury: %s", err)
236+
}
237+
238+
// The query text must match the expected query exactly.
if query.Query != getFailoverGQL {
239+
t.Fatalf("Wrong query: %s", query.Query)
240+
}
241+
242+
_, _ = io.WriteString(w, failoverMode)
243+
}))
244+
245+
defer srv.Close()
246+
247+
// Point the service under test at the stub server.
topology := BuiltInTopologyService{
248+
serviceHost: srv.URL,
249+
clusterID: "uuid",
250+
}
251+
252+
// Case 1: mode "eventual" must be reported as enabled.
failoverMode = `{
253+
"data": {
254+
"cluster": {
255+
"failover_params": {
256+
"mode": "eventual"
257+
}
258+
}
259+
}
260+
}`
261+
262+
enabled, err := topology.GetFailover()
263+
if err != nil {
264+
t.Fatalf("%s", err)
265+
}
266+
267+
if !enabled {
268+
t.Fatal("Failover should be enabled")
269+
}
270+
271+
// Case 2: mode "stateful" must also be reported as enabled.
failoverMode = `{
272+
"data": {
273+
"cluster": {
274+
"failover_params": {
275+
"mode": "stateful"
276+
}
277+
}
278+
}
279+
}`
280+
281+
enabled, err = topology.GetFailover()
282+
if err != nil {
283+
t.Fatalf("%s", err)
284+
}
285+
286+
if !enabled {
287+
t.Fatal("Failover should be enabled")
288+
}
289+
290+
// Case 3: mode "disabled" must be reported as not enabled.
failoverMode = `{
291+
"data": {
292+
"cluster": {
293+
"failover_params": {
294+
"mode": "disabled"
295+
}
296+
}
297+
}
298+
}`
299+
300+
enabled, err = topology.GetFailover()
301+
if err != nil {
302+
t.Fatalf("%s", err)
303+
}
304+
305+
if enabled {
306+
t.Fatal("Failover should be disabled")
307+
}
308+
309+
// Case 4: a response without failover_params must yield an error.
failoverMode = `{
310+
"data": {
311+
"cluster": {
312+
}
313+
}
314+
}`
315+
316+
_, err = topology.GetFailover()
317+
if err == nil {
318+
t.Fatal("Wrong answer format, but error wasn't thrown")
319+
}
320+
}

0 commit comments

Comments
 (0)