Skip to content

K8SPSMDB-1211: handle FULL CLUSTER CRASH error during the restore #1926

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 13 commits into
base: main
Choose a base branch
from
70 changes: 70 additions & 0 deletions pkg/controller/common/common.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package common
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think packages named common, utils, etc., tend to be vague, as they imply shared logic without a clearly defined domain or separation of concerns.

In this file, the main struct is CommonReconciler, but it's not clear what exactly is being reconciled. The struct also mixes responsibilities, since it constructs and returns heterogeneous components such as backup.PBM, mongo.Client, a scheme, and a k8s client.

To improve clarity and maintainability, I'd suggest:

  • Keeping the scheme and the Kubernetes client in ReconcilePerconaServerMongoDB, and giving the related functions receivers of type ReconcilePerconaServerMongoDB.

  • Splitting out PBM-related logic into a dedicated PBM factory/service.

  • Doing the same for the MongoClientProvider.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


import (
"context"

"github.com/pkg/errors"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
"sigs.k8s.io/controller-runtime/pkg/client"

api "github.com/percona/percona-server-mongodb-operator/pkg/apis/psmdb/v1"
"github.com/percona/percona-server-mongodb-operator/pkg/psmdb"
"github.com/percona/percona-server-mongodb-operator/pkg/psmdb/backup"
"github.com/percona/percona-server-mongodb-operator/pkg/psmdb/mongo"
)

// New assembles a CommonReconciler from its dependencies. The arguments are
// stored as given; mongoClientProvider may be nil, in which case
// getMongoClientProvider builds a default provider from the client on demand.
func New(client client.Client, scheme *runtime.Scheme, newPBMFunc backup.NewPBMFunc, mongoClientProvider psmdb.MongoClientProvider) CommonReconciler {
	var r CommonReconciler
	r.client = client
	r.scheme = scheme
	r.newPBMFunc = newPBMFunc
	r.mongoClientProvider = mongoClientProvider
	return r
}

// CommonReconciler bundles the dependencies that reconcilers share: the
// Kubernetes client and scheme, plus the factories used to obtain PBM and
// MongoDB clients.
type CommonReconciler struct {
	// Kubernetes API access.
	client client.Client
	scheme *runtime.Scheme

	// newPBMFunc is the constructor invoked by NewPBM.
	newPBMFunc backup.NewPBMFunc
	// mongoClientProvider supplies MongoDB connections; when nil, a default
	// provider is created from client on each use.
	mongoClientProvider psmdb.MongoClientProvider
}

// Client returns the Kubernetes client this reconciler was constructed with.
func (r *CommonReconciler) Client() client.Client {
	return r.client
}

// Scheme returns the runtime scheme this reconciler was constructed with.
func (r *CommonReconciler) Scheme() *runtime.Scheme {
	return r.scheme
}

// NewPBM creates a backup.PBM for the given cluster by calling the configured
// constructor with the reconciler's own Kubernetes client.
func (r *CommonReconciler) NewPBM(ctx context.Context, cluster *api.PerconaServerMongoDB) (backup.PBM, error) {
	newPBM := r.newPBMFunc
	return newPBM(ctx, r.client, cluster)
}

// NewPBMFunc returns the PBM constructor this reconciler was configured with,
// so callers can pass it on (for example when building another reconciler).
func (r *CommonReconciler) NewPBMFunc() backup.NewPBMFunc {
	return r.newPBMFunc
}

// getMongoClientProvider returns the configured MongoDB client provider. When
// none was supplied, it falls back to a provider built from the Kubernetes
// client; that fallback is created per call and is not stored on r.
func (r *CommonReconciler) getMongoClientProvider() psmdb.MongoClientProvider {
	if provider := r.mongoClientProvider; provider != nil {
		return provider
	}
	return psmdb.NewProvider(r.client)
}

// MongoClientWithRole asks the MongoDB client provider for a client to the
// given replset of cr, authenticated as the given system user role.
func (r *CommonReconciler) MongoClientWithRole(ctx context.Context, cr *api.PerconaServerMongoDB, rs *api.ReplsetSpec, role api.SystemUserRole) (mongo.Client, error) {
	provider := r.getMongoClientProvider()
	return provider.Mongo(ctx, cr, rs, role)
}

// MongosClientWithRole asks the MongoDB client provider for a mongos client
// for cr, authenticated as the given system user role.
func (r *CommonReconciler) MongosClientWithRole(ctx context.Context, cr *api.PerconaServerMongoDB, role api.SystemUserRole) (mongo.Client, error) {
	provider := r.getMongoClientProvider()
	return provider.Mongos(ctx, cr, role)
}

// StandaloneClientWithRole resolves the host of the given pod via
// psmdb.MongoHost and then asks the MongoDB client provider for a standalone
// client to that host, authenticated as the given system user role. It
// returns a wrapped error if the host cannot be resolved.
func (r *CommonReconciler) StandaloneClientWithRole(ctx context.Context, cr *api.PerconaServerMongoDB, rs *api.ReplsetSpec, role api.SystemUserRole, pod corev1.Pod) (mongo.Client, error) {
	dnsMode := cr.Spec.ClusterServiceDNSMode
	exposed := rs.Expose.Enabled

	host, err := psmdb.MongoHost(ctx, r.client, cr, dnsMode, rs, exposed, pod)
	if err != nil {
		return nil, errors.Wrap(err, "failed to get mongo host")
	}

	provider := r.getMongoClientProvider()
	return provider.Standalone(ctx, cr, role, host, cr.TLSEnabled())
}
2 changes: 1 addition & 1 deletion pkg/controller/perconaservermongodb/backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ func (r *ReconcilePerconaServerMongoDB) createOrUpdateBackupTask(ctx context.Con
if err != nil {
return errors.Wrap(err, "can't create job")
}
err = setControllerReference(cr, &cjob, r.scheme)
err = setControllerReference(cr, &cjob, r.Scheme())
if err != nil {
return errors.Wrapf(err, "set owner reference for backup task %s", cjob.Name)
}
Expand Down
6 changes: 3 additions & 3 deletions pkg/controller/perconaservermongodb/balancer.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@ import (
"context"
"time"

"github.com/percona/percona-server-mongodb-operator/pkg/psmdb"
"github.com/pkg/errors"
corev1 "k8s.io/api/core/v1"
k8sErrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/types"
logf "sigs.k8s.io/controller-runtime/pkg/log"

api "github.com/percona/percona-server-mongodb-operator/pkg/apis/psmdb/v1"
"github.com/percona/percona-server-mongodb-operator/pkg/psmdb"
)

func (r *ReconcilePerconaServerMongoDB) enableBalancerIfNeeded(ctx context.Context, cr *api.PerconaServerMongoDB) error {
Expand Down Expand Up @@ -85,7 +85,7 @@ func (r *ReconcilePerconaServerMongoDB) enableBalancerIfNeeded(ctx context.Conte
}
}

mongosSession, err := r.mongosClientWithRole(ctx, cr, api.RoleClusterAdmin)
mongosSession, err := r.MongosClientWithRole(ctx, cr, api.RoleClusterAdmin)
if err != nil {
return errors.Wrap(err, "failed to get mongos connection")
}
Expand Down Expand Up @@ -141,7 +141,7 @@ func (r *ReconcilePerconaServerMongoDB) disableBalancer(ctx context.Context, cr
return errors.Wrapf(err, "get mongos statefulset %s", msSts.Name)
}

mongosSession, err := r.mongosClientWithRole(ctx, cr, api.RoleClusterAdmin)
mongosSession, err := r.MongosClientWithRole(ctx, cr, api.RoleClusterAdmin)
if err != nil {
return errors.Wrap(err, "failed to get mongos connection")
}
Expand Down
73 changes: 0 additions & 73 deletions pkg/controller/perconaservermongodb/connections.go

This file was deleted.

3 changes: 2 additions & 1 deletion pkg/controller/perconaservermongodb/connections_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/reconcile"

api "github.com/percona/percona-server-mongodb-operator/pkg/apis/psmdb/v1"
"github.com/percona/percona-server-mongodb-operator/pkg/controller/common"
"github.com/percona/percona-server-mongodb-operator/pkg/naming"
"github.com/percona/percona-server-mongodb-operator/pkg/psmdb"
"github.com/percona/percona-server-mongodb-operator/pkg/psmdb/mongo"
Expand Down Expand Up @@ -158,7 +159,7 @@ func TestConnectionLeaks(t *testing.T) {
connectionCount := new(int)

r := buildFakeClient(obj...)
r.mongoClientProvider = &fakeMongoClientProvider{pods: rsPods, cr: cr, connectionCount: connectionCount}
r.CommonReconciler = common.New(r.Client(), r.Scheme(), r.NewPBMFunc(), &fakeMongoClientProvider{pods: rsPods, cr: cr, connectionCount: connectionCount})
r.serverVersion = &version.ServerVersion{Platform: version.PlatformKubernetes}
r.crons = NewCronRegistry()

Expand Down
10 changes: 6 additions & 4 deletions pkg/controller/perconaservermongodb/custom_users.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ func (r *ReconcilePerconaServerMongoDB) reconcileCustomUsers(ctx context.Context
var err error
var mongoCli mongo.Client
if cr.Spec.Sharding.Enabled {
mongoCli, err = r.mongosClientWithRole(ctx, cr, api.RoleUserAdmin)
mongoCli, err = r.MongosClientWithRole(ctx, cr, api.RoleUserAdmin)
} else {
mongoCli, err = r.mongoClientWithRole(ctx, cr, cr.Spec.Replsets[0], api.RoleUserAdmin)
mongoCli, err = r.MongoClientWithRole(ctx, cr, cr.Spec.Replsets[0], api.RoleUserAdmin)
}
if err != nil {
return errors.Wrap(err, "failed to get mongo client")
Expand Down Expand Up @@ -310,7 +310,8 @@ func updatePass(
user *api.User,
userInfo *mongo.User,
secret *corev1.Secret,
annotationKey, passKey string) error {
annotationKey, passKey string,
) error {
log := logf.FromContext(ctx)

if userInfo == nil || user.IsExternalDB() {
Expand Down Expand Up @@ -395,7 +396,8 @@ func createUser(
mongoCli mongo.Client,
user *api.User,
secret *corev1.Secret,
annotationKey, passKey string) error {
annotationKey, passKey string,
) error {
log := logf.FromContext(ctx)

roles := make([]mongo.Role, 0)
Expand Down
6 changes: 3 additions & 3 deletions pkg/controller/perconaservermongodb/fcv.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import (
)

func (r *ReconcilePerconaServerMongoDB) getFCV(ctx context.Context, cr *api.PerconaServerMongoDB) (string, error) {
c, err := r.mongoClientWithRole(ctx, cr, cr.Spec.Replsets[0], api.RoleClusterAdmin)
c, err := r.MongoClientWithRole(ctx, cr, cr.Spec.Replsets[0], api.RoleClusterAdmin)
if err != nil {
return "", errors.Wrap(err, "failed to get connection")
}
Expand Down Expand Up @@ -40,9 +40,9 @@ func (r *ReconcilePerconaServerMongoDB) setFCV(ctx context.Context, cr *api.Perc
var connErr error

if cr.Spec.Sharding.Enabled {
cli, connErr = r.mongosClientWithRole(ctx, cr, api.RoleClusterAdmin)
cli, connErr = r.MongosClientWithRole(ctx, cr, api.RoleClusterAdmin)
} else {
cli, connErr = r.mongoClientWithRole(ctx, cr, cr.Spec.Replsets[0], api.RoleClusterAdmin)
cli, connErr = r.MongoClientWithRole(ctx, cr, cr.Spec.Replsets[0], api.RoleClusterAdmin)
}

if connErr != nil {
Expand Down
2 changes: 1 addition & 1 deletion pkg/controller/perconaservermongodb/finalizers.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ func (r *ReconcilePerconaServerMongoDB) checkFinalizers(ctx context.Context, cr
}

func (r *ReconcilePerconaServerMongoDB) deleteAllPITRChunks(ctx context.Context, cr *api.PerconaServerMongoDB) error {
pbmc, err := r.newPBM(ctx, r.client, cr)
pbmc, err := r.NewPBM(ctx, cr)
if err != nil {
return errors.Wrap(err, "new pbm")
}
Expand Down
16 changes: 8 additions & 8 deletions pkg/controller/perconaservermongodb/mgo.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ func (r *ReconcilePerconaServerMongoDB) reconcileCluster(ctx context.Context, cr
}
}

cli, err := r.mongoClientWithRole(ctx, cr, replset, api.RoleClusterAdmin)
cli, err := r.MongoClientWithRole(ctx, cr, replset, api.RoleClusterAdmin)
if err != nil {
if cr.Spec.Unmanaged {
return api.AppStateInit, nil, nil
Expand Down Expand Up @@ -193,7 +193,7 @@ func (r *ReconcilePerconaServerMongoDB) reconcileCluster(ctx context.Context, cr
replset.ClusterRole == api.ClusterRoleShardSvr &&
len(mongosPods) > 0 && cr.Spec.Sharding.Mongos.Size > 0 {

mongosSession, err := r.mongosClientWithRole(ctx, cr, api.RoleClusterAdmin)
mongosSession, err := r.MongosClientWithRole(ctx, cr, api.RoleClusterAdmin)
if err != nil {
return api.AppStateError, nil, errors.Wrap(err, "failed to get mongos connection")
}
Expand Down Expand Up @@ -571,7 +571,7 @@ func (r *ReconcilePerconaServerMongoDB) removeRSFromShard(ctx context.Context, c
return nil
}

cli, err := r.mongosClientWithRole(ctx, cr, api.RoleClusterAdmin)
cli, err := r.MongosClientWithRole(ctx, cr, api.RoleClusterAdmin)
if err != nil {
return errors.Errorf("failed to get mongos connection: %v", err)
}
Expand Down Expand Up @@ -621,7 +621,7 @@ func (r *ReconcilePerconaServerMongoDB) handleRsAddToShard(ctx context.Context,
return errors.Wrapf(err, "get rsPod %s host", rspod.Name)
}

cli, err := r.mongosClientWithRole(ctx, cr, api.RoleClusterAdmin)
cli, err := r.MongosClientWithRole(ctx, cr, api.RoleClusterAdmin)
if err != nil {
return errors.Wrap(err, "failed to get mongos client")
}
Expand Down Expand Up @@ -724,7 +724,7 @@ func (r *ReconcilePerconaServerMongoDB) handleReplsetInit(ctx context.Context, c
time.Sleep(time.Second * 5)

log.Info("creating user admin", "replset", replsetName, "pod", pod.Name, "user", api.RoleUserAdmin)
userAdmin, err := getInternalCredentials(ctx, r.client, cr, api.RoleUserAdmin)
userAdmin, err := psmdb.GetCredentials(ctx, r.client, cr, api.RoleUserAdmin)
if err != nil {
return nil, nil, errors.Wrap(err, "failed to get userAdmin credentials")
}
Expand Down Expand Up @@ -757,7 +757,7 @@ func (r *ReconcilePerconaServerMongoDB) handleReplicaSetNoPrimary(ctx context.Co
}

log.Info("Connecting to pod", "pod", pod.Name, "user", api.RoleClusterAdmin)
cli, err := r.standaloneClientWithRole(ctx, cr, replset, api.RoleClusterAdmin, pod)
cli, err := r.StandaloneClientWithRole(ctx, cr, replset, api.RoleClusterAdmin, pod)
if err != nil {
return errors.Wrap(err, "get standalone mongo client")
}
Expand Down Expand Up @@ -922,7 +922,7 @@ func compareRoles(x []mongo.Role, y []mongo.Role) bool {
func (r *ReconcilePerconaServerMongoDB) createOrUpdateSystemUsers(ctx context.Context, cr *api.PerconaServerMongoDB, replset *api.ReplsetSpec) error {
log := logf.FromContext(ctx)

cli, err := r.mongoClientWithRole(ctx, cr, replset, api.RoleUserAdmin)
cli, err := r.MongoClientWithRole(ctx, cr, replset, api.RoleUserAdmin)
if err != nil {
return errors.Wrap(err, "failed to get mongo client")
}
Expand Down Expand Up @@ -1013,7 +1013,7 @@ func (r *ReconcilePerconaServerMongoDB) createOrUpdateSystemUsers(ctx context.Co
}

for _, role := range users {
creds, err := getInternalCredentials(ctx, r.client, cr, role)
creds, err := psmdb.GetCredentials(ctx, r.client, cr, role)
if err != nil {
log.Error(err, "failed to get credentials", "role", role)
continue
Expand Down
Loading
Loading