Skip to content

K8SPSMDB-1296: improve readiness probe #1917

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 29 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
d6e9d6b
K8SPSMDB-1296: improve readiness probe
pooknull May 12, 2025
09a63a1
Merge branch 'main' into K8SPSMDB-1296
pooknull May 16, 2025
40b7674
Merge branch 'main' into K8SPSMDB-1296
pooknull May 19, 2025
07f1be6
Merge branch 'main' into K8SPSMDB-1296
pooknull May 21, 2025
d75cca1
fix tests
pooknull May 22, 2025
6f268bf
Merge branch 'main' into K8SPSMDB-1296
pooknull May 22, 2025
52df399
fix unit-test
pooknull May 22, 2025
5d322e3
fix tests
pooknull May 23, 2025
363fc77
Merge branch 'main' into K8SPSMDB-1296
pooknull May 26, 2025
ce419af
Merge remote-tracking branch 'origin/main' into K8SPSMDB-1296
pooknull May 26, 2025
1c3c592
Merge branch 'main' into K8SPSMDB-1296
pooknull May 27, 2025
fb8a8ef
fix unit-test
pooknull May 27, 2025
e5ae37d
Merge remote-tracking branch 'origin/main' into K8SPSMDB-1296
pooknull May 27, 2025
c896cbe
ignore connection error in readiness probe
pooknull May 27, 2025
ae932d3
delete util.go
pooknull May 27, 2025
01dedaa
fix manifests
pooknull May 27, 2025
d541d8e
Merge remote-tracking branch 'origin/main' into K8SPSMDB-1296
pooknull May 28, 2025
ad260bb
Merge branch 'main' into K8SPSMDB-1296
hors May 28, 2025
62650d9
fix compare files
pooknull May 29, 2025
160e3ea
Merge branch 'main' into K8SPSMDB-1296
pooknull May 29, 2025
5116da4
fix tests
pooknull May 29, 2025
8fc44b8
add readiness probe to hidden replsets
pooknull May 30, 2025
5c63a72
Merge branch 'main' into K8SPSMDB-1296
pooknull May 30, 2025
0520bbd
Merge branch 'main' into K8SPSMDB-1296
hors Jun 2, 2025
5059ea5
ignore invalid replset
pooknull Jun 2, 2025
6fa92a8
Merge branch 'main' into K8SPSMDB-1296
hors Jun 4, 2025
1141926
Merge branch 'main' into K8SPSMDB-1296
hors Jun 4, 2025
c0b21b5
Merge branch 'main' into K8SPSMDB-1296
hors Jun 5, 2025
cd834df
fix upgrade-consistency test
pooknull Jun 11, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions cmd/mongodb-healthcheck/db/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,25 +29,27 @@ var (
)

func Dial(ctx context.Context, conf *Config) (mongo.Client, error) {
if err := conf.configureTLS(); err != nil {
log := logf.FromContext(ctx).WithName("Dial")
ctx = logf.IntoContext(ctx, log)

if err := conf.configureTLS(ctx); err != nil {
return nil, errors.Wrap(err, "configure TLS")
}

log := logf.FromContext(ctx)
log.V(1).Info("Connecting to mongodb", "hosts", conf.Hosts, "ssl", conf.SSL.Enabled, "ssl_insecure", conf.SSL.Insecure)

if conf.Username != "" && conf.Password != "" {
log.V(1).Info("Enabling authentication for session", "user", conf.Username)
}

cl, err := mongo.Dial(&conf.Config)
cl, err := mongo.Dial(ctx, &conf.Config)
if err != nil {
cfg := conf.Config
cfg.Direct = true
cfg.ReplSetName = ""
cl, err = mongo.Dial(&cfg)
cl, err = mongo.Dial(ctx, &cfg)
if err != nil {
return nil, errors.Wrap(err, "filed to dial mongo")
return nil, errors.Wrap(err, "failed to dial mongo")
}
}

Expand Down
7 changes: 4 additions & 3 deletions cmd/mongodb-healthcheck/db/ssl.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
package db

import (
"context"
"crypto/tls"
"crypto/x509"
"os"
Expand All @@ -40,8 +41,8 @@ func (sc *SSLConfig) loadCaCertificate() (*x509.CertPool, error) {
return certificates, nil
}

func (cnf *Config) configureTLS() error {
log := logf.Log
func (cnf *Config) configureTLS(ctx context.Context) error {
log := logf.FromContext(ctx).WithName("configureTLS")

if !cnf.SSL.Enabled {
return nil
Expand Down Expand Up @@ -72,7 +73,7 @@ func (cnf *Config) configureTLS() error {
return errors.Wrapf(err, "check if file with name %s exists", cnf.SSL.CAFile)
}

log.V(1).Info("Loading SSL/TLS Certificate Authority: %s", "ca", cnf.SSL.CAFile)
log.V(1).Info("Loading SSL/TLS Certificate Authority", "ca", cnf.SSL.CAFile)
ca, err := cnf.SSL.loadCaCertificate()
if err != nil {
return errors.Wrapf(err, "load client CAs from %s", cnf.SSL.CAFile)
Expand Down
8 changes: 4 additions & 4 deletions cmd/mongodb-healthcheck/db/ssl_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ func TestSSLNotEnabled(t *testing.T) {
},
}

if err := cfg.configureTLS(); err != nil {
if err := cfg.configureTLS(t.Context()); err != nil {
t.Fatalf("TLS configuration failed: %s", err)
}

Expand All @@ -32,7 +32,7 @@ func TestSSLEnabled(t *testing.T) {
},
}

if err := cfg.configureTLS(); err != nil {
if err := cfg.configureTLS(t.Context()); err != nil {
t.Fatalf("TLS configuration failed: %s", err)
}

Expand All @@ -49,7 +49,7 @@ func TestPEMKeyFileDoesNotExists(t *testing.T) {
},
}

err := cfg.configureTLS()
err := cfg.configureTLS(t.Context())
if err == nil {
t.Fatal("Expected TLS config to fail, but it returned no error")
}
Expand All @@ -71,7 +71,7 @@ func TestCAFileDoesNotExists(t *testing.T) {
},
}

err := cfg.configureTLS()
err := cfg.configureTLS(t.Context())
if err == nil {
t.Fatal("Expected TLS config to fail, but it returned no error")
}
Expand Down
61 changes: 10 additions & 51 deletions cmd/mongodb-healthcheck/healthcheck/health.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,9 @@ package healthcheck

import (
"context"
"encoding/json"

v "github.com/hashicorp/go-version"
"github.com/pkg/errors"
"go.mongodb.org/mongo-driver/bson"
"go.mongodb.org/mongo-driver/bson/primitive"
logf "sigs.k8s.io/controller-runtime/pkg/log"

"github.com/percona/percona-server-mongodb-operator/cmd/mongodb-healthcheck/db"
Expand All @@ -32,6 +29,7 @@ var ErrNoReplsetConfigStr = "(NotYetInitialized) no replset config has been rece

func HealthCheckMongosLiveness(ctx context.Context, cnf *db.Config) (err error) {
log := logf.FromContext(ctx).WithName("HealthCheckMongosLiveness")
ctx = logf.IntoContext(ctx, log)

client, err := db.Dial(ctx, cnf)
if err != nil {
Expand All @@ -58,6 +56,7 @@ func HealthCheckMongosLiveness(ctx context.Context, cnf *db.Config) (err error)

func HealthCheckMongodLiveness(ctx context.Context, cnf *db.Config, startupDelaySeconds int64) (_ *mongo.MemberState, err error) {
log := logf.FromContext(ctx).WithName("HealthCheckMongodLiveness")
ctx = logf.IntoContext(ctx, log)

client, err := db.Dial(ctx, cnf)
if err != nil {
Expand All @@ -74,50 +73,14 @@ func HealthCheckMongodLiveness(ctx context.Context, cnf *db.Config, startupDelay
return nil, errors.Wrap(err, "get isMaster response")
}

buildInfo, err := client.RSBuildInfo(ctx)
rsStatus, err := client.RSStatus(ctx)
if err != nil {
return nil, errors.Wrap(err, "get buildInfo response")
}

replSetStatusCommand := bson.D{{Key: "replSetGetStatus", Value: 1}}
mongoVersion := v.Must(v.NewVersion(buildInfo.Version))
if mongoVersion.Compare(v.Must(v.NewVersion("4.2.1"))) < 0 {
// https://docs.mongodb.com/manual/reference/command/replSetGetStatus/#syntax
replSetStatusCommand = append(replSetStatusCommand, primitive.E{Key: "initialSync", Value: 1})
}

res := client.Database("admin").RunCommand(ctx, replSetStatusCommand)
if res.Err() != nil {
// if we come this far, it means db connection was successful
// standalone mongod nodes in an unmanaged cluster don't need
// to die before they are added to a replset
if res.Err().Error() == ErrNoReplsetConfigStr {
if err.Error() == ErrNoReplsetConfigStr {
state := mongo.MemberStateUnknown
log.V(1).Info("replSetGetStatus failed", "err", res.Err().Error(), "state", state)
log.V(1).Info("replSetGetStatus failed", "err", err.Error(), "state", state)
return &state, nil
}
return nil, errors.Wrap(res.Err(), "get replsetGetStatus response")
}

// this is a workaround to fix decoding of empty interfaces
// https://jira.mongodb.org/browse/GODRIVER-988
rsStatus := ReplSetStatus{}
tempResult := bson.M{}
err = res.Decode(&tempResult)
if err != nil {
return nil, errors.Wrap(err, "decode replsetGetStatus response")
}

if err == nil {
result, err := json.Marshal(tempResult)
if err != nil {
return nil, errors.Wrap(err, "marshal temp result")
}

err = json.Unmarshal(result, &rsStatus)
if err != nil {
return nil, errors.Wrap(err, "unmarshal temp result")
}
return nil, errors.Wrap(err, "get replSetGetStatus response")
}

oplogRs := OplogRs{}
Expand Down Expand Up @@ -156,14 +119,10 @@ type OplogRs struct {
StorageSize int64 `bson:"storageSize" json:"storageSize"`
}

type ReplSetStatus struct {
InitialSyncStatus InitialSyncStatus `bson:"initialSyncStatus" json:"initialSyncStatus"`
mongo.Status `bson:",inline"`
}

type InitialSyncStatus interface{}

func CheckState(rs ReplSetStatus, startupDelaySeconds int64, oplogSize int64) error {
func CheckState(rs mongo.Status, startupDelaySeconds int64, oplogSize int64) error {
if rs.GetSelf() == nil {
return errors.New("invalid replset status")
}
uptime := rs.GetSelf().Uptime

switch rs.MyState {
Expand Down
37 changes: 35 additions & 2 deletions cmd/mongodb-healthcheck/healthcheck/readiness.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package healthcheck
import (
"context"
"net"
"time"

"github.com/pkg/errors"
"go.mongodb.org/mongo-driver/bson"
Expand All @@ -27,21 +28,53 @@ import (
)

// MongodReadinessCheck runs a ping on a pmgo.SessionManager to check server readiness
func MongodReadinessCheck(ctx context.Context, addr string) error {
func MongodReadinessCheck(ctx context.Context, cnf *db.Config) error {
log := logf.FromContext(ctx).WithName("MongodReadinessCheck")
ctx = logf.IntoContext(ctx, log)

var d net.Dialer

addr := cnf.Hosts[0]
log.V(1).Info("Connecting to " + addr)
conn, err := d.DialContext(ctx, "tcp", addr)
if err != nil {
return errors.Wrap(err, "dial")
}
return conn.Close()
if err := conn.Close(); err != nil {
return err
}

s, err := func() (*mongo.Status, error) {
cnf.Timeout = time.Second
client, err := db.Dial(ctx, cnf)
if err != nil {
return nil, nil
}
defer func() {
if derr := client.Disconnect(ctx); derr != nil && err == nil {
err = errors.Wrap(derr, "failed to disconnect")
}
}()
rs, err := client.RSStatus(ctx)
if err != nil {
return nil, err
}
return &rs, nil
}()
if err != nil || s == nil {
return err
}

if err := CheckState(*s, 0, 0); err != nil {
return errors.Wrap(err, "check state")
}

return nil
}

func MongosReadinessCheck(ctx context.Context, cnf *db.Config) (err error) {
log := logf.FromContext(ctx).WithName("MongosReadinessCheck")
ctx = logf.IntoContext(ctx, log)

client, err := db.Dial(ctx, cnf)
if err != nil {
Expand Down
2 changes: 1 addition & 1 deletion cmd/mongodb-healthcheck/tool/tool.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ func (app *App) Run(ctx context.Context) error {
switch *component {

case "mongod":
err := healthcheck.MongodReadinessCheck(ctx, cnf.Hosts[0])
err := healthcheck.MongodReadinessCheck(ctx, cnf)
if err != nil {
return errors.Wrap(err, "member failed Kubernetes readiness check")
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,12 @@ spec:
- readiness
- --component
- mongod
- --ssl
- --sslInsecure
- --sslCAFile
- /etc/mongodb-ssl/ca.crt
- --sslPEMKeyFile
- /tmp/tls.pem
failureThreshold: 8
initialDelaySeconds: 10
periodSeconds: 3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,12 @@ spec:
- readiness
- --component
- mongod
- --ssl
- --sslInsecure
- --sslCAFile
- /etc/mongodb-ssl/ca.crt
- --sslPEMKeyFile
- /tmp/tls.pem
failureThreshold: 8
initialDelaySeconds: 10
periodSeconds: 3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,12 @@ spec:
- readiness
- --component
- mongod
- --ssl
- --sslInsecure
- --sslCAFile
- /etc/mongodb-ssl/ca.crt
- --sslPEMKeyFile
- /tmp/tls.pem
failureThreshold: 8
initialDelaySeconds: 10
periodSeconds: 3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,12 @@ spec:
- readiness
- --component
- mongod
- --ssl
- --sslInsecure
- --sslCAFile
- /etc/mongodb-ssl/ca.crt
- --sslPEMKeyFile
- /tmp/tls.pem
failureThreshold: 8
initialDelaySeconds: 10
periodSeconds: 3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,12 @@ spec:
- readiness
- --component
- mongod
- --ssl
- --sslInsecure
- --sslCAFile
- /etc/mongodb-ssl/ca.crt
- --sslPEMKeyFile
- /tmp/tls.pem
failureThreshold: 3
initialDelaySeconds: 10
periodSeconds: 3
Expand Down
6 changes: 6 additions & 0 deletions e2e-tests/custom-tls/compare/statefulset_some-name-cfg.yml
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,12 @@ spec:
- readiness
- --component
- mongod
- --ssl
- --sslInsecure
- --sslCAFile
- /etc/mongodb-ssl/ca.crt
- --sslPEMKeyFile
- /tmp/tls.pem
failureThreshold: 3
initialDelaySeconds: 10
periodSeconds: 3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,12 @@ spec:
- readiness
- --component
- mongod
- --ssl
- --sslInsecure
- --sslCAFile
- /etc/mongodb-ssl/ca.crt
- --sslPEMKeyFile
- /tmp/tls.pem
failureThreshold: 8
initialDelaySeconds: 10
periodSeconds: 3
Expand Down
6 changes: 6 additions & 0 deletions e2e-tests/custom-tls/compare/statefulset_some-name-rs0.yml
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,12 @@ spec:
- readiness
- --component
- mongod
- --ssl
- --sslInsecure
- --sslCAFile
- /etc/mongodb-ssl/ca.crt
- --sslPEMKeyFile
- /tmp/tls.pem
failureThreshold: 8
initialDelaySeconds: 10
periodSeconds: 3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,12 @@ spec:
- readiness
- --component
- mongod
- --ssl
- --sslInsecure
- --sslCAFile
- /etc/mongodb-ssl/ca.crt
- --sslPEMKeyFile
- /tmp/tls.pem
failureThreshold: 3
initialDelaySeconds: 10
periodSeconds: 3
Expand Down
Loading
Loading