Skip to content

Commit 724f205

Browse files
committed
feat: add optional healthcheck for provider plugins
Signed-off-by: Anish Ramasekar <anish.ramasekar@gmail.com>
1 parent 73b7af5 commit 724f205

File tree

9 files changed

+153
-0
lines changed

9 files changed

+153
-0
lines changed

cmd/secrets-store-csi-driver/main.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,10 @@ var (
6464
// This feature flag will be enabled by default after n+2 releases giving time for users to label all their existing credential secrets.
6565
filteredWatchSecret = flag.Bool("filtered-watch-secret", false, "enable filtered watch for NodePublishSecretRef secrets with label secrets-store.csi.k8s.io/used=true")
6666

67+
// Enable optional healthcheck for provider clients that exist in memory
68+
providerHealthCheck = flag.Bool("provider-health-check", false, "Enable health check for configured providers")
69+
providerHealthCheckInterval = flag.Duration("provider-health-check-interval", 2*time.Minute, "Provider healthcheck interval duration")
70+
6771
scheme = runtime.NewScheme()
6872
)
6973

@@ -147,6 +151,12 @@ func main() {
147151
providerClients := secretsstore.NewPluginClientBuilder(*providerVolumePath)
148152
defer providerClients.Cleanup()
149153

154+
// enable provider health check
155+
if *providerHealthCheck {
156+
klog.InfoS("provider health check enabled", "interval", *providerHealthCheckInterval)
157+
go providerClients.HealthCheck(ctx, *providerHealthCheckInterval)
158+
}
159+
150160
go func() {
151161
klog.Infof("starting manager")
152162
if err := mgr.Start(ctx); err != nil {

manifest_staging/charts/secrets-store-csi-driver/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,3 +83,5 @@ The following table lists the configurable parameters of the csi-secrets-store-p
8383
| `enableSecretRotation` | Enable secret rotation feature [alpha] | `false` |
8484
| `rotationPollInterval` | Secret rotation poll interval duration | `"120s"` |
8585
| `filteredWatchSecret` | Enable filtered watch for NodePublishSecretRef secrets with label `secrets-store.csi.k8s.io/used=true` | `false` |
86+
| `providerHealthCheck` | Enable health check for configured providers | `false` |
87+
| `providerHealthCheckInterval` | Provider healthcheck interval duration | `2m` |

manifest_staging/charts/secrets-store-csi-driver/templates/secrets-store-csi-driver-windows.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,12 @@ spec:
7676
{{- if and (semverCompare ">= v0.0.21-0" .Values.windows.image.tag) .Values.filteredWatchSecret }}
7777
- "--filtered-watch-secret={{ .Values.filteredWatchSecret }}"
7878
{{- end }}
79+
{{- if and (semverCompare ">= v0.0.22-0" .Values.windows.image.tag) .Values.providerHealthCheck }}
80+
- "--provider-health-check={{ .Values.providerHealthCheck }}"
81+
{{- end }}
82+
{{- if and (semverCompare ">= v0.0.22-0" .Values.windows.image.tag) .Values.providerHealthCheckInterval }}
83+
- "--provider-health-check-interval={{ .Values.providerHealthCheckInterval }}"
84+
{{- end }}
7985
env:
8086
{{- with .Values.windows.env }}
8187
{{- toYaml . | nindent 10 }}

manifest_staging/charts/secrets-store-csi-driver/templates/secrets-store-csi-driver.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,12 @@ spec:
7676
{{- if and (semverCompare ">= v0.0.21-0" .Values.linux.image.tag) .Values.filteredWatchSecret }}
7777
- "--filtered-watch-secret={{ .Values.filteredWatchSecret }}"
7878
{{- end }}
79+
{{- if and (semverCompare ">= v0.0.22-0" .Values.linux.image.tag) .Values.providerHealthCheck }}
80+
- "--provider-health-check={{ .Values.providerHealthCheck }}"
81+
{{- end }}
82+
{{- if and (semverCompare ">= v0.0.22-0" .Values.linux.image.tag) .Values.providerHealthCheckInterval }}
83+
- "--provider-health-check-interval={{ .Values.providerHealthCheckInterval }}"
84+
{{- end }}
7985
env:
8086
{{- with .Values.linux.env }}
8187
{{- toYaml . | nindent 10 }}

manifest_staging/charts/secrets-store-csi-driver/values.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,3 +152,9 @@ rotationPollInterval:
152152

153153
## Filtered watch nodePublishSecretRef secrets
154154
filteredWatchSecret: false
155+
156+
## Provider HealthCheck
157+
providerHealthCheck: false
158+
159+
## Provider HealthCheck interval
160+
providerHealthCheckInterval: 2m

manifest_staging/deploy/secrets-store-csi-driver-windows.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ spec:
5151
- "--enable-secret-rotation=false"
5252
- "--rotation-poll-interval=2m"
5353
- "--filtered-watch-secret=false"
54+
- "--provider-health-check=false"
55+
- "--provider-health-check-interval=2m"
5456
env:
5557
- name: CSI_ENDPOINT
5658
value: unix://C:\\csi\\csi.sock

manifest_staging/deploy/secrets-store-csi-driver.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ spec:
5151
- "--enable-secret-rotation=false"
5252
- "--rotation-poll-interval=2m"
5353
- "--filtered-watch-secret=false"
54+
- "--provider-health-check=false"
55+
- "--provider-health-check-interval=2m"
5456
env:
5557
- name: CSI_ENDPOINT
5658
value: unix:///csi/csi.sock

pkg/secrets-store/provider_client.go

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424
"os"
2525
"regexp"
2626
"sync"
27+
"time"
2728

2829
"google.golang.org/grpc"
2930
"google.golang.org/protobuf/proto"
@@ -154,6 +155,38 @@ func (p *PluginClientBuilder) Cleanup() {
154155
p.conns = make(map[string]*grpc.ClientConn)
155156
}
156157

158+
// HealthCheck enables periodic healthcheck for configured provider clients by making
159+
// a Version() RPC call. If the provider healthcheck fails, we log an error.
160+
//
161+
// This method blocks until the parent context is cancelled during termination.
162+
func (p *PluginClientBuilder) HealthCheck(ctx context.Context, interval time.Duration) {
163+
ticker := time.NewTicker(interval)
164+
defer ticker.Stop()
165+
166+
for {
167+
select {
168+
case <-ctx.Done():
169+
return
170+
case <-ticker.C:
171+
p.lock.RLock()
172+
173+
for provider, client := range p.clients {
174+
c, cancel := context.WithTimeout(ctx, 5*time.Second)
175+
defer cancel()
176+
177+
runtimeVersion, err := Version(c, client)
178+
if err != nil {
179+
klog.V(4).ErrorS(err, "provider healthcheck failed", "provider", provider)
180+
continue
181+
}
182+
klog.V(4).InfoS("provider healthcheck successful", "provider", provider, "runtimeVersion", runtimeVersion)
183+
}
184+
185+
p.lock.RUnlock()
186+
}
187+
}
188+
}
189+
157190
// MountContent calls the client's Mount() RPC with helpers to format the
158191
// request and interpret the response.
159192
func MountContent(ctx context.Context, client v1alpha1.CSIDriverProviderClient, attributes, secrets, targetPath, permission string, oldObjectVersions map[string]string) (map[string]string, string, error) {
@@ -208,3 +241,17 @@ func MountContent(ctx context.Context, client v1alpha1.CSIDriverProviderClient,
208241

209242
return objectVersions, "", nil
210243
}
244+
245+
// Version calls the client's Version() RPC
246+
// returns provider runtime version and error.
247+
func Version(ctx context.Context, client v1alpha1.CSIDriverProviderClient) (string, error) {
248+
req := &v1alpha1.VersionRequest{
249+
Version: "v1alpha1",
250+
}
251+
252+
resp, err := client.Version(ctx, req)
253+
if err != nil {
254+
return "", err
255+
}
256+
return resp.RuntimeVersion, nil
257+
}

pkg/secrets-store/provider_client_test.go

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
"reflect"
2626
"sync"
2727
"testing"
28+
"time"
2829

2930
"github.com/google/go-cmp/cmp"
3031

@@ -334,3 +335,74 @@ func TestPluginClientBuilderErrorInvalid(t *testing.T) {
334335
t.Errorf("Get(%s) = %v, want %v", "bad/provider/name", err, ErrInvalidProvider)
335336
}
336337
}
338+
339+
func TestVersion(t *testing.T) {
340+
cases := []struct {
341+
name string
342+
expectedRuntimeVersion string
343+
}{
344+
{
345+
name: "provider version successful response",
346+
expectedRuntimeVersion: "0.0.10",
347+
},
348+
}
349+
350+
for _, test := range cases {
351+
t.Run(test.name, func(t *testing.T) {
352+
socketPath := tmpdir.New(t, "", "ut")
353+
354+
pool := NewPluginClientBuilder(socketPath)
355+
defer pool.Cleanup()
356+
357+
server, cleanup := fakeServer(t, socketPath, "provider1")
358+
defer cleanup()
359+
360+
server.Start()
361+
362+
client, err := pool.Get(context.Background(), "provider1")
363+
if err != nil {
364+
t.Fatalf("expected err to be nil, got: %+v", err)
365+
}
366+
367+
runtimeVersion, err := Version(context.TODO(), client)
368+
if err != nil {
369+
t.Errorf("expected err to be nil, got: %+v", err)
370+
}
371+
if test.expectedRuntimeVersion != runtimeVersion {
372+
t.Errorf("expected version: %s, got: %s", test.expectedRuntimeVersion, runtimeVersion)
373+
}
374+
})
375+
}
376+
}
377+
378+
func TestPluginClientBuilder_HealthCheck(t *testing.T) {
379+
// this test asserts the read lock and unlock semantics in the
380+
// HealthCheck() method work as expected
381+
path := tmpdir.New(t, "", "ut")
382+
383+
cb := NewPluginClientBuilder(path)
384+
ctx := context.Background()
385+
healthCheckInterval := 1 * time.Millisecond
386+
387+
provider := "server"
388+
server, cleanup := fakeServer(t, path, provider)
389+
defer cleanup()
390+
server.Start()
391+
392+
// run the provider healthcheck
393+
go cb.HealthCheck(ctx, healthCheckInterval)
394+
var wg sync.WaitGroup
395+
396+
// try a concurrent get with the healthcheck running in the background
397+
for i := 0; i < 10; i++ {
398+
wg.Add(1)
399+
go func() {
400+
defer wg.Done()
401+
if _, err := cb.Get(ctx, provider); err != nil {
402+
t.Errorf("Get(%q) = %v, want nil", provider, err)
403+
}
404+
}()
405+
}
406+
407+
wg.Wait()
408+
}

0 commit comments

Comments
 (0)