Skip to content

Commit 1565b2c

Browse files
authored
Pause-resume-frozen functionality (#168)
1 parent c5e82c8 commit 1565b2c

File tree

27 files changed

+807
-251
lines changed

27 files changed

+807
-251
lines changed

.github/workflows/run-tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ jobs:
165165
kind load docker-image cr.yandex/crptqonuodf51kdj7a7d/ydb:22.4.44
166166
- name: run-tests
167167
run: |
168-
go test -timeout 1800s -p 1 ./...
168+
go test -v -timeout 1800s -p 1 ./... -args -ginkgo.v
169169
- name: teardown-k8s-cluster
170170
run: |
171171
kind delete cluster

.golangci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ linters:
169169
- nestif
170170
# - nilnil
171171
# - nlreturn
172-
- nolintlint
172+
# - nolintlint
173173
# - prealloc
174174
- predeclared
175175
- rowserrcheck

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
[![check-pr](https://github.com/ydb-platform/ydb-kubernetes-operator/actions/workflows/check-pr.yml/badge.svg)](https://github.com/ydb-platform/ydb-kubernetes-operator/actions/workflows/check-pr.yml)
2+
[![upload-artifacts](https://github.com/ydb-platform/ydb-kubernetes-operator/actions/workflows/upload-artifacts.yml/badge.svg)](https://github.com/ydb-platform/ydb-kubernetes-operator/actions/workflows/upload-artifacts.yml)
3+
14
# YDB Kubernetes Operator
25

36
The YDB Kubernetes operator deploys and manages YDB resources in a Kubernetes cluster.

api/v1alpha1/database_types.go

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ package v1alpha1
33
import (
44
corev1 "k8s.io/api/core/v1"
55
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
6+
7+
"github.com/ydb-platform/ydb-kubernetes-operator/internal/controllers/constants"
68
)
79

810
// DatabaseSpec defines the desired state of Database
@@ -29,7 +31,7 @@ type DatabaseSpec struct {
2931
Encryption *EncryptionConfig `json:"encryption,omitempty"`
3032

3133
// Additional volumes that will be mounted into the well-known directory of
32-
// every storage pod. Directiry: `/opt/ydb/volumes/<volume_name>`.
34+
// every storage pod. Directory: `/opt/ydb/volumes/<volume_name>`.
3335
// Only `hostPath` volume type is supported for now.
3436
// +optional
3537
Volumes []*corev1.Volume `json:"volumes,omitempty"`
@@ -38,6 +40,21 @@ type DatabaseSpec struct {
3840
// +optional
3941
Datastreams *DatastreamsConfig `json:"datastreams,omitempty"`
4042

43+
// The state of the Database processes.
44+
// `true` means all the Database Pods are being killed, but the Database resource is persisted.
45+
// `false` means the default state of the system, all Pods running.
46+
// +kubebuilder:default:=false
47+
// +optional
48+
Pause bool `json:"pause"`
49+
50+
// Enables or disables operator's reconcile loop.
51+
// `false` means all the Pods are running, but the reconcile is effectively turned off.
52+
// `true` means the default state of the system, all Pods running, operator reacts
53+
// to specification change of this Database resource.
54+
// +kubebuilder:default:=true
55+
// +optional
56+
OperatorSync bool `json:"operatorSync"`
57+
4158
// (Optional) Name of the root storage domain
4259
// Default: root
4360
// +kubebuilder:validation:Pattern:=[a-zA-Z0-9]([-_a-zA-Z0-9]*[a-zA-Z0-9])?
@@ -181,8 +198,8 @@ type StorageUnit struct {
181198

182199
// DatabaseStatus defines the observed state of Database
183200
type DatabaseStatus struct {
184-
State string `json:"state"`
185-
Conditions []metav1.Condition `json:"conditions,omitempty"`
201+
State constants.ClusterState `json:"state"`
202+
Conditions []metav1.Condition `json:"conditions,omitempty"`
186203
}
187204

188205
//+kubebuilder:object:root=true

api/v1alpha1/database_webhook.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ import (
1010
ctrl "sigs.k8s.io/controller-runtime"
1111
logf "sigs.k8s.io/controller-runtime/pkg/log"
1212
"sigs.k8s.io/controller-runtime/pkg/webhook"
13+
14+
. "github.com/ydb-platform/ydb-kubernetes-operator/internal/controllers/constants" //nolint:revive,stylecheck
1315
)
1416

1517
// log is for logging in this package.
@@ -156,5 +158,8 @@ func (r *Database) ValidateUpdate(old runtime.Object) error {
156158
}
157159

158160
// ValidateDelete permits deletion only when r.Status.State equals
// DatabasePaused. For any other state it returns an error whose message
// includes the current state; otherwise it returns nil.
func (r *Database) ValidateDelete() error {
161+
if r.Status.State != DatabasePaused {
162+
return fmt.Errorf("database deletion is only possible from `Paused` state, current state %v", r.Status.State)
163+
}
159164
return nil
160165
}
}

api/v1alpha1/storage_types.go

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ package v1alpha1
33
import (
44
corev1 "k8s.io/api/core/v1"
55
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
6+
7+
"github.com/ydb-platform/ydb-kubernetes-operator/internal/controllers/constants"
68
)
79

810
// StorageSpec defines the desired state of Storage
@@ -37,6 +39,21 @@ type StorageSpec struct {
3739
// +optional
3840
Service StorageServices `json:"service,omitempty"`
3941

42+
// The state of the Storage processes.
43+
// `true` means all the Storage Pods are being killed, but the Storage resource is persisted.
44+
// `false` means the default state of the system, all Pods running.
45+
// +kubebuilder:default:=false
46+
// +optional
47+
Pause bool `json:"pause"`
48+
49+
// Enables or disables operator's reconcile loop.
50+
// `false` means all the Pods are running, but the reconcile is effectively turned off.
51+
// `true` means the default state of the system, all Pods running, operator reacts
52+
// to specification change of this Storage resource.
53+
// +kubebuilder:default:=true
54+
// +optional
55+
OperatorSync bool `json:"operatorSync"`
56+
4057
// (Optional) Name of the root storage domain
4158
// Default: root
4259
// +kubebuilder:validation:Pattern:=[a-zA-Z0-9]([-_a-zA-Z0-9]*[a-zA-Z0-9])?
@@ -92,7 +109,7 @@ type StorageSpec struct {
92109
Secrets []*corev1.LocalObjectReference `json:"secrets,omitempty"`
93110

94111
// Additional volumes that will be mounted into the well-known directory of
95-
// every storage pod. Directiry: `/opt/ydb/volumes/<volume_name>`.
112+
// every storage pod. Directory: `/opt/ydb/volumes/<volume_name>`.
96113
// Only `hostPath` volume type is supported for now.
97114
// +optional
98115
Volumes []*corev1.Volume `json:"volumes,omitempty"`
@@ -142,8 +159,8 @@ type StorageSpec struct {
142159

143160
// StorageStatus defines the observed state of Storage
144161
type StorageStatus struct {
145-
State string `json:"state"`
146-
Conditions []metav1.Condition `json:"conditions,omitempty"`
162+
State constants.ClusterState `json:"state"`
163+
Conditions []metav1.Condition `json:"conditions,omitempty"`
147164
}
148165

149166
//+kubebuilder:object:root=true

api/v1alpha1/storage_webhook.go

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,17 @@ package v1alpha1
33
import (
44
"fmt"
55

6+
"github.com/google/go-cmp/cmp"
7+
"github.com/google/go-cmp/cmp/cmpopts"
68
"gopkg.in/yaml.v3"
79
v1 "k8s.io/api/core/v1"
810
"k8s.io/apimachinery/pkg/runtime"
911
"k8s.io/utils/strings/slices"
1012
ctrl "sigs.k8s.io/controller-runtime"
1113
logf "sigs.k8s.io/controller-runtime/pkg/log"
1214
"sigs.k8s.io/controller-runtime/pkg/webhook"
15+
16+
. "github.com/ydb-platform/ydb-kubernetes-operator/internal/controllers/constants" //nolint:revive,stylecheck
1317
)
1418

1519
// log is for logging in this package.
@@ -143,6 +147,21 @@ func (r *Storage) ValidateCreate() error {
143147
return nil
144148
}
145149

150+
// hasUpdatesBesidesFrozen reports whether oldStorage and newStorage differ in
// anything other than Spec.OperatorSync. The Status, ObjectMeta and TypeMeta
// fields of Storage are excluded from the comparison. The second return value
// is the cmp.Diff output, which is empty when no other difference is found.
// Both arguments are deep-copied before being touched, so the objects passed
// in are not modified.
func hasUpdatesBesidesFrozen(oldStorage, newStorage *Storage) (bool, string) {
151+
oldStorageCopy := oldStorage.DeepCopy()
152+
newStorageCopy := newStorage.DeepCopy()
153+
154+
// Force Spec.OperatorSync (the "frozen" switch) to the same value on both
155+
// copies, so that any remaining diff comes from some other field.
156+
oldStorageCopy.Spec.OperatorSync = false
157+
newStorageCopy.Spec.OperatorSync = false
158+
159+
ignoreNonSpecFields := cmpopts.IgnoreFields(Storage{}, "Status", "ObjectMeta", "TypeMeta")
160+
161+
diff := cmp.Diff(oldStorageCopy, newStorageCopy, ignoreNonSpecFields)
162+
return diff != "", diff
163+
}
164+
146165
// ValidateUpdate implements webhook.Validator so a webhook will be registered for the type
147166
func (r *Storage) ValidateUpdate(old runtime.Object) error {
148167
storagelog.Info("validate update", "name", r.Name)
@@ -153,6 +172,24 @@ func (r *Storage) ValidateUpdate(old runtime.Object) error {
153172
return fmt.Errorf("failed to parse Storage.spec.configuration, error: %w", err)
154173
}
155174

175+
if !r.Spec.OperatorSync {
176+
oldStorage := old.(*Storage)
177+
178+
hasIllegalUpdates, diff := hasUpdatesBesidesFrozen(old.(*Storage), r)
179+
180+
if hasIllegalUpdates {
181+
if oldStorage.Spec.OperatorSync {
182+
return fmt.Errorf(
183+
"it is illegal to update spec.OperatorSync and any other "+
184+
"spec fields at the same time. Here is what you else tried to update: %s", diff)
185+
}
186+
187+
return fmt.Errorf(
188+
"it is illegal to update any spec fields when spec.OperatorSync is false. "+
189+
"Here is what you else tried to update: %s", diff)
190+
}
191+
}
192+
156193
yamlConfig := PartialYamlConfig{}
157194
err = yaml.Unmarshal([]byte(r.Spec.Configuration), &yamlConfig)
158195
if err != nil {
@@ -165,18 +202,20 @@ func (r *Storage) ValidateUpdate(old runtime.Object) error {
165202
}
166203

167204
if (authEnabled && r.Spec.OperatorConnection == nil) || (!authEnabled && r.Spec.OperatorConnection != nil) {
168-
return fmt.Errorf("field 'spec.operatorConnection' does not satisfy with config option `enforce_user_token_requirement: %t`", authEnabled)
205+
return fmt.Errorf("field 'spec.operatorConnection' does not align with config option `enforce_user_token_requirement: %t`", authEnabled)
169206
}
170207

171208
crdCheckError := checkMonitoringCRD(manager, storagelog, r.Spec.Monitoring != nil)
172209
if crdCheckError != nil {
173210
return crdCheckError
174211
}
175212

176-
// TODO(user): fill in your validation logic upon object update.
177213
return nil
178214
}
179215

180216
// ValidateDelete permits deletion only when r.Status.State equals
// StoragePaused. For any other state it returns an error whose message
// includes the current state; otherwise it returns nil.
func (r *Storage) ValidateDelete() error {
217+
if r.Status.State != StoragePaused {
218+
return fmt.Errorf("storage deletion is only possible from `Paused` state, current state %v", r.Status.State)
219+
}
181220
return nil
182221
}
}

deploy/ydb-operator/crds/database.yaml

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2279,12 +2279,27 @@ spec:
22792279
description: Number of nodes (pods) in the cluster
22802280
format: int32
22812281
type: integer
2282+
operatorSync:
2283+
default: true
2284+
description: Enables or disables operator's reconcile loop. `false`
2285+
means all the Pods are running, but the reconcile is effectively
2286+
turned off. `true` means the default state of the system, all Pods
2287+
running, operator reacts to specification change of this Database
2288+
resource.
2289+
type: boolean
22822290
path:
22832291
description: '(Optional) Custom database path in schemeshard Default:
22842292
/<spec.domain>/<metadata.name>'
22852293
maxLength: 255
22862294
pattern: /[a-zA-Z0-9]([-_a-zA-Z0-9]*[a-zA-Z0-9])?/[a-zA-Z0-9]([-_a-zA-Z0-9]*[a-zA-Z0-9])?(/[a-zA-Z0-9]([-_a-zA-Z0-9]*[a-zA-Z0-9])?)*
22872295
type: string
2296+
pause:
2297+
default: false
2298+
description: The state of the Database processes. `true` means all
2299+
the Database Pods are being killed, but the Database resource is
2300+
persisted. `false` means the default state of the system, all Pods
2301+
running.
2302+
type: boolean
22882303
priorityClassName:
22892304
description: (Optional) If specified, the pod's priorityClassName.
22902305
type: string
@@ -2988,7 +3003,7 @@ spec:
29883003
type: string
29893004
volumes:
29903005
description: 'Additional volumes that will be mounted into the well-known
2991-
directory of every storage pod. Directiry: `/opt/ydb/volumes/<volume_name>`.
3006+
directory of every storage pod. Directory: `/opt/ydb/volumes/<volume_name>`.
29923007
Only `hostPath` volume type is supported for now.'
29933008
items:
29943009
description: Volume represents a named volume in a pod that may

deploy/ydb-operator/crds/storage.yaml

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2503,6 +2503,20 @@ spec:
25032503
- username
25042504
type: object
25052505
type: object
2506+
operatorSync:
2507+
default: true
2508+
description: Enables or disables operator's reconcile loop. `false`
2509+
means all the Pods are running, but the reconcile is effectively
2510+
turned off. `true` means the default state of the system, all Pods
2511+
running, operator reacts to specification change of this Storage
2512+
resource.
2513+
type: boolean
2514+
pause:
2515+
default: false
2516+
description: The state of the Storage processes. `true` means all
2517+
the Storage Pods are being killed, but the Storage resource is persisted.
2518+
`false` means the default state of the system, all Pods running.
2519+
type: boolean
25062520
priorityClassName:
25072521
description: (Optional) If specified, the pod's priorityClassName.
25082522
type: string
@@ -2985,7 +2999,7 @@ spec:
29852999
type: string
29863000
volumes:
29873001
description: 'Additional volumes that will be mounted into the well-known
2988-
directory of every storage pod. Directiry: `/opt/ydb/volumes/<volume_name>`.
3002+
directory of every storage pod. Directory: `/opt/ydb/volumes/<volume_name>`.
29893003
Only `hostPath` volume type is supported for now.'
29903004
items:
29913005
description: Volume represents a named volume in a pod that may

0 commit comments

Comments
 (0)