Skip to content

Commit 84026fe

Browse files
committed
adding new pod-count test to the observability suite
Signed-off-by: Adam D. Cornett <adc@redhat.com>
1 parent 759156e commit 84026fe

File tree

10 files changed

+130
-18
lines changed

10 files changed

+130
-18
lines changed

CATALOG.md

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ Depending on the workload type, not all tests are required to pass to satisfy be
77

88
## Test cases summary
99

10-
### Total test cases: 119
10+
### Total test cases: 120
1111

1212
### Total suites: 10
1313

@@ -18,7 +18,7 @@ Depending on the workload type, not all tests are required to pass to satisfy be
1818
|lifecycle|18|
1919
|manageability|2|
2020
|networking|12|
21-
|observability|5|
21+
|observability|6|
2222
|operator|12|
2323
|performance|6|
2424
|platform-alteration|14|
@@ -36,11 +36,11 @@ Depending on the workload type, not all tests are required to pass to satisfy be
3636
|---|---|
3737
|8|1|
3838

39-
### Non-Telco specific tests only: 70
39+
### Non-Telco specific tests only: 71
4040

4141
|Mandatory|Optional|
4242
|---|---|
43-
|43|27|
43+
|43|28|
4444

4545
### Telco specific tests only: 27
4646

@@ -1203,6 +1203,23 @@ Test Cases are the specifications used to perform a meaningful test. Test cases
12031203
|Non-Telco|Mandatory|
12041204
|Telco|Mandatory|
12051205

1206+
#### observability-pod-count
1207+
1208+
|Property|Description|
1209+
|---|---|
1210+
|Unique ID|observability-pod-count|
1211+
|Description|Checks that all pods running at the beginning of the tests, continue to run throughout the test|
1212+
|Suggested Remediation|Ensure all expected pods are running|
1213+
|Best Practice Reference|https://redhat-best-practices-for-k8s.github.io/guide/#observability-pod-count|
1214+
|Exception Process|No exceptions|
1215+
|Impact Statement|Inconsistency of running pods can cause instability of the application.|
1216+
|Tags|common,observability|
1217+
|**Scenario**|**Optional/Mandatory**|
1218+
|Extended|Optional|
1219+
|Far-Edge|Optional|
1220+
|Non-Telco|Optional|
1221+
|Telco|Optional|
1222+
12061223
#### observability-pod-disruption-budget
12071224

12081225
|Property|Description|

cmd/certsuite/main_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ func TestCertsuiteInfoCmd(t *testing.T) {
6767
| observability-termination-policy |
6868
| observability-pod-disruption-budget |
6969
| observability-compatibility-with-next-ocp-release |
70+
| observability-pod-count |
7071
------------------------------------------------------------
7172
`
7273
assert.Equal(t, expectedOutput, string(out))

expected_results.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ testCases:
5656
- observability-pod-disruption-budget
5757
- observability-compatibility-with-next-ocp-release
5858
- observability-termination-policy
59+
- observability-pod-count
5960
- operator-crd-versioning
6061
- operator-crd-openapi-schema
6162
- operator-install-source

pkg/autodiscover/autodiscover.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ var data = DiscoveredTestData{}
129129
const labelRegex = `(\S*)\s*:\s*(\S*)`
130130
const labelRegexMatches = 3
131131

132-
func createLabels(labelStrings []string) (labelObjects []labelObject) {
132+
func CreateLabels(labelStrings []string) (labelObjects []labelObject) {
133133
for _, label := range labelStrings {
134134
r := regexp.MustCompile(labelRegex)
135135

@@ -158,8 +158,8 @@ func DoAutoDiscover(config *configuration.TestConfiguration) DiscoveredTestData
158158
log.Fatal("Failed to retrieve storageClasses - err: %v", err)
159159
}
160160

161-
podsUnderTestLabelsObjects := createLabels(config.PodsUnderTestLabels)
162-
operatorsUnderTestLabelsObjects := createLabels(config.OperatorsUnderTestLabels)
161+
podsUnderTestLabelsObjects := CreateLabels(config.PodsUnderTestLabels)
162+
operatorsUnderTestLabelsObjects := CreateLabels(config.OperatorsUnderTestLabels)
163163

164164
log.Debug("Pods under test labels: %+v", podsUnderTestLabelsObjects)
165165
log.Debug("Operators under test labels: %+v", operatorsUnderTestLabelsObjects)
@@ -180,11 +180,11 @@ func DoAutoDiscover(config *configuration.TestConfiguration) DiscoveredTestData
180180
data.AllPackageManifests = getAllPackageManifests(oc.OlmPkgClient.PackageManifests(""))
181181

182182
data.Namespaces = namespacesListToStringList(config.TargetNameSpaces)
183-
data.Pods, data.AllPods = findPodsByLabels(oc.K8sClient.CoreV1(), podsUnderTestLabelsObjects, data.Namespaces)
183+
data.Pods, data.AllPods = FindPodsByLabels(oc.K8sClient.CoreV1(), podsUnderTestLabelsObjects, data.Namespaces)
184184
data.AbnormalEvents = findAbnormalEvents(oc.K8sClient.CoreV1(), data.Namespaces)
185185
probeLabels := []labelObject{{LabelKey: probeHelperPodsLabelName, LabelValue: probeHelperPodsLabelValue}}
186186
probeNS := []string{config.ProbeDaemonSetNamespace}
187-
data.ProbePods, _ = findPodsByLabels(oc.K8sClient.CoreV1(), probeLabels, probeNS)
187+
data.ProbePods, _ = FindPodsByLabels(oc.K8sClient.CoreV1(), probeLabels, probeNS)
188188
data.ResourceQuotaItems, err = getResourceQuotas(oc.K8sClient.CoreV1())
189189
if err != nil {
190190
log.Fatal("Cannot get resource quotas, err: %v", err)
@@ -222,7 +222,7 @@ func DoAutoDiscover(config *configuration.TestConfiguration) DiscoveredTestData
222222
}
223223

224224
// Best effort mode autodiscovery for operand (running-only) pods.
225-
pods, _ := findPodsByLabels(oc.K8sClient.CoreV1(), nil, data.Namespaces)
225+
pods, _ := FindPodsByLabels(oc.K8sClient.CoreV1(), nil, data.Namespaces)
226226
if err != nil {
227227
log.Fatal("Failed to get running pods, err: %v", err)
228228
}

pkg/autodiscover/autodiscover_pods.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ func findPodsMatchingAtLeastOneLabel(oc corev1client.CoreV1Interface, labels []l
4141
return allPods
4242
}
4343

44-
func findPodsByLabels(oc corev1client.CoreV1Interface, labels []labelObject, namespaces []string) (runningPods, allPods []corev1.Pod) {
44+
func FindPodsByLabels(oc corev1client.CoreV1Interface, labels []labelObject, namespaces []string) (runningPods, allPods []corev1.Pod) {
4545
runningPods = []corev1.Pod{}
4646
allPods = []corev1.Pod{}
4747
// Iterate through namespaces

pkg/autodiscover/autodiscover_pods_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ func TestFindPodsUnderTest(t *testing.T) {
9191
testRuntimeObjects = append(testRuntimeObjects, generatePod(tc.testPodName, tc.testPodNamespace, tc.queryLabel))
9292
oc := clientsholder.GetTestClientsHolder(testRuntimeObjects)
9393

94-
podResult, _ := findPodsByLabels(oc.K8sClient.CoreV1(), testLabel, testNamespaces)
94+
podResult, _ := FindPodsByLabels(oc.K8sClient.CoreV1(), testLabel, testNamespaces)
9595
assert.Equal(t, tc.expectedResults, podResult)
9696
}
9797
}

pkg/autodiscover/autodiscover_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,8 @@ func TestCreateLabels(t *testing.T) {
5050
}
5151
for _, tt := range tests {
5252
t.Run(tt.name, func(t *testing.T) {
53-
if gotLabelObjects := createLabels(tt.args.labelStrings); !reflect.DeepEqual(gotLabelObjects, tt.wantLabelObjects) {
54-
t.Errorf("createLabels() = %v, want %v", gotLabelObjects, tt.wantLabelObjects)
53+
if gotLabelObjects := CreateLabels(tt.args.labelStrings); !reflect.DeepEqual(gotLabelObjects, tt.wantLabelObjects) {
54+
t.Errorf("CreateLabels() = %v, want %v", gotLabelObjects, tt.wantLabelObjects)
5555
}
5656
})
5757
}

tests/identifiers/identifiers.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,7 @@ var (
170170
TestNamespaceResourceQuotaIdentifier claim.Identifier
171171
TestPodDisruptionBudgetIdentifier claim.Identifier
172172
TestAPICompatibilityWithNextOCPReleaseIdentifier claim.Identifier
173+
TestPodCountIdentifier claim.Identifier
173174
TestPodTolerationBypassIdentifier claim.Identifier
174175
TestPersistentVolumeReclaimPolicyIdentifier claim.Identifier
175176
TestContainersImageTag claim.Identifier
@@ -1677,6 +1678,22 @@ that Node's kernel may not have the same hacks.'`,
16771678
},
16781679
TagCommon)
16791680

1681+
TestPodCountIdentifier = AddCatalogEntry(
1682+
"pod-count",
1683+
common.ObservabilityTestKey,
1684+
`Checks that all pods running at the beginning of the tests, continue to run throughout the test`,
1685+
"Ensure all expected pods are running",
1686+
NoExceptions,
1687+
"https://redhat-best-practices-for-k8s.github.io/guide/#observability-pod-count",
1688+
true,
1689+
map[string]string{
1690+
FarEdge: Optional,
1691+
Telco: Optional,
1692+
NonTelco: Optional,
1693+
Extended: Optional,
1694+
},
1695+
TagCommon)
1696+
16801697
TestPodTolerationBypassIdentifier = AddCatalogEntry(
16811698
"pod-toleration-bypass",
16821699
common.LifecycleTestKey,

tests/identifiers/impact.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ const (
136136
TestCrdsStatusSubresourceIdentifierImpact = `Missing status subresources prevent proper monitoring and automation based on custom resource states.`
137137
TestPodDisruptionBudgetIdentifierImpact = `Improper disruption budgets can prevent necessary maintenance operations or allow too many pods to be disrupted simultaneously.`
138138
TestAPICompatibilityWithNextOCPReleaseIdentifierImpact = `Deprecated API usage can cause applications to break during OpenShift upgrades, requiring emergency fixes.`
139+
TestPodCountIdentifierImpact = `Inconsistency of running pods can cause instability of the application.`
139140

140141
// Manageability Test Suite Impact Statements
141142
TestContainersImageTagImpact = `Missing image tags make it difficult to track versions, perform rollbacks, and maintain deployment consistency.`
@@ -277,6 +278,7 @@ var ImpactMap = map[string]string{
277278
"observability-crd-status": TestCrdsStatusSubresourceIdentifierImpact,
278279
"observability-pod-disruption-budget": TestPodDisruptionBudgetIdentifierImpact,
279280
"observability-compatibility-with-next-ocp-release": TestAPICompatibilityWithNextOCPReleaseIdentifierImpact,
281+
"observability-pod-count": TestPodCountIdentifierImpact,
280282

281283
// Manageability Test Suite
282284
"manageability-containers-image-tag": TestContainersImageTagImpact,

tests/observability/suite.go

Lines changed: 78 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,16 +24,16 @@ import (
2424
"strings"
2525

2626
"github.com/Masterminds/semver"
27-
"github.com/redhat-best-practices-for-k8s/certsuite/tests/common"
28-
"github.com/redhat-best-practices-for-k8s/certsuite/tests/identifiers"
29-
pdbv1 "github.com/redhat-best-practices-for-k8s/certsuite/tests/observability/pdb"
30-
3127
apiserv1 "github.com/openshift/api/apiserver/v1"
3228
"github.com/redhat-best-practices-for-k8s/certsuite/internal/clientsholder"
3329
"github.com/redhat-best-practices-for-k8s/certsuite/internal/log"
30+
"github.com/redhat-best-practices-for-k8s/certsuite/pkg/autodiscover"
3431
"github.com/redhat-best-practices-for-k8s/certsuite/pkg/checksdb"
3532
"github.com/redhat-best-practices-for-k8s/certsuite/pkg/provider"
3633
"github.com/redhat-best-practices-for-k8s/certsuite/pkg/testhelper"
34+
"github.com/redhat-best-practices-for-k8s/certsuite/tests/common"
35+
"github.com/redhat-best-practices-for-k8s/certsuite/tests/identifiers"
36+
pdbv1 "github.com/redhat-best-practices-for-k8s/certsuite/tests/observability/pdb"
3737
corev1 "k8s.io/api/core/v1"
3838
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3939
"k8s.io/apimachinery/pkg/labels"
@@ -88,6 +88,13 @@ func LoadChecks() {
8888
testAPICompatibilityWithNextOCPRelease(c, &env)
8989
return nil
9090
}))
91+
92+
checksGroup.Add(checksdb.NewCheck(identifiers.GetTestIDAndLabels(identifiers.TestPodCountIdentifier)).
93+
WithSkipCheckFn(testhelper.GetNoPodsUnderTestSkipFn(&env)).
94+
WithCheckFn(func(c *checksdb.Check) error {
95+
testComparePodCount(c, &env)
96+
return nil
97+
}))
9198
}
9299

93100
// containerHasLoggingOutput helper function to get the last line of logging output from
@@ -423,3 +430,70 @@ func testAPICompatibilityWithNextOCPRelease(check *checksdb.Check, env *provider
423430
// Add test results
424431
check.SetResult(compliantObjects, nonCompliantObjects)
425432
}
433+
434+
// Function to compare the number of running pods to those loaded during autodiscover at the start of test execution.
435+
func testComparePodCount(check *checksdb.Check, env *provider.TestEnvironment) {
436+
oc := clientsholder.GetClientsHolder()
437+
438+
originalPods := env.Pods
439+
440+
currentPods, _ := autodiscover.FindPodsByLabels(oc.K8sClient.CoreV1(), autodiscover.CreateLabels(env.Config.PodsUnderTestLabels), env.Namespaces)
441+
442+
var compliantObjects []*testhelper.ReportObject
443+
var nonCompliantObjects []*testhelper.ReportObject
444+
445+
// Compare pod counts
446+
originalPodCount := len(originalPods)
447+
currentPodCount := len(currentPods)
448+
449+
if originalPodCount == currentPodCount {
450+
check.LogInfo("Pod count is consistent")
451+
compliantObjects = append(compliantObjects,
452+
testhelper.NewReportObject("Pod count is consistent", "PodCount", true).AddField("OriginalCount", fmt.Sprintf("%d", originalPodCount)).AddField("CurrentCount", fmt.Sprintf("%d", currentPodCount)))
453+
} else {
454+
check.LogError("Pod count mismatch: original=%d, current=%d", originalPodCount, currentPodCount)
455+
nonCompliantObjects = append(nonCompliantObjects,
456+
testhelper.NewReportObject("Pod count mismatch", "PodCount", false).AddField("OriginalCount", fmt.Sprintf("%d", originalPodCount)).AddField("CurrentCount", fmt.Sprintf("%d", currentPodCount)))
457+
}
458+
459+
// Create maps for detailed comparison
460+
originalPodsMap := make(map[string]struct{})
461+
for _, pod := range originalPods {
462+
key := fmt.Sprintf("%s/%s", pod.Namespace, pod.Name)
463+
originalPodsMap[key] = struct{}{}
464+
}
465+
466+
currentPodsMap := make(map[string]struct{})
467+
for i := range currentPods {
468+
pod := currentPods[i]
469+
key := fmt.Sprintf("%s/%s", pod.Namespace, pod.Name)
470+
currentPodsMap[key] = struct{}{}
471+
}
472+
473+
// Check for missing pods (in original but not in current)
474+
for _, originalPod := range originalPods {
475+
podKey := fmt.Sprintf("%s/%s", originalPod.Namespace, originalPod.Name)
476+
if _, exists := currentPodsMap[podKey]; !exists {
477+
check.LogError("Pod %q is missing from current state", originalPod.String())
478+
nonCompliantObjects = append(nonCompliantObjects,
479+
testhelper.NewReportObject("Pod is missing from current state", testhelper.PodType, false).AddField(testhelper.PodName, originalPod.Name).AddField(testhelper.Namespace, originalPod.Namespace))
480+
} else {
481+
check.LogInfo("Pod %q is present in current state", originalPod.String())
482+
compliantObjects = append(compliantObjects,
483+
testhelper.NewReportObject("Pod is present in current state", testhelper.PodType, true).AddField(testhelper.PodName, originalPod.Name).AddField(testhelper.Namespace, originalPod.Namespace))
484+
}
485+
}
486+
487+
// Check for extra pods (in current but not in original)
488+
for i := range currentPods {
489+
currentPod := currentPods[i]
490+
podKey := fmt.Sprintf("%s/%s", currentPod.Namespace, currentPod.Name)
491+
if _, exists := originalPodsMap[podKey]; !exists {
492+
check.LogError("Extra pod %s/%s found in current state", currentPod.Namespace, currentPod.Name)
493+
nonCompliantObjects = append(nonCompliantObjects,
494+
testhelper.NewReportObject("Extra pod found in current state", testhelper.PodType, false).AddField(testhelper.PodName, currentPod.Name).AddField(testhelper.Namespace, currentPod.Namespace))
495+
}
496+
}
497+
498+
check.SetResult(compliantObjects, nonCompliantObjects)
499+
}

0 commit comments

Comments
 (0)