Skip to content

Commit 123d17a

Browse files
authored
feat: node metrics collector (#1516)
* feat: node metrics collector

  A collector to collect node metrics served by the API server, as per the documented API: https://kubernetes.io/docs/reference/instrumentation/node-metrics/
* Update CRD schemas
* Add tests
* Remove clean from build target
* Update comments
* Commit missing tests
* Remove unnecessary log in tests
1 parent 867c706 commit 123d17a

14 files changed

+365
-8
lines changed

Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,8 @@ support-bundle-e2e-go-test:
8080
go test ${BUILDFLAGS} ${E2EPATHS} -v; \
8181
fi
8282

83+
rebuild: clean build
84+
8385
# Build all binaries in parallel ( -j )
8486
build: tidy
8587
@echo "Build cli binaries"

config/crds/troubleshoot.sh_collectors.yaml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -551,6 +551,21 @@ spec:
551551
required:
552552
- uri
553553
type: object
554+
nodeMetrics:
555+
properties:
556+
collectorName:
557+
type: string
558+
exclude:
559+
type: BoolString
560+
nodeNames:
561+
items:
562+
type: string
563+
type: array
564+
selector:
565+
items:
566+
type: string
567+
type: array
568+
type: object
554569
postgres:
555570
properties:
556571
collectorName:

config/crds/troubleshoot.sh_preflights.yaml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2171,6 +2171,21 @@ spec:
21712171
required:
21722172
- uri
21732173
type: object
2174+
nodeMetrics:
2175+
properties:
2176+
collectorName:
2177+
type: string
2178+
exclude:
2179+
type: BoolString
2180+
nodeNames:
2181+
items:
2182+
type: string
2183+
type: array
2184+
selector:
2185+
items:
2186+
type: string
2187+
type: array
2188+
type: object
21742189
postgres:
21752190
properties:
21762191
collectorName:

config/crds/troubleshoot.sh_supportbundles.yaml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2202,6 +2202,21 @@ spec:
22022202
required:
22032203
- uri
22042204
type: object
2205+
nodeMetrics:
2206+
properties:
2207+
collectorName:
2208+
type: string
2209+
exclude:
2210+
type: BoolString
2211+
nodeNames:
2212+
items:
2213+
type: string
2214+
type: array
2215+
selector:
2216+
items:
2217+
type: string
2218+
type: array
2219+
type: object
22052220
postgres:
22062221
properties:
22072222
collectorName:

pkg/apis/troubleshoot/v1beta2/collector_shared.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,12 @@ type CustomMetrics struct {
4444
MetricRequests []MetricRequest `json:"metricRequests,omitempty" yaml:"metricRequests,omitempty"`
4545
}
4646

47+
// NodeMetrics is the collector spec for gathering node metrics.
// When both NodeNames and Selector are nil, the node metrics collector
// gathers metrics for every node it can list.
type NodeMetrics struct {
48+
CollectorMeta `json:",inline" yaml:",inline"`
49+
// NodeNames lists nodes, by name, whose metrics should be collected.
NodeNames []string `json:"nodeNames,omitempty" yaml:"nodeNames,omitempty"`
50+
// Selector lists label selector expressions; the collector joins them with
// "," and uses the result to list matching nodes.
Selector []string `json:"selector,omitempty" yaml:"selector,omitempty"`
51+
}
52+
4753
type Secret struct {
4854
CollectorMeta `json:",inline" yaml:",inline"`
4955
Name string `json:"name,omitempty" yaml:"name,omitempty"`
@@ -315,6 +321,7 @@ type Collect struct {
315321
Helm *Helm `json:"helm,omitempty" yaml:"helm,omitempty"`
316322
Goldpinger *Goldpinger `json:"goldpinger,omitempty" yaml:"goldpinger,omitempty"`
317323
Sonobuoy *Sonobuoy `json:"sonobuoy,omitempty" yaml:"sonobuoy,omitempty"`
324+
NodeMetrics *NodeMetrics `json:"nodeMetrics,omitempty" yaml:"nodeMetrics,omitempty"`
318325
}
319326

320327
func (c *Collect) AccessReviewSpecs(overrideNS string) []authorizationv1.SelfSubjectAccessReviewSpec {

pkg/apis/troubleshoot/v1beta2/zz_generated.deepcopy.go

Lines changed: 31 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/collect/collector.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,8 @@ func GetCollector(collector *troubleshootv1beta2.Collect, bundlePath string, nam
122122
return &CollectGoldpinger{collector.Goldpinger, bundlePath, namespace, clientConfig, client, ctx, RBACErrors}, true
123123
case collector.Sonobuoy != nil:
124124
return &CollectSonobuoyResults{collector.Sonobuoy, bundlePath, namespace, clientConfig, client, ctx, RBACErrors}, true
125+
case collector.NodeMetrics != nil:
126+
return &CollectNodeMetrics{collector.NodeMetrics, bundlePath, clientConfig, client, ctx, RBACErrors}, true
125127
default:
126128
return nil, false
127129
}
@@ -211,6 +213,8 @@ func getCollectorName(c interface{}) string {
211213
collector = "goldpinger"
212214
case *CollectSonobuoyResults:
213215
collector = "sonobuoy"
216+
case *CollectNodeMetrics:
217+
collector = "node-metrics"
214218
default:
215219
collector = "<none>"
216220
}

pkg/collect/k8s_node_metrics.go

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
package collect
2+
3+
import (
4+
"bytes"
5+
"context"
6+
"fmt"
7+
"strings"
8+
9+
"github.com/pkg/errors"
10+
troubleshootv1beta2 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2"
11+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
12+
"k8s.io/client-go/kubernetes"
13+
"k8s.io/client-go/rest"
14+
"k8s.io/klog/v2"
15+
)
16+
17+
const (
18+
// summaryUrlTemplate is the API server path format of a node's proxied
// stats summary endpoint; %s is replaced with the node name.
summaryUrlTemplate = "/api/v1/nodes/%s/proxy/stats/summary"
19+
)
20+
21+
// CollectNodeMetrics collects the stats summary of cluster nodes through the
// API server and stores one JSON file per node in the bundle.
type CollectNodeMetrics struct {
22+
// Collector is the spec: exclusion flag, node names and label selectors.
Collector *troubleshootv1beta2.NodeMetrics
23+
// BundlePath is passed to SaveResult when storing each node's response.
BundlePath string
24+
ClientConfig *rest.Config
25+
// Client is used both to list nodes and to query the summary endpoints.
Client kubernetes.Interface
26+
// Context is passed to every API call made by this collector.
Context context.Context
27+
RBACErrors
28+
}
29+
30+
// Title returns the collector's name as reported by getCollectorName.
func (c *CollectNodeMetrics) Title() string {
31+
return getCollectorName(c)
32+
}
33+
34+
// IsExcluded returns the result of evaluating the spec's Exclude field with
// isExcluded.
func (c *CollectNodeMetrics) IsExcluded() (bool, error) {
35+
return isExcluded(c.Collector.Exclude)
36+
}
37+
38+
func (c *CollectNodeMetrics) Collect(progressChan chan<- interface{}) (CollectorResult, error) {
39+
output := NewResult()
40+
nodesMap := c.constructNodesMap()
41+
if len(nodesMap) == 0 {
42+
klog.V(2).Info("no nodes found to collect metrics for")
43+
return output, nil
44+
}
45+
46+
nodeNames := make([]string, 0, len(nodesMap))
47+
for nodeName := range nodesMap {
48+
nodeNames = append(nodeNames, nodeName)
49+
}
50+
51+
klog.V(2).Infof("collecting node metrics for [%s] nodes", strings.Join(nodeNames, ", "))
52+
53+
for nodeName, endpoint := range nodesMap {
54+
// Equivalent to `kubectl get --raw "/api/v1/nodes/<nodeName>/proxy/stats/summary"`
55+
klog.V(2).Infof("querying: %+v\n", endpoint)
56+
response, err := c.Client.CoreV1().RESTClient().Get().AbsPath(endpoint).DoRaw(c.Context)
57+
if err != nil {
58+
return output, errors.Wrapf(err, "could not query endpoint %s", endpoint)
59+
}
60+
err = output.SaveResult(c.BundlePath, fmt.Sprintf("node-metrics/%s.json", nodeName), bytes.NewBuffer(response))
61+
if err != nil {
62+
klog.Errorf("failed to save node metrics for %s: %v", nodeName, err)
63+
}
64+
65+
}
66+
return output, nil
67+
}
68+
69+
func (c *CollectNodeMetrics) constructNodesMap() map[string]string {
70+
nodesMap := map[string]string{}
71+
72+
if c.Collector.NodeNames == nil && c.Collector.Selector == nil {
73+
// If no node names or selectors are provided, collect all nodes
74+
nodes, err := c.Client.CoreV1().Nodes().List(c.Context, metav1.ListOptions{})
75+
if err != nil {
76+
klog.Errorf("failed to list nodes: %v", err)
77+
}
78+
for _, node := range nodes.Items {
79+
nodesMap[node.Name] = fmt.Sprintf(summaryUrlTemplate, node.Name)
80+
}
81+
return nodesMap
82+
}
83+
84+
for _, nodeName := range c.Collector.NodeNames {
85+
nodesMap[nodeName] = fmt.Sprintf(summaryUrlTemplate, nodeName)
86+
}
87+
88+
// Find nodes by label selector
89+
if c.Collector.Selector != nil {
90+
nodes, err := c.Client.CoreV1().Nodes().List(c.Context, metav1.ListOptions{
91+
LabelSelector: strings.Join(c.Collector.Selector, ","),
92+
})
93+
if err != nil {
94+
klog.Errorf("failed to list nodes by label selector: %v", err)
95+
}
96+
for _, node := range nodes.Items {
97+
nodesMap[node.Name] = fmt.Sprintf(summaryUrlTemplate, node.Name)
98+
}
99+
}
100+
101+
return nodesMap
102+
}

pkg/collect/k8s_node_metrics_test.go

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
package collect
2+
3+
import (
4+
"context"
5+
"testing"
6+
7+
troubleshootv1beta2 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2"
8+
"github.com/stretchr/testify/assert"
9+
"github.com/stretchr/testify/require"
10+
v1 "k8s.io/api/core/v1"
11+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
12+
testclient "k8s.io/client-go/kubernetes/fake"
13+
)
14+
15+
func TestCollectNodeMetrics_constructNodesMap(t *testing.T) {
16+
tests := []struct {
17+
name string
18+
objectMetas []metav1.ObjectMeta
19+
collector troubleshootv1beta2.NodeMetrics
20+
want map[string]string
21+
}{
22+
{
23+
name: "default collector no nodes",
24+
want: map[string]string{},
25+
},
26+
{
27+
name: "default collector one node",
28+
objectMetas: []metav1.ObjectMeta{
29+
{
30+
Name: "node1",
31+
},
32+
},
33+
want: map[string]string{
34+
"node1": "/api/v1/nodes/node1/proxy/stats/summary",
35+
},
36+
},
37+
{
38+
name: "collector with node list picking one node",
39+
objectMetas: []metav1.ObjectMeta{
40+
{
41+
Name: "node1",
42+
},
43+
{
44+
Name: "node2",
45+
},
46+
},
47+
collector: troubleshootv1beta2.NodeMetrics{
48+
NodeNames: []string{"node2"},
49+
},
50+
want: map[string]string{
51+
"node2": "/api/v1/nodes/node2/proxy/stats/summary",
52+
},
53+
},
54+
{
55+
name: "collector with selector picking one node",
56+
objectMetas: []metav1.ObjectMeta{
57+
{
58+
Name: "node1",
59+
Labels: map[string]string{
60+
"hostname": "node1.example.com",
61+
},
62+
},
63+
{
64+
Name: "node2",
65+
},
66+
},
67+
collector: troubleshootv1beta2.NodeMetrics{
68+
Selector: []string{"hostname=node1.example.com"},
69+
},
70+
want: map[string]string{
71+
"node1": "/api/v1/nodes/node1/proxy/stats/summary",
72+
},
73+
},
74+
}
75+
for _, tt := range tests {
76+
t.Run(tt.name, func(t *testing.T) {
77+
client := testclient.NewSimpleClientset()
78+
ctx := context.Background()
79+
collector := tt.collector
80+
c := &CollectNodeMetrics{
81+
Collector: &collector,
82+
Client: client,
83+
Context: ctx,
84+
}
85+
86+
for _, objectMeta := range tt.objectMetas {
87+
_, err := client.CoreV1().Nodes().Create(ctx, &v1.Node{
88+
ObjectMeta: objectMeta,
89+
}, metav1.CreateOptions{})
90+
require.NoError(t, err)
91+
}
92+
93+
got := c.constructNodesMap()
94+
assert.Equalf(t, tt.want, got, "constructNodesMap() = %v, want %v", got, tt.want)
95+
})
96+
}
97+
}

pkg/collect/redact.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,9 +71,9 @@ func RedactResult(bundlePath string, input CollectorResult, additionalRedactors
7171
errorCh <- errors.Wrap(err, "failed to get relative path")
7272
return
7373
}
74-
klog.V(2).Infof("Redacting %s (symlink => %s)\n", file, symlink)
74+
klog.V(4).Infof("Redacting %s (symlink => %s)\n", file, symlink)
7575
} else {
76-
klog.V(2).Infof("Redacting %s\n", file)
76+
klog.V(4).Infof("Redacting %s\n", file)
7777
}
7878
r, err := input.GetReader(bundlePath, file)
7979
if err != nil {

0 commit comments

Comments
 (0)