Skip to content

Commit 0c24ce6

Browse files
authored
Merge pull request #760 from csfldf/dev/net_metric
feat(metric): add NIC speed, tx_bps, rx_bps metrics
2 parents 356adb7 + 1afa6a7 commit 0c24ce6

File tree

7 files changed

+97
-17
lines changed

7 files changed

+97
-17
lines changed

pkg/consts/metric.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,11 +99,13 @@ const (
9999
MetricNetTcpRetranSegs = "net.tcp.retrans_segs"
100100
MetricNetTcpRecvPackets = "net.tcp.out_segs"
101101
MetricNetTcpCloseWait = "net.tcp.close_wait"
102+
MetricNetUpdateTime = "net.updatetime"
102103
)
103104

104105
// System network metrics
105106
const (
106107
MetricNetReceiveBytes = "net.tcp.receive_bytes"
108+
MetricNetReceiveBPS = "net.receive.bps"
107109
MetricNetReceivePackets = "net.tcp.receive_packets"
108110
MetricNetReceiveErrs = "net.tcp.receive_errs"
109111
MetricNetReceiveDrops = "net.tcp.receive_drop"
@@ -112,13 +114,15 @@ const (
112114
MetricNetReceiveCompressed = "net.tcp.receive_compressed"
113115
MetricNetTransmitMulticast = "net.tcp.receive_multicast"
114116
MetricNetTransmitBytes = "net.tcp.transmit_bytes"
117+
MetricNetTransmitBPS = "net.transmit.bps"
115118
MetricNetTransmitPackets = "net.tcp.transmit_packets"
116119
MetricNetTransmitErrs = "net.tcp.transmit_errs"
117120
MetricNetTransmitDrops = "net.tcp.transmit_drop"
118121
MetricNetTransmitFIFO = "net.tcp.transmit_fifo"
119122
MetricNetTransmitColls = "net.tcp.transmit_colls"
120123
MetricNetTransmitCarrier = "net.tcp.transmit_carrier"
121124
MetricNetTransmitCompressed = "net.tcp.transmit_compressed"
125+
MetricNetSpeed = "net.speed"
122126
)
123127

124128
// Node filesystem metrics

pkg/metaserver/agent/metric/fake_metric.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,10 @@ func (f *FakeMetricsFetcher) GetNumaMetric(numaID int, metricName string) (metri
102102
return f.checkMetricDataExpire(f.metricStore.GetNumaMetric(numaID, metricName))
103103
}
104104

105+
func (f *FakeMetricsFetcher) GetNetworkMetric(networkName string, metricName string) (metric.MetricData, error) {
106+
return f.checkMetricDataExpire(f.metricStore.GetDeviceMetric(networkName, metricName))
107+
}
108+
105109
// GetDeviceMetric looks up metricName for the device deviceName in the fake's
// metric store and returns whatever checkMetricDataExpire yields for that
// lookup result.
func (f *FakeMetricsFetcher) GetDeviceMetric(deviceName string, metricName string) (metric.MetricData, error) {
	data, err := f.metricStore.GetDeviceMetric(deviceName, metricName)
	return f.checkMetricDataExpire(data, err)
}

pkg/metaserver/agent/metric/metric_impl.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,10 @@ func (f *MetricsFetcherImpl) GetDeviceMetric(deviceName string, metricName strin
326326
return f.checkMetricDataExpire(f.metricStore.GetDeviceMetric(deviceName, metricName))
327327
}
328328

329+
func (f *MetricsFetcherImpl) GetNetworkMetric(networkName string, metricName string) (utilmetric.MetricData, error) {
330+
return f.checkMetricDataExpire(f.metricStore.GetNetworkMetric(networkName, metricName))
331+
}
332+
329333
// GetCPUMetric looks up metricName for the core coreID in the metric store
// and hands the lookup result (data and error) to checkMetricDataExpire,
// whose return values are returned unchanged.
func (f *MetricsFetcherImpl) GetCPUMetric(coreID int, metricName string) (utilmetric.MetricData, error) {
	data, err := f.metricStore.GetCPUMetric(coreID, metricName)
	return f.checkMetricDataExpire(data, err)
}

pkg/metaserver/agent/metric/metric_test.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ func Test_notifySystem(t *testing.T) {
8282
now := time.Now()
8383
m.metricStore.SetNodeMetric("test-node-metric", metric.MetricData{Value: 34, Time: &now})
8484
m.metricStore.SetNumaMetric(1, "test-numa-metric", metric.MetricData{Value: 56, Time: &now})
85+
m.metricStore.SetNetworkMetric("eth0", "test-net-metric", metric.MetricData{Value: 56, Time: &now})
8586
m.metricStore.SetCPUMetric(2, "test-cpu-metric", metric.MetricData{Value: 78, Time: &now})
8687
m.metricStore.SetDeviceMetric("test-device", "test-device-metric", metric.MetricData{Value: 91, Time: &now})
8788
m.metricStore.SetContainerMetric("test-pod", "test-container", "test-container-metric", metric.MetricData{Value: 91, Time: &now})
@@ -152,6 +153,12 @@ func Test_notifySystem(t *testing.T) {
152153
}
153154
}
154155
assert.Equal(t, 8, totalNotification)
156+
_, err := f.GetNetworkMetric("eth0", "test-net-metric")
157+
assert.Nil(t, err)
158+
159+
ff := NewFakeMetricsFetcher(metrics.DummyMetrics{})
160+
_, err = ff.GetNetworkMetric("eth0", "test-net-metric")
161+
assert.NotNil(t, err)
155162
}
156163

157164
func TestStore_Aggregate(t *testing.T) {

pkg/metaserver/agent/metric/provisioner/malachite/provisioner.go

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package malachite
1818

1919
import (
2020
"context"
21+
"fmt"
2122
"math"
2223
"strconv"
2324
"strings"
@@ -38,6 +39,7 @@ import (
3839
"github.com/kubewharf/katalyst-core/pkg/util/cgroup/common"
3940
"github.com/kubewharf/katalyst-core/pkg/util/general"
4041
"github.com/kubewharf/katalyst-core/pkg/util/machine"
42+
"github.com/kubewharf/katalyst-core/pkg/util/metric"
4143
utilmetric "github.com/kubewharf/katalyst-core/pkg/util/metric"
4244
)
4345

@@ -412,6 +414,7 @@ func (m *MalachiteMetricsProvisioner) processSystemNetData(systemNetData *malach
412414
if systemNetData == nil {
413415
return
414416
}
417+
415418
// todo, currently we only get a unified data for the whole system io data
416419
updateTime := time.Unix(systemNetData.UpdateTime, 0)
417420

@@ -433,6 +436,8 @@ func (m *MalachiteMetricsProvisioner) processSystemNetData(systemNetData *malach
433436
utilmetric.MetricData{Value: float64(systemNetData.TCP.TCPOutSegs), Time: &updateTime})
434437
m.metricStore.SetNodeMetric(consts.MetricNetTcpCloseWait,
435438
utilmetric.MetricData{Value: float64(systemNetData.TCP.TCPCloseWait), Time: &updateTime})
439+
m.metricStore.SetNodeMetric(consts.MetricNetUpdateTime,
440+
utilmetric.MetricData{Value: float64(systemNetData.UpdateTime), Time: &updateTime})
436441

437442
for _, device := range systemNetData.NetworkCard {
438443
// for now, we will only consider standard network interface
@@ -441,6 +446,14 @@ func (m *MalachiteMetricsProvisioner) processSystemNetData(systemNetData *malach
441446
continue
442447
}
443448

449+
errs := []error{}
450+
// setNetworkRateMetric will use metricStore.GetNetworkMetric to get previous round metric,
451+
// we should call setNetworkRateMetric before calling SetNetworkMetric
452+
errs = append(errs, m.setNetworkRateMetric(device.Name, consts.MetricNetTransmitBPS,
453+
consts.MetricNetTransmitBytes, float64(device.TransmitBytes), &updateTime))
454+
errs = append(errs, m.setNetworkRateMetric(device.Name, consts.MetricNetReceiveBPS,
455+
consts.MetricNetReceiveBytes, float64(device.ReceiveBytes), &updateTime))
456+
444457
m.metricStore.SetNetworkMetric(device.Name, consts.MetricNetReceiveBytes,
445458
utilmetric.MetricData{Value: float64(device.ReceiveBytes), Time: &updateTime})
446459
m.metricStore.SetNetworkMetric(device.Name, consts.MetricNetReceivePackets,
@@ -474,7 +487,52 @@ func (m *MalachiteMetricsProvisioner) processSystemNetData(systemNetData *malach
474487
m.metricStore.SetNetworkMetric(device.Name, consts.MetricNetTransmitCompressed,
475488
utilmetric.MetricData{Value: float64(device.TransmitCompressed), Time: &updateTime})
476489

490+
if device.Speeds != nil {
491+
m.metricStore.SetNetworkMetric(device.Name, consts.MetricNetSpeed,
492+
utilmetric.MetricData{Value: float64(*device.Speeds), Time: &updateTime})
493+
}
494+
495+
aggErrs := errors.NewAggregate(errs)
496+
497+
if aggErrs != nil {
498+
general.Warningf("set network metrics for: %s got errors: %s", device.Name, aggErrs.Error())
499+
}
500+
}
501+
}
502+
503+
func (m *MalachiteMetricsProvisioner) setNetworkRateMetric(deviceName,
504+
rateMetricName, valueMetricName string,
505+
curValue float64,
506+
curUpdateTime *time.Time,
507+
) error {
508+
lastMetric, err := m.metricStore.GetNetworkMetric(deviceName, valueMetricName)
509+
if err != nil {
510+
return fmt.Errorf("get value metric: %s for %s failed with err: %v",
511+
valueMetricName, rateMetricName, err)
512+
}
513+
514+
lastValue := lastMetric.Value
515+
lastUpdateTime := lastMetric.Time
516+
517+
if curUpdateTime == nil || lastUpdateTime == nil || curUpdateTime.Unix() == 0 || lastUpdateTime.Unix() == 0 {
518+
return fmt.Errorf("nil curUpdateTime or lastUpdateTime for rateMetricName: %s", rateMetricName)
519+
}
520+
521+
timeDeltaInSec := curUpdateTime.Sub(*lastUpdateTime).Seconds()
522+
523+
if timeDeltaInSec <= 0 {
524+
return fmt.Errorf("invalid timeDelta: %.2f", timeDeltaInSec)
477525
}
526+
527+
if (curValue > lastValue) && (curValue != 0) && (lastValue != 0) {
528+
m.metricStore.SetNetworkMetric(deviceName, rateMetricName,
529+
metric.MetricData{Value: (curValue - lastValue) / timeDeltaInSec, Time: curUpdateTime})
530+
} else {
531+
return fmt.Errorf("invalid curValue: %.2f, lastValue: %.2f for rateMetricName: %s",
532+
curValue, lastValue, rateMetricName)
533+
}
534+
535+
return nil
478536
}
479537

480538
func (m *MalachiteMetricsProvisioner) processSystemNumaData(systemMemoryData *malachitetypes.SystemMemoryData, systemComputeData *malachitetypes.SystemComputeData) {

pkg/metaserver/agent/metric/provisioner/malachite/types/system.go

Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -49,23 +49,24 @@ type SystemNetworkData struct {
4949
}
5050

5151
// NetworkCard describes one network interface: its name plus receive and
// transmit counters. The JSON key for each field is given by its struct tag.
type NetworkCard struct {
52-
Name string `json:"name"`
53-
ReceiveBytes uint64 `json:"receive_bytes"`
54-
ReceivePackets uint64 `json:"receive_packets"`
55-
ReceiveErrs uint64 `json:"receive_errs"`
56-
ReceiveDrop uint64 `json:"receive_drop"`
57-
ReceiveFifo uint64 `json:"receive_fifo"`
58-
ReceiveFrame uint64 `json:"receive_frame"`
59-
ReceiveCompressed uint64 `json:"receive_compressed"`
60-
ReceiveMulticast uint64 `json:"receive_multicast"`
61-
TransmitBytes uint64 `json:"transmit_bytes"`
62-
TransmitPackets uint64 `json:"transmit_packets"`
63-
TransmitErrs uint64 `json:"transmit_errs"`
64-
TransmitDrop uint64 `json:"transmit_drop"`
65-
TransmitFifo uint64 `json:"transmit_fifo"`
66-
TransmitColls uint64 `json:"transmit_colls"`
67-
TransmitCarrier uint64 `json:"transmit_carrier"`
68-
TransmitCompressed uint64 `json:"transmit_compressed"`
52+
Name string `json:"name"`
53+
ReceiveBytes uint64 `json:"receive_bytes"`
54+
ReceivePackets uint64 `json:"receive_packets"`
55+
ReceiveErrs uint64 `json:"receive_errs"`
56+
ReceiveDrop uint64 `json:"receive_drop"`
57+
ReceiveFifo uint64 `json:"receive_fifo"`
58+
ReceiveFrame uint64 `json:"receive_frame"`
59+
ReceiveCompressed uint64 `json:"receive_compressed"`
60+
ReceiveMulticast uint64 `json:"receive_multicast"`
61+
TransmitBytes uint64 `json:"transmit_bytes"`
62+
TransmitPackets uint64 `json:"transmit_packets"`
63+
TransmitErrs uint64 `json:"transmit_errs"`
64+
TransmitDrop uint64 `json:"transmit_drop"`
65+
TransmitFifo uint64 `json:"transmit_fifo"`
66+
TransmitColls uint64 `json:"transmit_colls"`
67+
TransmitCarrier uint64 `json:"transmit_carrier"`
68+
TransmitCompressed uint64 `json:"transmit_compressed"`
69+
// Speeds is a pointer and may be nil; processSystemNetData only publishes
// the net.speed metric when it is non-nil.
// NOTE(review): the unit of this value is not shown here — confirm.
Speeds *uint64 `json:"speeds"`
6970
}
7071

7172
type TCP struct {

pkg/metaserver/agent/metric/types/metric.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,8 @@ type MetricsReader interface {
7272
GetNumaMetric(numaID int, metricName string) (metric.MetricData, error)
7373
// GetDeviceMetric get metric of device.
7474
GetDeviceMetric(deviceName string, metricName string) (metric.MetricData, error)
75+
// GetNetworkMetric get metric of network.
76+
GetNetworkMetric(networkName string, metricName string) (metric.MetricData, error)
7577
// GetCPUMetric get metric of cpu.
7678
GetCPUMetric(coreID int, metricName string) (metric.MetricData, error)
7779
// GetContainerMetric get metric of container.

0 commit comments

Comments
 (0)