Skip to content

Commit 3805ead

Browse files
huikang and rboyer authored
Add metrics labels (#658)
* Add labels to metrics * upgrade memberlist to 0.4.0 Co-authored-by: R.B. Boyer <rb@hashicorp.com>
1 parent b3a2384 commit 3805ead

File tree

9 files changed

+49
-27
lines changed

9 files changed

+49
-27
lines changed

coordinate/client.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,7 @@ func (c *Client) Update(node string, other *Coordinate, rtt time.Duration) (*Coo
218218
return nil, fmt.Errorf("round trip time not in valid range, duration %v is not a positive value less than %v ", rtt, maxRTT)
219219
}
220220
if rtt == 0 {
221-
metrics.IncrCounter([]string{"serf", "coordinate", "zero-rtt"}, 1)
221+
metrics.IncrCounterWithLabels([]string{"serf", "coordinate", "zero-rtt"}, 1, c.config.MetricLabels)
222222
}
223223

224224
rttSeconds := c.latencyFilter(node, rtt.Seconds())

coordinate/config.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
package coordinate
22

3+
import (
4+
"github.com/armon/go-metrics"
5+
)
6+
37
// Config is used to set the parameters of the Vivaldi-based coordinate mapping
48
// algorithm.
59
//
@@ -52,6 +56,9 @@ type Config struct {
5256
// GravityRho is a tuning factor that sets how much gravity has an effect
5357
// to try to re-center coordinates. See [2] for more details.
5458
GravityRho float64
59+
60+
// MetricLabels is the slice of labels to put on all emitted metrics.
61+
MetricLabels []metrics.Label
5562
}
5663

5764
// DefaultConfig returns a Config that has some default values suitable for

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ require (
1313
github.com/hashicorp/go-uuid v1.0.1 // indirect
1414
github.com/hashicorp/logutils v1.0.0
1515
github.com/hashicorp/mdns v1.0.4
16-
github.com/hashicorp/memberlist v0.3.0
16+
github.com/hashicorp/memberlist v0.4.0
1717
github.com/mattn/go-colorable v0.1.6 // indirect
1818
github.com/mitchellh/cli v1.1.0
1919
github.com/mitchellh/mapstructure v0.0.0-20160808181253-ca63d7c062ee

go.sum

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,8 @@ github.com/hashicorp/logutils v1.0.0 h1:dLEQVugN8vlakKOUE3ihGLTZJRB4j+M2cdTm/ORI
3737
github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64=
3838
github.com/hashicorp/mdns v1.0.4 h1:sY0CMhFmjIPDMlTB+HfymFHCaYLhgifZ0QhjaYKD/UQ=
3939
github.com/hashicorp/mdns v1.0.4/go.mod h1:mtBihi+LeNXGtG8L9dX59gAEa12BDtBQSp4v/YAJqrc=
40-
github.com/hashicorp/memberlist v0.3.0 h1:8+567mCcFDnS5ADl7lrpxPMWiFCElyUEeW0gtj34fMA=
41-
github.com/hashicorp/memberlist v0.3.0/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE=
40+
github.com/hashicorp/memberlist v0.4.0 h1:k3uda5gZcltmafuFF+UFqNEl5PrH+yPZ4zkjp1f/H/8=
41+
github.com/hashicorp/memberlist v0.4.0/go.mod h1:yvyXLpo0QaGE59Y7hDTsTzDD25JYBZ4mHgHUZ8lrOI0=
4242
github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU=
4343
github.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE=
4444
github.com/mattn/go-colorable v0.1.6 h1:6Su7aK7lXmJ/U79bYtBjLNaha4Fs1Rg9plHpcH+vvnE=
@@ -91,8 +91,9 @@ golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7w
9191
golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
9292
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
9393
golang.org/x/sys v0.0.0-20210303074136-134d130e1a04/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
94-
golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44 h1:Bli41pIlzTzf3KEY06n+xnzK/BESIg2ze4Pgfh/aI8c=
9594
golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
95+
golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10 h1:WIoqL4EROvwiPdUtaip4VcDdpZ4kha7wBWZrbVKCIZg=
96+
golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
9697
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
9798
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
9899
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=

serf/config.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"os"
77
"time"
88

9+
"github.com/armon/go-metrics"
910
"github.com/hashicorp/memberlist"
1011
)
1112

@@ -262,6 +263,9 @@ type Config struct {
262263
// contain alphanumeric, dashes and '.' characters
263264
// and sets maximum length to 128 characters
264265
ValidateNodeNames bool
266+
267+
// MetricLabels is a slice of optional labels to apply to all metrics emitted.
268+
MetricLabels []metrics.Label
265269
}
266270

267271
// Init allocates the subdata structures

serf/delegate.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ func (d *delegate) NotifyMsg(buf []byte) {
3030
if len(buf) == 0 {
3131
return
3232
}
33-
metrics.AddSample([]string{"serf", "msgs", "received"}, float32(len(buf)))
33+
metrics.AddSampleWithLabels([]string{"serf", "msgs", "received"}, float32(len(buf)), d.serf.metricLabels)
3434

3535
rebroadcast := false
3636
rebroadcastQueue := d.serf.broadcasts
@@ -142,7 +142,7 @@ func (d *delegate) GetBroadcasts(overhead, limit int) [][]byte {
142142
for _, msg := range msgs {
143143
lm := len(msg)
144144
bytesUsed += lm + overhead
145-
metrics.AddSample([]string{"serf", "msgs", "sent"}, float32(lm))
145+
metrics.AddSampleWithLabels([]string{"serf", "msgs", "sent"}, float32(lm), d.serf.metricLabels)
146146
}
147147

148148
// Get any additional query broadcasts
@@ -151,7 +151,7 @@ func (d *delegate) GetBroadcasts(overhead, limit int) [][]byte {
151151
for _, m := range queryMsgs {
152152
lm := len(m)
153153
bytesUsed += lm + overhead
154-
metrics.AddSample([]string{"serf", "msgs", "sent"}, float32(lm))
154+
metrics.AddSampleWithLabels([]string{"serf", "msgs", "sent"}, float32(lm), d.serf.metricLabels)
155155
}
156156
msgs = append(msgs, queryMsgs...)
157157
}
@@ -162,7 +162,7 @@ func (d *delegate) GetBroadcasts(overhead, limit int) [][]byte {
162162
for _, m := range eventMsgs {
163163
lm := len(m)
164164
bytesUsed += lm + overhead
165-
metrics.AddSample([]string{"serf", "msgs", "sent"}, float32(lm))
165+
metrics.AddSampleWithLabels([]string{"serf", "msgs", "sent"}, float32(lm), d.serf.keyManager.serf.metricLabels)
166166
}
167167
msgs = append(msgs, eventMsgs...)
168168
}

serf/ping_delegate.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ func (p *pingDelegate) NotifyPingComplete(other *memberlist.Node, rtt time.Durat
6868
before := p.serf.coordClient.GetCoordinate()
6969
after, err := p.serf.coordClient.Update(other.Name, &coord, rtt)
7070
if err != nil {
71-
metrics.IncrCounter([]string{"serf", "coordinate", "rejected"}, 1)
71+
metrics.IncrCounterWithLabels([]string{"serf", "coordinate", "rejected"}, 1, p.serf.metricLabels)
7272
p.serf.logger.Printf("[TRACE] serf: Rejected coordinate from %s: %v\n",
7373
other.Name, err)
7474
return
@@ -77,7 +77,7 @@ func (p *pingDelegate) NotifyPingComplete(other *memberlist.Node, rtt time.Durat
7777
// Publish some metrics to give us an idea of how much we are
7878
// adjusting each time we update.
7979
d := float32(before.DistanceTo(after).Seconds() * 1.0e3)
80-
metrics.AddSample([]string{"serf", "coordinate", "adjustment-ms"}, d)
80+
metrics.AddSampleWithLabels([]string{"serf", "coordinate", "adjustment-ms"}, d, p.serf.metricLabels)
8181

8282
// Cache the coordinate for the other node, and add our own
8383
// to the cache as well since it just got updated. This lets

serf/serf.go

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,9 @@ type Serf struct {
105105
coordClient *coordinate.Client
106106
coordCache map[string]*coordinate.Coordinate
107107
coordCacheLock sync.RWMutex
108+
109+
// metricLabels is the slice of labels to put on all emitted metrics
110+
metricLabels []metrics.Label
108111
}
109112

110113
// SerfState is the state of the Serf instance.
@@ -270,6 +273,7 @@ func Create(conf *Config) (*Serf, error) {
270273
queryResponse: make(map[LamportTime]*QueryResponse),
271274
shutdownCh: make(chan struct{}),
272275
state: SerfAlive,
276+
metricLabels: conf.MetricLabels,
273277
}
274278
serf.eventJoinIgnore.Store(false)
275279

@@ -313,7 +317,9 @@ func Create(conf *Config) (*Serf, error) {
313317

314318
// Set up network coordinate client.
315319
if !conf.DisableCoordinates {
316-
serf.coordClient, err = coordinate.NewClient(coordinate.DefaultConfig())
320+
coordinateConfig := coordinate.DefaultConfig()
321+
coordinateConfig.MetricLabels = serf.metricLabels
322+
serf.coordClient, err = coordinate.NewClient(coordinateConfig)
317323
if err != nil {
318324
return nil, fmt.Errorf("Failed to create coordinate client: %v", err)
319325
}
@@ -334,6 +340,7 @@ func Create(conf *Config) (*Serf, error) {
334340
if err != nil {
335341
return nil, fmt.Errorf("Failed to setup snapshot: %v", err)
336342
}
343+
snap.metricLabels = serf.metricLabels
337344
serf.snapshotter = snap
338345
conf.EventCh = eventCh
339346
prev = snap.AliveNodes()
@@ -404,6 +411,8 @@ func Create(conf *Config) (*Serf, error) {
404411
conf.MemberlistConfig.Alive = md
405412
}
406413

414+
conf.MemberlistConfig.MetricLabels = conf.MetricLabels
415+
407416
// Create the underlying memberlist that will manage membership
408417
// and failure detection for the Serf instance.
409418
memberlist, err := memberlist.Create(conf.MemberlistConfig)
@@ -953,7 +962,7 @@ func (s *Serf) handleNodeJoin(n *memberlist.Node) {
953962
oldStatus = member.Status
954963
deadTime := time.Now().Sub(member.leaveTime)
955964
if oldStatus == StatusFailed && deadTime < s.config.FlapTimeout {
956-
metrics.IncrCounter([]string{"serf", "member", "flap"}, 1)
965+
metrics.IncrCounterWithLabels([]string{"serf", "member", "flap"}, 1, s.metricLabels)
957966
}
958967

959968
member.Status = StatusAlive
@@ -980,7 +989,7 @@ func (s *Serf) handleNodeJoin(n *memberlist.Node) {
980989
}
981990

982991
// Update some metrics
983-
metrics.IncrCounter([]string{"serf", "member", "join"}, 1)
992+
metrics.IncrCounterWithLabels([]string{"serf", "member", "join"}, 1, s.metricLabels)
984993

985994
// Send an event along
986995
s.logger.Printf("[INFO] serf: EventMemberJoin: %s %s",
@@ -1030,7 +1039,7 @@ func (s *Serf) handleNodeLeave(n *memberlist.Node) {
10301039
}
10311040

10321041
// Update some metrics
1033-
metrics.IncrCounter([]string{"serf", "member", member.Status.String()}, 1)
1042+
metrics.IncrCounterWithLabels([]string{"serf", "member", member.Status.String()}, 1, s.metricLabels)
10341043

10351044
s.logger.Printf("[INFO] serf: %s: %s %s",
10361045
eventStr, member.Member.Name, member.Member.Addr)
@@ -1074,7 +1083,7 @@ func (s *Serf) handleNodeUpdate(n *memberlist.Node) {
10741083
member.DelegateCur = n.DCur
10751084

10761085
// Update some metrics
1077-
metrics.IncrCounter([]string{"serf", "member", "update"}, 1)
1086+
metrics.IncrCounterWithLabels([]string{"serf", "member", "update"}, 1, s.metricLabels)
10781087

10791088
// Send an event along
10801089
s.logger.Printf("[INFO] serf: EventMemberUpdate: %s", member.Member.Name)
@@ -1272,8 +1281,8 @@ func (s *Serf) handleUserEvent(eventMsg *messageUserEvent) bool {
12721281
seen.Events = append(seen.Events, userEvent)
12731282

12741283
// Update some metrics
1275-
metrics.IncrCounter([]string{"serf", "events"}, 1)
1276-
metrics.IncrCounter([]string{"serf", "events", eventMsg.Name}, 1)
1284+
metrics.IncrCounterWithLabels([]string{"serf", "events"}, 1, s.metricLabels)
1285+
metrics.IncrCounterWithLabels([]string{"serf", "events", eventMsg.Name}, 1, s.metricLabels)
12771286

12781287
if s.config.EventCh != nil {
12791288
s.config.EventCh <- UserEvent{
@@ -1331,8 +1340,8 @@ func (s *Serf) handleQuery(query *messageQuery) bool {
13311340
seen.QueryIDs = append(seen.QueryIDs, query.ID)
13321341

13331342
// Update some metrics
1334-
metrics.IncrCounter([]string{"serf", "queries"}, 1)
1335-
metrics.IncrCounter([]string{"serf", "queries", query.Name}, 1)
1343+
metrics.IncrCounterWithLabels([]string{"serf", "queries"}, 1, s.metricLabels)
1344+
metrics.IncrCounterWithLabels([]string{"serf", "queries", query.Name}, 1, s.metricLabels)
13361345

13371346
// Check if we should rebroadcast, this may be disabled by a flag
13381347
rebroadcast := true
@@ -1419,23 +1428,23 @@ func (s *Serf) handleQueryResponse(resp *messageQueryResponse) {
14191428
if resp.Ack() {
14201429
// Exit early if this is a duplicate ack
14211430
if _, ok := query.acks[resp.From]; ok {
1422-
metrics.IncrCounter([]string{"serf", "query_duplicate_acks"}, 1)
1431+
metrics.IncrCounterWithLabels([]string{"serf", "query_duplicate_acks"}, 1, s.metricLabels)
14231432
return
14241433
}
14251434

1426-
metrics.IncrCounter([]string{"serf", "query_acks"}, 1)
1435+
metrics.IncrCounterWithLabels([]string{"serf", "query_acks"}, 1, s.metricLabels)
14271436
err := query.sendAck(resp)
14281437
if err != nil {
14291438
s.logger.Printf("[WARN] %v", err)
14301439
}
14311440
} else {
14321441
// Exit early if this is a duplicate response
14331442
if _, ok := query.responses[resp.From]; ok {
1434-
metrics.IncrCounter([]string{"serf", "query_duplicate_responses"}, 1)
1443+
metrics.IncrCounterWithLabels([]string{"serf", "query_duplicate_responses"}, 1, s.metricLabels)
14351444
return
14361445
}
14371446

1438-
metrics.IncrCounter([]string{"serf", "query_responses"}, 1)
1447+
metrics.IncrCounterWithLabels([]string{"serf", "query_responses"}, 1, s.metricLabels)
14391448
err := query.sendResponse(NodeResponse{From: resp.From, Payload: resp.Payload})
14401449
if err != nil {
14411450
s.logger.Printf("[WARN] %v", err)
@@ -1676,7 +1685,7 @@ func (s *Serf) checkQueueDepth(name string, queue *memberlist.TransmitLimitedQue
16761685
select {
16771686
case <-time.After(s.config.QueueCheckInterval):
16781687
numq := queue.NumQueued()
1679-
metrics.AddSample([]string{"serf", "queue", name}, float32(numq))
1688+
metrics.AddSampleWithLabels([]string{"serf", "queue", name}, float32(numq), s.metricLabels)
16801689
if numq >= s.config.QueueDepthWarning {
16811690
s.logger.Printf("[WARN] serf: %s queue depth: %d", name, numq)
16821691
}

serf/snapshot.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ type Snapshotter struct {
7878
shutdownCh <-chan struct{}
7979
waitCh chan struct{}
8080
lastAttemptedCompaction time.Time
81+
metricLabels []metrics.Label
8182
}
8283

8384
// PreviousNode is used to represent the previously known alive nodes
@@ -390,7 +391,7 @@ func (s *Snapshotter) tryAppend(l string) {
390391

391392
// appendLine is used to append a line to the existing log
392393
func (s *Snapshotter) appendLine(l string) error {
393-
defer metrics.MeasureSince([]string{"serf", "snapshot", "appendLine"}, time.Now())
394+
defer metrics.MeasureSinceWithLabels([]string{"serf", "snapshot", "appendLine"}, time.Now(), s.metricLabels)
394395

395396
n, err := s.buffered.WriteString(l)
396397
if err != nil {
@@ -429,7 +430,7 @@ func (s *Snapshotter) snapshotMaxSize() int64 {
429430

430431
// compact is used to compact the snapshot once it is too large
431432
func (s *Snapshotter) compact() error {
432-
defer metrics.MeasureSince([]string{"serf", "snapshot", "compact"}, time.Now())
433+
defer metrics.MeasureSinceWithLabels([]string{"serf", "snapshot", "compact"}, time.Now(), s.metricLabels)
433434

434435
// Try to open the new file
435436
newPath := s.path + tmpExt

0 commit comments

Comments
 (0)