|  | 
| 1 | 1 | package servicecheck | 
| 2 | 2 | 
 | 
| 3 | 3 | import ( | 
|  | 4 | +	"crypto/tls" | 
|  | 5 | +	"log" | 
| 4 | 6 | 	"net/http" | 
|  | 7 | +	"net/http/httptrace" | 
|  | 8 | +	"time" | 
| 5 | 9 | 
 | 
| 6 | 10 | 	"github.com/prometheus/client_golang/prometheus" | 
| 7 | 11 | 	"github.com/prometheus/client_golang/prometheus/promhttp" | 
| 8 | 12 | ) | 
| 9 | 13 | 
 | 
| 10 |  | -func withRequestTracing(registry *prometheus.Registry, transport http.RoundTripper) http.RoundTripper { | 
| 11 |  | -	counter := prometheus.NewCounterVec( | 
// kubenurseTypeKey is the context.Context key used to look up the kubenurse
// type label (a string) on a request's context. Being a dedicated unexported
// struct type, it cannot collide with context keys defined elsewhere.
type kubenurseTypeKey struct{}
|  | 16 | + | 
// RoundTripperFunc is an adapter that lets an ordinary function be used as an
// http.RoundTripper.
type RoundTripperFunc func(req *http.Request) (*http.Response, error)

// RoundTrip implements http.RoundTripper by calling the function itself with
// the given request and returning its results unchanged.
func (fn RoundTripperFunc) RoundTrip(req *http.Request) (*http.Response, error) {
	resp, err := fn(req)
	return resp, err
}
|  | 23 | + | 
|  | 24 | +// This collects traces and logs errors. As promhttp.InstrumentRoundTripperTrace doesn't process | 
|  | 25 | +// errors, this is custom made and inspired by prometheus/client_golang's promhttp | 
|  | 26 | +func withHttptrace(registry *prometheus.Registry, next http.RoundTripper, durationHistogram []float64) http.RoundTripper { | 
|  | 27 | +	httpclientReqTotal := prometheus.NewCounterVec( | 
| 12 | 28 | 		prometheus.CounterOpts{ | 
| 13 | 29 | 			Namespace: metricsNamespace, | 
| 14 | 30 | 			Name:      "httpclient_requests_total", | 
| 15 | 31 | 			Help:      "A counter for requests from the kubenurse http client.", | 
| 16 | 32 | 		}, | 
|  | 33 | +		// []string{"code", "method", "type"}, // TODO | 
| 17 | 34 | 		[]string{"code", "method"}, | 
| 18 | 35 | 	) | 
| 19 | 36 | 
 | 
| 20 |  | -	latencyVec := prometheus.NewHistogramVec( | 
|  | 37 | +	httpclientReqDuration := prometheus.NewHistogramVec( | 
| 21 | 38 | 		prometheus.HistogramOpts{ | 
| 22 | 39 | 			Namespace: metricsNamespace, | 
| 23 |  | -			Name:      "httpclient_trace_request_duration_seconds", | 
| 24 |  | -			Help:      "Latency histogram for requests from the kubenurse http client. Time in seconds since the start of the http request.", | 
| 25 |  | -			Buckets:   []float64{.0005, .005, .01, .025, .05, .1, .25, .5, 1}, | 
|  | 40 | +			Name:      "httpclient_request_duration_seconds", | 
|  | 41 | +			Help:      "A latency histogram of request latencies from the kubenurse http client.", | 
|  | 42 | +			Buckets:   durationHistogram, | 
| 26 | 43 | 		}, | 
| 27 |  | -		[]string{"event"}, | 
|  | 44 | +		// []string{"type"}, // TODO | 
|  | 45 | +		[]string{}, | 
| 28 | 46 | 	) | 
| 29 | 47 | 
 | 
| 30 |  | -	// histVec has no labels, making it a zero-dimensional ObserverVec. | 
| 31 |  | -	histVec := prometheus.NewHistogramVec( | 
|  | 48 | +	httpclientTraceReqDuration := prometheus.NewHistogramVec( | 
| 32 | 49 | 		prometheus.HistogramOpts{ | 
| 33 | 50 | 			Namespace: metricsNamespace, | 
| 34 |  | -			Name:      "httpclient_request_duration_seconds", | 
| 35 |  | -			Help:      "A latency histogram of request latencies from the kubenurse http client.", | 
| 36 |  | -			Buckets:   prometheus.DefBuckets, | 
|  | 51 | +			Name:      "httpclient_trace_request_duration_seconds", | 
|  | 52 | +			Help:      "Latency histogram for requests from the kubenurse http client. Time in seconds since the start of the http request.", | 
|  | 53 | +			Buckets:   durationHistogram, | 
| 37 | 54 | 		}, | 
| 38 |  | -		[]string{}, | 
|  | 55 | +		[]string{"event"}, | 
|  | 56 | +		// []string{"event", "type"}, // TODO | 
| 39 | 57 | 	) | 
| 40 | 58 | 
 | 
| 41 |  | -	// Register all of the metrics in the standard registry. | 
| 42 |  | -	registry.MustRegister(counter, latencyVec, histVec) | 
|  | 59 | +	registry.MustRegister(httpclientReqTotal, httpclientReqDuration, httpclientTraceReqDuration) | 
| 43 | 60 | 
 | 
| 44 |  | -	// Define functions for the available httptrace.ClientTrace hook | 
| 45 |  | -	// functions that we want to instrument. | 
| 46 |  | -	trace := &promhttp.InstrumentTrace{ | 
| 47 |  | -		DNSStart: func(t float64) { | 
| 48 |  | -			latencyVec.WithLabelValues("dns_start").Observe(t) | 
| 49 |  | -		}, | 
| 50 |  | -		DNSDone: func(t float64) { | 
| 51 |  | -			latencyVec.WithLabelValues("dns_done").Observe(t) | 
| 52 |  | -		}, | 
| 53 |  | -		ConnectStart: func(t float64) { | 
| 54 |  | -			latencyVec.WithLabelValues("connect_start").Observe(t) | 
| 55 |  | -		}, | 
| 56 |  | -		ConnectDone: func(t float64) { | 
| 57 |  | -			latencyVec.WithLabelValues("connect_done").Observe(t) | 
| 58 |  | -		}, | 
| 59 |  | -		TLSHandshakeStart: func(t float64) { | 
| 60 |  | -			latencyVec.WithLabelValues("tls_handshake_start").Observe(t) | 
| 61 |  | -		}, | 
| 62 |  | -		TLSHandshakeDone: func(t float64) { | 
| 63 |  | -			latencyVec.WithLabelValues("tls_handshake_done").Observe(t) | 
| 64 |  | -		}, | 
| 65 |  | -		WroteRequest: func(t float64) { | 
| 66 |  | -			latencyVec.WithLabelValues("wrote_request").Observe(t) | 
| 67 |  | -		}, | 
| 68 |  | -		GotFirstResponseByte: func(t float64) { | 
| 69 |  | -			latencyVec.WithLabelValues("got_first_resp_byte").Observe(t) | 
| 70 |  | -		}, | 
|  | 61 | +	collectMetric := func(traceEventType string, start time.Time, r *http.Request, err error) { | 
|  | 62 | +		td := time.Since(start).Seconds() | 
|  | 63 | +		kubenurseTypeLabel := r.Context().Value(kubenurseTypeKey{}).(string) | 
|  | 64 | + | 
|  | 65 | +		// If we got an error inside a trace, log it and do not collect metrics | 
|  | 66 | +		if err != nil { | 
|  | 67 | +			log.Printf("httptrace: failed %s for %s with %v", traceEventType, kubenurseTypeLabel, err) | 
|  | 68 | +			return | 
|  | 69 | +		} | 
|  | 70 | + | 
|  | 71 | +		httpclientTraceReqDuration.WithLabelValues(traceEventType).Observe(td) // TODO: add back kubenurseTypeKey | 
| 71 | 72 | 	} | 
| 72 | 73 | 
 | 
| 73 |  | -	// Wrap the default RoundTripper with middleware. | 
| 74 |  | -	roundTripper := promhttp.InstrumentRoundTripperCounter(counter, | 
| 75 |  | -		promhttp.InstrumentRoundTripperTrace(trace, | 
| 76 |  | -			promhttp.InstrumentRoundTripperDuration(histVec, | 
| 77 |  | -				transport, | 
| 78 |  | -			), | 
| 79 |  | -		), | 
| 80 |  | -	) | 
|  | 74 | +	// Return a http.RoundTripper for tracing requests | 
|  | 75 | +	return RoundTripperFunc(func(r *http.Request) (*http.Response, error) { | 
|  | 76 | +		// Capture request time | 
|  | 77 | +		start := time.Now() | 
|  | 78 | + | 
|  | 79 | +		// Add tracing hooks | 
|  | 80 | +		trace := &httptrace.ClientTrace{ | 
|  | 81 | +			GotConn: func(info httptrace.GotConnInfo) { | 
|  | 82 | +				collectMetric("got_conn", start, r, nil) | 
|  | 83 | +			}, | 
|  | 84 | +			DNSStart: func(info httptrace.DNSStartInfo) { | 
|  | 85 | +				collectMetric("dns_start", start, r, nil) | 
|  | 86 | +			}, | 
|  | 87 | +			DNSDone: func(info httptrace.DNSDoneInfo) { | 
|  | 88 | +				collectMetric("dns_done", start, r, info.Err) | 
|  | 89 | +			}, | 
|  | 90 | +			ConnectStart: func(_, _ string) { | 
|  | 91 | +				collectMetric("connect_start", start, r, nil) | 
|  | 92 | +			}, | 
|  | 93 | +			ConnectDone: func(_, _ string, err error) { | 
|  | 94 | +				collectMetric("connect_done", start, r, err) | 
|  | 95 | +			}, | 
|  | 96 | +			TLSHandshakeStart: func() { | 
|  | 97 | +				collectMetric("tls_handshake_start", start, r, nil) | 
|  | 98 | +			}, | 
|  | 99 | +			TLSHandshakeDone: func(_ tls.ConnectionState, err error) { | 
|  | 100 | +				collectMetric("tls_handshake_done", start, r, nil) | 
|  | 101 | +			}, | 
|  | 102 | +			WroteRequest: func(info httptrace.WroteRequestInfo) { | 
|  | 103 | +				collectMetric("wrote_request", start, r, info.Err) | 
|  | 104 | +			}, | 
|  | 105 | +			GotFirstResponseByte: func() { | 
|  | 106 | +				collectMetric("got_first_resp_byte", start, r, nil) | 
|  | 107 | +			}, | 
|  | 108 | +		} | 
|  | 109 | + | 
|  | 110 | +		// Do request with tracing enabled | 
|  | 111 | +		r = r.WithContext(httptrace.WithClientTrace(r.Context(), trace)) | 
|  | 112 | + | 
|  | 113 | +		// // TODO: uncomment when issue #55 is solved (N^2 request will increase cardinality of path_ metrics too much otherwise) | 
|  | 114 | +		// typeFromCtxFn := promhttp.WithLabelFromCtx("type", func(ctx context.Context) string { | 
|  | 115 | +		// 	return ctx.Value(kubenurseTypeKey{}).(string) | 
|  | 116 | +		// }) | 
| 81 | 117 | 
 | 
| 82 |  | -	return roundTripper | 
|  | 118 | +		rt := next // variable pinning :) essential, to prevent always re-instrumenting the original variable | 
|  | 119 | +		rt = promhttp.InstrumentRoundTripperCounter(httpclientReqTotal, rt) | 
|  | 120 | +		rt = promhttp.InstrumentRoundTripperDuration(httpclientReqDuration, rt) | 
|  | 121 | +		return rt.RoundTrip(r) | 
|  | 122 | +	}) | 
| 83 | 123 | } | 
0 commit comments