@@ -75,8 +75,49 @@ func NewHandler(
75
75
76
76
// V1 service routes
77
77
v1router := r .PathPrefix ("/v1" ).Subrouter ()
78
+ // registerStandardEndpoint registers an HTTP endpoint with all of the expected middleware
79
+ // for authentication, latency, instrumentation, etc.
80
+ registerStandardEndpoint := func (name , route string , attributes attribute.Set , handler http.Handler ) {
81
+ // Create an HTTP handler that will update the "concurrent_upstream_requests" metric.
82
+ gaugedHandler := gaugeHandler (
83
+ counter ,
84
+ attributes ,
85
+ authr .Middleware (
86
+ requestlogger .Middleware (
87
+ logger ,
88
+ handler ,
89
+ ),
90
+ ))
91
+ // Wrap that in our instrumentation middleware, adding more logging.
92
+ instrumentedHandler := instrumentation .HTTPMiddleware (
93
+ name ,
94
+ gaugedHandler ,
95
+ otelhttp .WithPublicEndpoint ())
96
+ // Finally wrap that again in our overall middleware.
97
+ overheadMiddleware := overhead .HTTPMiddleware (latencyHistogram , instrumentedHandler )
98
+
99
+ v1router .Path (route ).Methods (http .MethodPost ).Handler (overheadMiddleware )
100
+ }
101
+
102
+ // registerSimpleGETEndpoint registers a basic HTTP GET endpoint, without the
103
+ // latency and performance counter middleware that we register for other endpoints.
104
+ registerSimpleGETEndpoint := func (name , route string , handler http.Handler ) {
105
+ v1router .Path (route ).Methods (http .MethodGet ).Handler (
106
+ instrumentation .HTTPMiddleware (name ,
107
+ authr .Middleware (
108
+ requestlogger .Middleware (
109
+ logger ,
110
+ handler ,
111
+ ),
112
+ ),
113
+ otelhttp .WithPublicEndpoint (),
114
+ ),
115
+ )
116
+ }
78
117
79
- if config .Anthropic .AccessToken != "" {
118
+ if config .Anthropic .AccessToken == "" {
119
+ logger .Error ("Anthropic access token not set. Not registering Anthropic-related endpoints." )
120
+ } else {
80
121
anthropicHandler , err := completions .NewAnthropicHandler (
81
122
logger ,
82
123
eventLogger ,
@@ -85,28 +126,15 @@ func NewHandler(
85
126
httpClient ,
86
127
config .Anthropic ,
87
128
promptRecorder ,
88
- config .AutoFlushStreamingResponses ,
89
- )
129
+ config .AutoFlushStreamingResponses )
90
130
if err != nil {
91
131
return nil , errors .Wrap (err , "init Anthropic handler" )
92
132
}
93
-
94
- v1router .Path ("/completions/anthropic" ).Methods (http .MethodPost ).Handler (
95
- overhead .HTTPMiddleware (latencyHistogram ,
96
- instrumentation .HTTPMiddleware ("v1.completions.anthropic" ,
97
- gaugeHandler (
98
- counter ,
99
- attributesAnthropicCompletions ,
100
- authr .Middleware (
101
- requestlogger .Middleware (
102
- logger ,
103
- anthropicHandler ,
104
- ),
105
- ),
106
- ),
107
- otelhttp .WithPublicEndpoint (),
108
- ),
109
- ))
133
+ registerStandardEndpoint (
134
+ "v1.completions.anthropic" ,
135
+ "/completions/anthropic" ,
136
+ attributesAnthropicCompletions ,
137
+ anthropicHandler )
110
138
111
139
anthropicMessagesHandler , err := completions .NewAnthropicMessagesHandler (
112
140
logger ,
@@ -116,143 +144,79 @@ func NewHandler(
116
144
httpClient ,
117
145
config .Anthropic ,
118
146
promptRecorder ,
119
- config .AutoFlushStreamingResponses ,
120
- )
147
+ config .AutoFlushStreamingResponses )
121
148
if err != nil {
122
149
return nil , errors .Wrap (err , "init anthropicMessages handler" )
123
150
}
151
+ registerStandardEndpoint (
152
+ "v1.completions.anthropicmessages" ,
153
+ "/completions/anthropic-messages" ,
154
+ attributesAnthropicCompletions ,
155
+ anthropicMessagesHandler )
156
+ }
124
157
125
- v1router .Path ("/completions/anthropic-messages" ).Methods (http .MethodPost ).Handler (
126
- overhead .HTTPMiddleware (latencyHistogram ,
127
- instrumentation .HTTPMiddleware ("v1.completions.anthropicmessages" ,
128
- gaugeHandler (
129
- counter ,
130
- attributesAnthropicCompletions ,
131
- authr .Middleware (
132
- requestlogger .Middleware (
133
- logger ,
134
- anthropicMessagesHandler ,
135
- ),
136
- ),
137
- ),
138
- otelhttp .WithPublicEndpoint (),
139
- ),
140
- ))
158
+ if config .OpenAI .AccessToken == "" {
159
+ logger .Error ("OpenAI access token not set. Not registering OpenAI-related endpoints." )
141
160
} else {
142
- logger .Error ("Anthropic access token not set" )
143
- }
144
- if config .OpenAI .AccessToken != "" {
145
- v1router .Path ("/completions/openai" ).Methods (http .MethodPost ).Handler (
146
- overhead .HTTPMiddleware (latencyHistogram ,
147
- instrumentation .HTTPMiddleware ("v1.completions.openai" ,
148
- gaugeHandler (
149
- counter ,
150
- attributesOpenAICompletions ,
151
- authr .Middleware (
152
- requestlogger .Middleware (
153
- logger ,
154
- completions .NewOpenAIHandler (
155
- logger ,
156
- eventLogger ,
157
- rs ,
158
- config .RateLimitNotifier ,
159
- httpClient ,
160
- config .OpenAI ,
161
- config .AutoFlushStreamingResponses ,
162
- ),
163
- ),
164
- ),
165
- ),
166
- otelhttp .WithPublicEndpoint (),
167
- ),
168
- ))
161
+ openAIHandler := completions .NewOpenAIHandler (
162
+ logger ,
163
+ eventLogger ,
164
+ rs ,
165
+ config .RateLimitNotifier ,
166
+ httpClient ,
167
+ config .OpenAI ,
168
+ config .AutoFlushStreamingResponses )
169
+ registerStandardEndpoint (
170
+ "v1.completions.openai" ,
171
+ "/completions/openai" ,
172
+ attributesOpenAICompletions ,
173
+ openAIHandler )
169
174
170
- v1router .Path ("/embeddings/models" ).Methods (http .MethodGet ).Handler (
171
- instrumentation .HTTPMiddleware ("v1.embeddings.models" ,
172
- authr .Middleware (
173
- requestlogger .Middleware (
174
- logger ,
175
- embeddings .NewListHandler (),
176
- ),
177
- ),
178
- otelhttp .WithPublicEndpoint (),
179
- ),
180
- )
175
+ registerSimpleGETEndpoint ("v1.embeddings.models" , "/embeddings/models" , embeddings .NewListHandler ())
181
176
182
- v1router .Path ("/embeddings" ).Methods (http .MethodPost ).Handler (
183
- overhead .HTTPMiddleware (latencyHistogram ,
184
- instrumentation .HTTPMiddleware ("v1.embeddings" ,
185
- gaugeHandler (
186
- counter ,
187
- // TODO - if embeddings.ModelFactoryMap includes more than
188
- // just OpenAI we might need to move how we count concurrent
189
- // requests into the handler, instead of assuming we are
190
- // counting OpenAI requests
191
- attributesOpenAIEmbeddings ,
192
- authr .Middleware (
193
- requestlogger .Middleware (
194
- logger ,
195
- embeddings .NewHandler (
196
- logger ,
197
- eventLogger ,
198
- rs ,
199
- config .RateLimitNotifier ,
200
- embeddings.ModelFactoryMap {
201
- embeddings .ModelNameOpenAIAda : embeddings .NewOpenAIClient (httpClient , config .OpenAI .AccessToken ),
202
- embeddings .ModelNameSourcegraphTriton : embeddings .NewSourcegraphClient (httpClient , config .Sourcegraph .TritonURL ),
203
- },
204
- config .EmbeddingsAllowedModels ,
205
- ),
206
- ),
207
- ),
208
- ),
209
- otelhttp .WithPublicEndpoint (),
210
- ),
211
- ))
212
- } else {
213
- logger .Error ("OpenAI access token not set" )
177
+ embeddingsHandler := embeddings .NewHandler (
178
+ logger ,
179
+ eventLogger ,
180
+ rs ,
181
+ config .RateLimitNotifier ,
182
+ embeddings.ModelFactoryMap {
183
+ embeddings .ModelNameOpenAIAda : embeddings .NewOpenAIClient (httpClient , config .OpenAI .AccessToken ),
184
+ embeddings .ModelNameSourcegraphTriton : embeddings .NewSourcegraphClient (httpClient , config .Sourcegraph .TritonURL ),
185
+ },
186
+ config .EmbeddingsAllowedModels )
187
+ // TODO: If embeddings.ModelFactoryMap includes more than just OpenAI, we might want to
188
+ // revisit how we count concurrent requests into the handler. (Instead of assuming they are
189
+ // all OpenAI-related requests; i.e. maybe we should use something other than
190
+ // attributesOpenAIEmbeddings here.)
191
+ registerStandardEndpoint (
192
+ "v1.embeddings" ,
193
+ "/embeddings" ,
194
+ attributesOpenAIEmbeddings ,
195
+ embeddingsHandler )
214
196
}
215
- if config .Fireworks .AccessToken != "" {
216
- v1router .Path ("/completions/fireworks" ).Methods (http .MethodPost ).Handler (
217
- overhead .HTTPMiddleware (latencyHistogram ,
218
- instrumentation .HTTPMiddleware ("v1.completions.fireworks" ,
219
- gaugeHandler (
220
- counter ,
221
- attributesFireworksCompletions ,
222
- authr .Middleware (
223
- requestlogger .Middleware (
224
- logger ,
225
- completions .NewFireworksHandler (
226
- logger ,
227
- eventLogger ,
228
- rs ,
229
- config .RateLimitNotifier ,
230
- httpClient ,
231
- config .Fireworks ,
232
- config .AutoFlushStreamingResponses ,
233
- ),
234
- ),
235
- ),
236
- ),
237
- otelhttp .WithPublicEndpoint (),
238
- ),
239
- ))
197
+
198
+ if config .Fireworks .AccessToken == "" {
199
+ logger .Error ("Fireworks access token not set. Not registering Fireworks-related endpoints." )
240
200
} else {
241
- logger .Error ("Fireworks access token not set" )
201
+ fireworksHandler := completions .NewFireworksHandler (
202
+ logger ,
203
+ eventLogger ,
204
+ rs ,
205
+ config .RateLimitNotifier ,
206
+ httpClient ,
207
+ config .Fireworks ,
208
+ config .AutoFlushStreamingResponses )
209
+ registerStandardEndpoint (
210
+ "v1.completions.fireworks" ,
211
+ "/completions/fireworks" ,
212
+ attributesFireworksCompletions ,
213
+ fireworksHandler )
242
214
}
243
215
244
216
// Register a route where actors can retrieve their current rate limit state.
245
- v1router .Path ("/limits" ).Methods (http .MethodGet ).Handler (
246
- instrumentation .HTTPMiddleware ("v1.limits" ,
247
- authr .Middleware (
248
- requestlogger .Middleware (
249
- logger ,
250
- featurelimiter .ListLimitsHandler (logger , rs ),
251
- ),
252
- ),
253
- otelhttp .WithPublicEndpoint (),
254
- ),
255
- )
217
+ limitsHandler := featurelimiter .ListLimitsHandler (logger , rs )
218
+ registerSimpleGETEndpoint ("v1.limits" , "/limits" , limitsHandler )
219
+
256
220
// Register a route where actors can refresh their rate limit state.
257
221
v1router .Path ("/limits/refresh" ).Methods (http .MethodPost ).Handler (
258
222
instrumentation .HTTPMiddleware ("v1.limits" ,
0 commit comments