@@ -52,6 +52,8 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
 			...convertToOpenAiMessages(messages),
 		]

+		const { id: modelId, info: modelInfo } = this.getModel()
+
 		// prompt caching: https://openrouter.ai/docs/prompt-caching
 		// this is specifically for claude models (some models may 'support prompt caching' automatically without this)
 		switch (true) {
@@ -95,10 +97,7 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
 		let topP: number | undefined = undefined

 		// Handle models based on deepseek-r1
-		if (
-			this.getModel().id.startsWith("deepseek/deepseek-r1") ||
-			this.getModel().id === "perplexity/sonar-reasoning"
-		) {
+		if (modelId.startsWith("deepseek/deepseek-r1") || modelId === "perplexity/sonar-reasoning") {
 			// Recommended temperature for DeepSeek reasoning models
 			defaultTemperature = DEEP_SEEK_DEFAULT_TEMPERATURE
 			// DeepSeek highly recommends using user instead of system role
@@ -107,24 +106,34 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
 			topP = 0.95
 		}

+		let temperature = this.options.modelTemperature ?? defaultTemperature
+
+		// Anthropic "Thinking" models require a temperature of 1.0.
+		if (modelInfo.thinking) {
+			temperature = 1.0
+		}
+
 		// https://openrouter.ai/docs/transforms
 		let fullResponseText = ""
-		const stream = await this.client.chat.completions.create({
-			model: this.getModel().id,
-			max_tokens: this.getModel().info.maxTokens,
-			temperature: this.options.modelTemperature ?? defaultTemperature,
+
+		const completionParams: OpenRouterChatCompletionParams = {
+			model: modelId,
+			max_tokens: modelInfo.maxTokens,
+			temperature,
 			top_p: topP,
 			messages: openAiMessages,
 			stream: true,
 			include_reasoning: true,
 			// This way, the transforms field will only be included in the parameters when openRouterUseMiddleOutTransform is true.
 			...(this.options.openRouterUseMiddleOutTransform && { transforms: ["middle-out"] }),
-		} as OpenRouterChatCompletionParams)
+		}
+
+		const stream = await this.client.chat.completions.create(completionParams)

 		let genId: string | undefined

 		for await (const chunk of stream as unknown as AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>) {
-			// openrouter returns an error object instead of the openai sdk throwing an error
+			// OpenRouter returns an error object instead of the OpenAI SDK throwing an error.
 			if ("error" in chunk) {
 				const error = chunk.error as { message?: string; code?: number }
 				console.error(`OpenRouter API Error: ${error?.code} - ${error?.message}`)
@@ -136,19 +145,22 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
 			}

 			const delta = chunk.choices[0]?.delta
+
 			if ("reasoning" in delta && delta.reasoning) {
 				yield {
 					type: "reasoning",
 					text: delta.reasoning,
 				} as ApiStreamChunk
 			}
+
 			if (delta?.content) {
 				fullResponseText += delta.content
 				yield {
 					type: "text",
 					text: delta.content,
 				} as ApiStreamChunk
 			}
+
 			// if (chunk.usage) {
 			// 	yield {
 			// 		type: "usage",
@@ -158,10 +170,12 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
 			// 	}
 		}

-		// retry fetching generation details
+		// Retry fetching generation details.
 		let attempt = 0
+
 		while (attempt++ < 10) {
 			await delay(200) // FIXME: necessary delay to ensure generation endpoint is ready
+
 			try {
 				const response = await axios.get(`https://openrouter.ai/api/v1/generation?id=${genId}`, {
 					headers: {
@@ -171,7 +185,7 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
 				})

 				const generation = response.data?.data
-				console.log("OpenRouter generation details:", response.data)
+
 				yield {
 					type: "usage",
 					// cacheWriteTokens: 0,
@@ -182,20 +196,21 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
 					totalCost: generation?.total_cost || 0,
 					fullResponseText,
 				} as OpenRouterApiStreamUsageChunk
+
 				return
 			} catch (error) {
 				// ignore if fails
 				console.error("Error fetching OpenRouter generation details:", error)
 			}
 		}
 	}
-	getModel(): { id: string; info: ModelInfo } {
+
+	getModel() {
 		const modelId = this.options.openRouterModelId
 		const modelInfo = this.options.openRouterModelInfo
-		if (modelId && modelInfo) {
-			return { id: modelId, info: modelInfo }
-		}
-		return { id: openRouterDefaultModelId, info: openRouterDefaultModelInfo }
+		return modelId && modelInfo
+			? { id: modelId, info: modelInfo }
+			: { id: openRouterDefaultModelId, info: openRouterDefaultModelInfo }
 	}

 	async completePrompt(prompt: string): Promise<string> {
@@ -218,6 +233,7 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
 			if (error instanceof Error) {
 				throw new Error(`OpenRouter completion error: ${error.message}`)
 			}
+
 			throw error
 		}
 	}
@@ -239,6 +255,7 @@ export async function getOpenRouterModels() {
 			inputPrice: parseApiPrice(rawModel.pricing?.prompt),
 			outputPrice: parseApiPrice(rawModel.pricing?.completion),
 			description: rawModel.description,
+			thinking: rawModel.id === "anthropic/claude-3.7-sonnet:thinking",
 		}

 		// NOTE: this needs to be synced with api.ts/openrouter default model info.
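A note on the temperature handling introduced above: the user's `modelTemperature` still overrides the per-model default, but the new `thinking` flag takes precedence over both. The sketch below restates that logic in isolation; it is illustrative rather than code from the commit, and it assumes a baseline default of 0 and a value of 0.6 for `DEEP_SEEK_DEFAULT_TEMPERATURE`, both of which are defined outside the hunks shown.

```typescript
// Illustrative sketch, not code from the commit.
interface ModelInfoSketch {
	maxTokens?: number
	thinking?: boolean // set by getOpenRouterModels() for the :thinking variant
}

const DEEP_SEEK_DEFAULT_TEMPERATURE = 0.6 // assumed value of the imported constant

function selectTemperature(modelId: string, modelInfo: ModelInfoSketch, userTemperature?: number): number {
	// DeepSeek R1-style reasoning models get a higher recommended default.
	const isR1Style = modelId.startsWith("deepseek/deepseek-r1") || modelId === "perplexity/sonar-reasoning"
	const defaultTemperature = isR1Style ? DEEP_SEEK_DEFAULT_TEMPERATURE : 0 // baseline of 0 is assumed

	// A caller-provided temperature overrides the default...
	let temperature = userTemperature ?? defaultTemperature

	// ...but Anthropic "Thinking" models require a temperature of 1.0 regardless.
	if (modelInfo.thinking) {
		temperature = 1.0
	}

	return temperature
}

// The :thinking variant pins the temperature to 1.0 even when the user asks for 0.2.
console.log(selectTemperature("anthropic/claude-3.7-sonnet:thinking", { thinking: true }, 0.2)) // 1.0
```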
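The usage-reporting loop polls OpenRouter's generation endpoint, which is not guaranteed to have the record immediately after the stream ends. Below is a standalone sketch of the same pattern, assuming the endpoint accepts a standard `Authorization: Bearer` header (the header construction is elided from the hunk) and using a hypothetical `fetchGenerationDetails` helper.

```typescript
import axios from "axios"

// Illustrative sketch of the polling pattern in the diff: the generation record
// may not be ready immediately, so retry up to 10 times with a 200ms delay.
async function fetchGenerationDetails(genId: string, apiKey: string) {
	for (let attempt = 0; attempt < 10; attempt++) {
		await new Promise((resolve) => setTimeout(resolve, 200))

		try {
			const response = await axios.get(`https://openrouter.ai/api/v1/generation?id=${genId}`, {
				headers: { Authorization: `Bearer ${apiKey}` }, // assumed auth scheme; elided in the hunk
			})

			return response.data?.data // usage details, e.g. total_cost as read in the diff above
		} catch {
			// Not ready yet (or a transient failure); fall through to the next attempt.
		}
	}

	return undefined // retry budget exhausted; the handler likewise gives up silently
}
```

Returning `undefined` after ten attempts mirrors the handler's behavior of abandoning usage reporting once the retry budget is exhausted.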