3 files changed, +48 −3 lines changed

@@ -1292,6 +1292,47 @@ class PoolingResponse(OpenAIBaseModel):
     usage: UsageInfo
 
 
+class ClassificationRequest(OpenAIBaseModel):
+    model: Optional[str] = None
+    input: Union[list[str], str]
+    truncate_prompt_tokens: Optional[int] = None
+    user: Optional[str] = None
+
+    # --8<-- [start:classification-pooling-params]
+    additional_data: Optional[Any] = None
+    # --8<-- [end:classification-pooling-params]
+
+    # --8<-- [start:classification-extra-params]
+    priority: int = Field(
+        default=0,
+        description=(
+            "The priority of the request (lower means earlier handling; "
+            "default: 0). Any priority other than 0 will raise an error "
+            "if the served model does not use priority scheduling."),
+    )
+
+    # --8<-- [end:classification-extra-params]
+
+    def to_pooling_params(self):
+        return PoolingParams(additional_data=self.additional_data)
+
+
+class ClassificationData(OpenAIBaseModel):
+    index: int
+    label: Optional[str]
+    probs: list[float]
+    num_classes: int
+
+
+class ClassificationResponse(OpenAIBaseModel):
+    id: str = Field(default_factory=lambda: f"classify-{random_uuid()}")
+    object: str = "list"
+    created: int = Field(default_factory=lambda: int(time.time()))
+    model: str
+    data: list[ClassificationData]
+    usage: UsageInfo
+
+
 class ScoreResponseData(OpenAIBaseModel):
     index: int
     object: str = "score"
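Taken together, these additions define the schema for a classification endpoint: ClassificationRequest carries the inputs plus pooling and scheduling extras, ClassificationData holds one per-item result, and ClassificationResponse wraps the batch with usage accounting. A minimal sketch of how the request model could be exercised, assuming it lives alongside the other OpenAI-style schemas (the import path, model name, and sample inputs are illustrative assumptions, not taken from this diff):

# Sketch only: builds the new request model and converts it to PoolingParams.
# Import path and field values are assumptions.
from vllm.entrypoints.openai.protocol import ClassificationRequest

request = ClassificationRequest(
    model="my-classifier",                  # hypothetical served model name
    input=["ship it", "needs more tests"],  # a single string also validates
    priority=0,                             # non-zero requires priority scheduling
)

pooling_params = request.to_pooling_params()
print(pooling_params.additional_data)  # None unless the client supplied it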
@@ -465,9 +465,13 @@ def load_model(self, vllm_config: VllmConfig) -> nn.Module:
         if model_config.quantization is None and loaded_weights is not None:
             weights_not_loaded = weights_to_load - loaded_weights
             if weights_not_loaded:
-                raise ValueError(
+                logger.error(
                     "Following weights were not initialized from "
-                    f"checkpoint: {weights_not_loaded}")
+                    "checkpoint: %s", weights_not_loaded)
+
+                # raise ValueError(
+                #     "Following weights were not initialized from "
+                #     f"checkpoint: {weights_not_loaded}")
 
         _process_weights_after_loading(model, model_config, target_device)
 
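This hunk relaxes the strict-loading check: weights missing from the checkpoint are now reported through the logger (with lazy %-formatting) instead of aborting the load, and the original raise is kept as a comment. A standalone sketch of the check, with toy weight sets made up for illustration:

# Toy reproduction of the relaxed check; the weight names are invented.
import logging

logging.basicConfig(level=logging.ERROR)
logger = logging.getLogger("loader_sketch")

weights_to_load = {"encoder.weight", "classifier.weight", "classifier.bias"}
loaded_weights = {"encoder.weight"}

weights_not_loaded = weights_to_load - loaded_weights
if weights_not_loaded:
    # Previously this raised ValueError and stopped model loading; now the
    # mismatch is logged and loading continues, leaving the unloaded weights
    # at their freshly initialized values.
    logger.error("Following weights were not initialized from "
                 "checkpoint: %s", weights_not_loaded)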
@@ -267,7 +267,7 @@ def create_attention_instances(self) -> dict[int, Attention]:
                 self.config.global_attention_layers, list):
             global_attention_layers = self.config.global_attention_layers
         else:
-            global_attention_layers = None
+            global_attention_layers = []
 
         for i in range(start, end):
             sliding_window = None
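Using an empty list instead of None keeps later membership checks well defined when the config declares no global attention layers. The pattern below is an assumed downstream usage (the consuming loop is not shown in this hunk):

# Assumed usage: deciding per-layer whether to build a global-attention layer.
def is_global(layer_idx: int, global_attention_layers) -> bool:
    return layer_idx in global_attention_layers

print(is_global(0, []))      # False: no layer is treated as global
# is_global(0, None)         # would raise TypeError: argument of type
#                            # 'NoneType' is not iterable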