File tree Expand file tree Collapse file tree 1 file changed +22
-10
lines changed
examples/offline_inference Expand file tree Collapse file tree 1 file changed +22
-10
lines changed Original file line number Diff line number Diff line change 22
22
# If you want to load the official original version, the init parameters are
23
23
# as follows.
24
24
25
- model = LLM (
26
- model = model_name ,
27
- task = "score" ,
28
- hf_overrides = {
29
- "architectures" : ["Qwen3ForSequenceClassification" ],
30
- "classifier_from_token" : ["no" , "yes" ],
31
- "is_original_qwen3_reranker" : True ,
32
- },
33
- )
25
+
26
def get_model() -> LLM:
    """Build the vLLM engine configured for the official Qwen3-Reranker.

    The original checkpoint is not shipped as a sequence-classification
    model, so ``hf_overrides`` remaps its architecture and tells vLLM which
    tokens ("no"/"yes") act as the binary classifier head.
    """
    overrides = {
        "architectures": ["Qwen3ForSequenceClassification"],
        "classifier_from_token": ["no", "yes"],
        "is_original_qwen3_reranker": True,
    }
    engine = LLM(
        model=model_name,
        task="score",
        hf_overrides=overrides,
    )
    return engine
37
+
34
38
35
39
# Why do we need hf_overrides for the official original version:
36
40
# vllm converts it to Qwen3ForSequenceClassification when loaded for
51
55
query_template = "{prefix}<Instruct>: {instruction}\n <Query>: {query}\n "
52
56
document_template = "<Document>: {doc}{suffix}"
53
57
54
- if __name__ == "__main__" :
58
+
59
+ def main () -> None :
55
60
instruction = (
56
61
"Given a web search query, retrieve relevant passages that answer the query"
57
62
)
72
77
]
73
78
documents = [document_template .format (doc = doc , suffix = suffix ) for doc in documents ]
74
79
80
+ model = get_model ()
75
81
outputs = model .score (queries , documents )
76
82
83
+ print ("-" * 30 )
77
84
print ([output .outputs .score for output in outputs ])
85
+ print ("-" * 30 )
86
+
87
+
88
# Script entry point: run the reranking demo only when executed directly,
# not when this example module is imported.
if __name__ == "__main__":
    main()
You can’t perform that action at this time.
0 commit comments