diff --git a/examples/offline_inference/qwen3_reranker.py b/examples/offline_inference/qwen3_reranker.py index 27c4071bf094..fe3cebc348f1 100644 --- a/examples/offline_inference/qwen3_reranker.py +++ b/examples/offline_inference/qwen3_reranker.py @@ -22,15 +22,19 @@ # If you want to load the official original version, the init parameters are # as follows. -model = LLM( - model=model_name, - task="score", - hf_overrides={ - "architectures": ["Qwen3ForSequenceClassification"], - "classifier_from_token": ["no", "yes"], - "is_original_qwen3_reranker": True, - }, -) + +def get_model() -> LLM: + """Initializes and returns the LLM model for Qwen3-Reranker.""" + return LLM( + model=model_name, + task="score", + hf_overrides={ + "architectures": ["Qwen3ForSequenceClassification"], + "classifier_from_token": ["no", "yes"], + "is_original_qwen3_reranker": True, + }, + ) + # Why do we need hf_overrides for the official original version: # vllm converts it to Qwen3ForSequenceClassification when loaded for @@ -51,7 +55,8 @@ query_template = "{prefix}: {instruction}\n: {query}\n" document_template = ": {doc}{suffix}" -if __name__ == "__main__": + +def main() -> None: instruction = ( "Given a web search query, retrieve relevant passages that answer the query" ) @@ -72,6 +77,13 @@ ] documents = [document_template.format(doc=doc, suffix=suffix) for doc in documents] + model = get_model() outputs = model.score(queries, documents) + print("-" * 30) print([output.outputs.score for output in outputs]) + print("-" * 30) + + +if __name__ == "__main__": + main()