From f69ae3508be2a06be945c662abed76e22d146b55 Mon Sep 17 00:00:00 2001
From: reidliu41
Date: Thu, 19 Jun 2025 16:58:40 +0800
Subject: [PATCH] refactor example - qwen3_reranker

Signed-off-by: reidliu41
---
 examples/offline_inference/qwen3_reranker.py | 32 ++++++++++++++------
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/examples/offline_inference/qwen3_reranker.py b/examples/offline_inference/qwen3_reranker.py
index 27c4071bf094..fe3cebc348f1 100644
--- a/examples/offline_inference/qwen3_reranker.py
+++ b/examples/offline_inference/qwen3_reranker.py
@@ -22,15 +22,19 @@
 # If you want to load the official original version, the init parameters are
 # as follows.
 
-model = LLM(
-    model=model_name,
-    task="score",
-    hf_overrides={
-        "architectures": ["Qwen3ForSequenceClassification"],
-        "classifier_from_token": ["no", "yes"],
-        "is_original_qwen3_reranker": True,
-    },
-)
+
+def get_model() -> LLM:
+    """Initializes and returns the LLM model for Qwen3-Reranker."""
+    return LLM(
+        model=model_name,
+        task="score",
+        hf_overrides={
+            "architectures": ["Qwen3ForSequenceClassification"],
+            "classifier_from_token": ["no", "yes"],
+            "is_original_qwen3_reranker": True,
+        },
+    )
+
 
 # Why do we need hf_overrides for the official original version:
 # vllm converts it to Qwen3ForSequenceClassification when loaded for
@@ -51,7 +55,8 @@
 query_template = "{prefix}: {instruction}\n: {query}\n"
 document_template = ": {doc}{suffix}"
 
-if __name__ == "__main__":
+
+def main() -> None:
     instruction = (
         "Given a web search query, retrieve relevant passages that answer the query"
     )
@@ -72,6 +77,13 @@
     ]
     documents = [document_template.format(doc=doc, suffix=suffix) for doc in documents]
 
+    model = get_model()
     outputs = model.score(queries, documents)
 
+    print("-" * 30)
     print([output.outputs.score for output in outputs])
+    print("-" * 30)
+
+
+if __name__ == "__main__":
+    main()