From f69ae3508be2a06be945c662abed76e22d146b55 Mon Sep 17 00:00:00 2001
From: reidliu41 <reid201711@gmail.com>
Date: Thu, 19 Jun 2025 16:58:40 +0800
Subject: [PATCH] refactor qwen3_reranker example: wrap model init and script body in functions

Signed-off-by: reidliu41 <reid201711@gmail.com>
---
 examples/offline_inference/qwen3_reranker.py | 32 ++++++++++++++------
 1 file changed, 22 insertions(+), 10 deletions(-)
diff --git a/examples/offline_inference/qwen3_reranker.py b/examples/offline_inference/qwen3_reranker.py
index 27c4071bf094..fe3cebc348f1 100644
--- a/examples/offline_inference/qwen3_reranker.py
+++ b/examples/offline_inference/qwen3_reranker.py
@@ -22,15 +22,19 @@
 # If you want to load the official original version, the init parameters are
 # as follows.
 
-model = LLM(
-    model=model_name,
-    task="score",
-    hf_overrides={
-        "architectures": ["Qwen3ForSequenceClassification"],
-        "classifier_from_token": ["no", "yes"],
-        "is_original_qwen3_reranker": True,
-    },
-)
+
+def get_model() -> LLM:
+    """Initialize and return the LLM configured for Qwen3-Reranker scoring."""
+    return LLM(
+        model=model_name,
+        task="score",
+        hf_overrides={
+            "architectures": ["Qwen3ForSequenceClassification"],
+            "classifier_from_token": ["no", "yes"],
+            "is_original_qwen3_reranker": True,
+        },
+    )
+
 
 # Why do we need hf_overrides for the official original version:
 # vllm converts it to Qwen3ForSequenceClassification when loaded for
@@ -51,7 +55,8 @@
 query_template = "{prefix}<Instruct>: {instruction}\n<Query>: {query}\n"
 document_template = "<Document>: {doc}{suffix}"
 
-if __name__ == "__main__":
+
+def main() -> None:
     instruction = (
         "Given a web search query, retrieve relevant passages that answer the query"
     )
@@ -72,6 +77,13 @@
     ]
     documents = [document_template.format(doc=doc, suffix=suffix) for doc in documents]
 
+    model = get_model()
     outputs = model.score(queries, documents)
 
+    print("-" * 30)
     print([output.outputs.score for output in outputs])
+    print("-" * 30)
+
+
+if __name__ == "__main__":
+    main()