
Commit 55172b0

Fix model selection (#43)
* Add ROCm benchmarks
* Fix typo
* Add missing argument
* More tweaks
* Use rocm-smi
* Authenticate with AWS on ROCm
* Ready for review
* Beautifier
* Add id-token
* Use DEVICE_NAME and DEVICE_TYPE consistently
* Fix model selection

---------

Signed-off-by: Huy Do <huydhn@gmail.com>
1 parent 319ad22 commit 55172b0

File tree

1 file changed: +6 −5 lines changed


.github/scripts/generate_vllm_benchmark_matrix.py

Lines changed: 6 additions & 5 deletions
@@ -113,13 +113,12 @@ def generate_benchmark_matrix(
     Parse all the JSON files in vLLM benchmark configs directory to get the
     model name and tensor parallel size (aka number of GPUs)
     """
-    get_all_models = True if not models else False
     use_all_gpus = True if not gpus else False
-
     benchmark_matrix: Dict[str, Any] = {
         "include": [],
     }
 
+    selected_models = []
     for file in glob.glob(f"{benchmark_configs_dir}/*.json"):
         with open(file) as f:
             try:
@@ -139,10 +138,12 @@ def generate_benchmark_matrix(
             model = benchmark_config["model"].lower()
 
             # Dedup
-            if model in models:
+            if model in selected_models:
+                continue
+            # and only choose the selected model:
+            if models and model not in models:
                 continue
-            if get_all_models:
-                models.append(model)
+            selected_models.append(model)
 
             if "tensor_parallel_size" in benchmark_config:
                 tp = benchmark_config["tensor_parallel_size"]
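
For context, a minimal runnable sketch of the selection behavior this commit lands. The select_models helper and the in-memory config list are illustrative stand-ins, not the script's actual generate_benchmark_matrix signature, which reads the JSON files under the benchmark configs directory:

# Illustrative sketch only: the dedup-and-filter logic from this commit,
# pulled into a standalone helper so it can be run directly.
from typing import Any, Dict, List


def select_models(
    benchmark_configs: List[Dict[str, Any]], models: List[str]
) -> List[str]:
    """Pick each model at most once; if `models` is non-empty, keep only those."""
    selected_models: List[str] = []
    for benchmark_config in benchmark_configs:
        model = benchmark_config["model"].lower()
        # Dedup: the same model listed in several benchmark configs is kept once.
        if model in selected_models:
            continue
        # Only choose the requested models, when an explicit list was given.
        if models and model not in models:
            continue
        selected_models.append(model)
    return selected_models


if __name__ == "__main__":
    # Hypothetical configs, just to exercise the two paths.
    configs = [
        {"model": "Model-A"},
        {"model": "model-a"},  # duplicate after lower-casing
        {"model": "Model-B"},
    ]
    print(select_models(configs, []))           # no filter: every model, once
    print(select_models(configs, ["model-b"]))  # explicit filter: only model-b

Before this change, the requested-models list doubled as the dedup list: an explicitly requested model hit `if model in models: continue` and was skipped, and the list was only extended when it started out empty (get_all_models). Tracking picks in a separate selected_models list keeps the user's filter intact while still deduplicating.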
