Commit 5b54c29

Minor fixes for batch inference (#426)
* Fix file not found
* progress fix
* add tests
* bump
* typing
1 parent 69f8bcb commit 5b54c29

5 files changed (+32, -6 lines)


clients/python/llmengine/__init__.py

Lines changed: 1 addition & 1 deletion

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-__version__ = "0.0.0b21"
+__version__ = "0.0.0b22"

 import os
 from typing import Sequence

clients/python/pyproject.toml

Lines changed: 1 addition & 1 deletion

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "scale-llm-engine"
-version = "0.0.0.beta21"
+version = "0.0.0.beta22"
 description = "Scale LLM Engine Python client"
 license = "Apache-2.0"
 authors = ["Phil Chen <phil.chen@scale.com>"]

clients/python/setup.py

Lines changed: 1 addition & 1 deletion

@@ -3,6 +3,6 @@
 setup(
     name="scale-llm-engine",
     python_requires=">=3.7",
-    version="0.0.0.beta21",
+    version="0.0.0.beta22",
     packages=find_packages(),
 )

model-engine/model_engine_server/inference/batch_inference/vllm_batch.py

Lines changed: 3 additions & 2 deletions

@@ -48,7 +48,8 @@ def file_exists(path):
     try:
         with smart_open.open(path, "r"):
            return True
-    except FileNotFoundError:
+    except Exception as exc:
+        print(f"Error checking if file exists: {exc}")
         return False
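Why the broad catch: smart_open can surface a missing remote object as a backend-specific error rather than FileNotFoundError (the new unit test below simulates exactly this with IOError("No such key")), so the narrower handler let real "not found" cases escape. For reference, the patched helper assembled as a standalone snippet, assuming smart_open is installed:

import smart_open

def file_exists(path):
    # Broad catch: smart_open may raise an IOError/OSError subclass or a
    # provider-specific error for a missing key, not just FileNotFoundError,
    # so any failure is logged and treated as "file does not exist".
    try:
        with smart_open.open(path, "r"):
            return True
    except Exception as exc:
        print(f"Error checking if file exists: {exc}")
        return False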
@@ -124,7 +125,7 @@ async def batch_inference():

     results_generators = await generate_with_vllm(request, content, model, job_index)

-    bar = tqdm(total=len(content.prompts), desc="Processed prompts")
+    bar = tqdm(total=len(results_generators), desc="Processed prompts")

     outputs = []
     for generator in results_generators:
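The progress fix sizes the bar by the collection the loop actually iterates, not by the number of prompts submitted, so the bar tracks real completion even if the two counts differ. A self-contained toy sketch of the idea, assuming tqdm is installed (fake_generators and NUM_PROMPTS are hypothetical stand-ins for generate_with_vllm's output and the request contents; the real loop body is not shown in this diff):

from tqdm import tqdm

NUM_PROMPTS = 5
# Hypothetical stand-in: the backend may hand back fewer generators than
# prompts, in which case a bar sized from the prompts would never reach 100%.
fake_generators = [iter([f"token-{i}"]) for i in range(NUM_PROMPTS - 1)]

bar = tqdm(total=len(fake_generators), desc="Processed prompts")
outputs = []
for generator in fake_generators:
    outputs.append(list(generator))  # drain one generator
    bar.update(1)
bar.close()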

model-engine/tests/unit/inference/test_vllm_batch.py

Lines changed: 26 additions & 1 deletion

@@ -2,7 +2,7 @@
 from unittest.mock import MagicMock, call, mock_open, patch

 import pytest
-from model_engine_server.inference.batch_inference.vllm_batch import batch_inference
+from model_engine_server.inference.batch_inference.vllm_batch import batch_inference, file_exists


 @pytest.mark.asyncio
@@ -272,3 +272,28 @@ def side_effect(key, default):
     mock_s3_client.delete_object.assert_has_calls(
         [call(Bucket="bucket", Key="key.0"), call(Bucket="bucket", Key="key.1")]
     )
+
+
+def test_file_exists():
+    mock_open_func = mock_open()
+    path = "test_path"
+
+    with patch(
+        "model_engine_server.inference.batch_inference.vllm_batch.smart_open.open", mock_open_func
+    ):
+        result = file_exists(path)
+
+    mock_open_func.assert_called_once_with(path, "r")
+    assert result is True
+
+
+def test_file_exists_no_such_key():
+    path = "test_path"
+
+    with patch(
+        "model_engine_server.inference.batch_inference.vllm_batch.smart_open.open",
+        side_effect=IOError("No such key"),
+    ):
+        result = file_exists(path)
+
+    assert result is False
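Since file_exists now catches any Exception, a natural extension (hypothetical, not part of this commit) would parametrize the failure case over several exception types and assert the helper returns False for each:

from unittest.mock import patch

import pytest

from model_engine_server.inference.batch_inference.vllm_batch import file_exists


@pytest.mark.parametrize(
    "exc",
    [FileNotFoundError("missing"), IOError("No such key"), RuntimeError("boom")],
)
def test_file_exists_returns_false_on_error(exc):
    # Any error raised by smart_open.open should be swallowed and reported
    # as "file does not exist".
    with patch(
        "model_engine_server.inference.batch_inference.vllm_batch.smart_open.open",
        side_effect=exc,
    ):
        assert file_exists("test_path") is False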
