Skip to content

Commit e84715c

Browse files
committed
Updated pr.
1 parent 3803600 commit e84715c

File tree

3 files changed

+74
-44
lines changed

3 files changed

+74
-44
lines changed

ads/model/framework/embedding_onnx_model.py

Lines changed: 33 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# Copyright (c) 2024 Oracle and/or its affiliates.
44
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
55

6-
from typing import Dict
6+
from typing import Dict, Optional
77

88
from ads.model.extractor.embedding_onnx_extractor import EmbeddingONNXExtractor
99
from ads.model.generic_model import FrameworkSpecificModel
@@ -108,18 +108,26 @@ class EmbeddingONNXModel(FrameworkSpecificModel):
108108
>>> from huggingface_hub import snapshot_download
109109
110110
>>> local_dir=tempfile.mkdtemp()
111-
>>> # download sentence-transformers/all-MiniLM-L6-v2 from huggingface
111+
>>> allow_patterns=[
112+
... "onnx/model.onnx",
113+
... "config.json",
114+
... "special_tokens_map.json",
115+
... "tokenizer_config.json",
116+
... "tokenizer.json",
117+
... "vocab.txt"
118+
... ]
119+
120+
>>> # download files needed for this demonstration to local folder
112121
>>> snapshot_download(
113122
... repo_id="sentence-transformers/all-MiniLM-L6-v2",
114-
... local_dir=local_dir
123+
... local_dir=local_dir,
124+
... allow_patterns=allow_patterns
115125
... )
116126
117-
>>> # copy all files from local_dir to artifact_dir
118127
>>> artifact_dir = tempfile.mkdtemp()
119-
>>> for root, dirs, files in os.walk(local_dir):
120-
>>> for file in files:
121-
>>> src_path = os.path.join(root, file)
122-
>>> shutil.copy(src_path, artifact_dir)
128+
>>> # copy all downloaded files to artifact folder
129+
>>> for file in allow_patterns:
130+
>>> shutil.copy(local_dir + "/" + file, artifact_dir)
123131
124132
>>> model = EmbeddingONNXModel(artifact_dir=artifact_dir)
125133
>>> model.summary_status()
@@ -157,8 +165,8 @@ class EmbeddingONNXModel(FrameworkSpecificModel):
157165

158166
def __init__(
159167
self,
160-
artifact_dir: str | None = None,
161-
auth: Dict | None = None,
168+
artifact_dir: Optional[str] = None,
169+
auth: Optional[Dict] = None,
162170
serialize: bool = False,
163171
**kwargs: dict,
164172
):
@@ -191,18 +199,26 @@ def __init__(
191199
>>> from huggingface_hub import snapshot_download
192200
193201
>>> local_dir=tempfile.mkdtemp()
194-
>>> # download sentence-transformers/all-MiniLM-L6-v2 from huggingface
202+
>>> allow_patterns=[
203+
... "onnx/model.onnx",
204+
... "config.json",
205+
... "special_tokens_map.json",
206+
... "tokenizer_config.json",
207+
... "tokenizer.json",
208+
... "vocab.txt"
209+
... ]
210+
211+
>>> # download files needed for this demonstration to local folder
195212
>>> snapshot_download(
196213
... repo_id="sentence-transformers/all-MiniLM-L6-v2",
197-
... local_dir=local_dir
214+
... local_dir=local_dir,
215+
... allow_patterns=allow_patterns
198216
... )
199217
200-
>>> # copy all files from subdirectory to artifact_dir
201218
>>> artifact_dir = tempfile.mkdtemp()
202-
>>> for root, dirs, files in os.walk(local_dir):
203-
>>> for file in files:
204-
>>> src_path = os.path.join(root, file)
205-
>>> shutil.copy(src_path, artifact_dir)
219+
>>> # copy all downloaded files to artifact folder
220+
>>> for file in allow_patterns:
221+
>>> shutil.copy(local_dir + "/" + file, artifact_dir)
206222
207223
>>> model = EmbeddingONNXModel(artifact_dir=artifact_dir)
208224
>>> model.summary_status()

ads/templates/score_embedding_onnx.jinja2

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import os
33
import sys
44
import json
5+
import subprocess
56
from functools import lru_cache
67
import onnxruntime as ort
78
import jsonschema
@@ -33,13 +34,26 @@ def load_model(model_file_name=model_name):
3334
contents = os.listdir(model_dir)
3435
if model_file_name in contents:
3536
print(f'Start loading {model_file_name} from model directory {model_dir} ...')
36-
model = ort.InferenceSession(os.path.join(model_dir, model_file_name), providers=['CUDAExecutionProvider','CPUExecutionProvider'])
37+
providers= ['CPUExecutionProvider']
38+
if is_gpu_available():
39+
providers=['CUDAExecutionProvider','CPUExecutionProvider']
40+
model = ort.InferenceSession(os.path.join(model_dir, model_file_name), providers=providers)
3741
print("Model is successfully loaded.")
3842
return model
3943
else:
4044
raise Exception(f'{model_file_name} is not found in model directory {model_dir}')
4145

4246

47+
def is_gpu_available():
48+
"""Check if gpu is available on the infrastructure."""
49+
try:
50+
result = subprocess.run(["nvidia-smi"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
51+
if result.returncode == 0:
52+
return True
53+
except FileNotFoundError:
54+
return False
55+
56+
4357
@lru_cache(maxsize=1)
4458
def load_tokenizer(model_full_name):
4559

docs/source/user_guide/model_registration/frameworks/embeddingonnxmodel.rst

Lines changed: 26 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ See `API Documentation <../../../ads.model.framework.html#ads.model.framework.em
66
Overview
77
========
88

9-
The ``ads.model.framework.embedding_onnx_model.EmbeddingONNXModel`` class in ADS is designed to rapidly get an Embedding ONNX Model into production. The ``.prepare()`` method creates the model artifacts that are needed without configuring it or writing code. However, you can customize the required ``score.py`` file.
9+
The ``ads.model.framework.embedding_onnx_model.EmbeddingONNXModel`` class in ADS is designed to rapidly get an Embedding ONNX Model into production. The ``.prepare()`` method creates the model artifacts that are needed without configuring it or writing code. ``EmbeddingONNXModel`` supports `OpenAI spec <https://github.com/huggingface/text-embeddings-inference/blob/main/docs/openapi.json>`_ for embeddings endpoint.
1010

1111
.. include:: ../_template/overview.rst
1212

@@ -24,26 +24,26 @@ The following steps take the `sentence-transformers/all-MiniLM-L6-v2 <https://hu
2424
2525
local_dir = tempfile.mkdtemp()
2626
27+
allow_patterns=[
28+
"onnx/model.onnx",
29+
"config.json",
30+
"special_tokens_map.json",
31+
"tokenizer_config.json",
32+
"tokenizer.json",
33+
"vocab.txt"
34+
]
35+
2736
# download files needed for this demonstration to local folder
2837
snapshot_download(
2938
repo_id="sentence-transformers/all-MiniLM-L6-v2",
3039
local_dir=local_dir,
31-
allow_patterns=[
32-
"onnx/model.onnx",
33-
"config.json",
34-
"special_tokens_map.json",
35-
"tokenizer_config.json",
36-
"tokenizer.json",
37-
"vocab.txt"
38-
]
40+
allow_patterns=allow_patterns
3941
)
4042
4143
artifact_dir = tempfile.mkdtemp()
4244
# copy all downloaded files to artifact folder
43-
for root, dirs, files in os.walk(local_dir):
44-
for file in files:
45-
src_path = os.path.join(root, file)
46-
shutil.copy(src_path, artifact_dir)
45+
for file in allow_patterns:
46+
shutil.copy(local_dir + "/" + file, artifact_dir)
4747
4848
4949
Install Conda Pack
@@ -213,26 +213,26 @@ Example
213213
214214
local_dir = tempfile.mkdtemp()
215215
216-
# download files needed for the demostration to local folder
216+
allow_patterns=[
217+
"onnx/model.onnx",
218+
"config.json",
219+
"special_tokens_map.json",
220+
"tokenizer_config.json",
221+
"tokenizer.json",
222+
"vocab.txt"
223+
]
224+
225+
# download files needed for this demonstration to local folder
217226
snapshot_download(
218227
repo_id="sentence-transformers/all-MiniLM-L6-v2",
219228
local_dir=local_dir,
220-
allow_patterns=[
221-
"onnx/model.onnx",
222-
"config.json",
223-
"special_tokens_map.json",
224-
"tokenizer_config.json",
225-
"tokenizer.json",
226-
"vocab.txt"
227-
]
229+
allow_patterns=allow_patterns
228230
)
229231
230232
artifact_dir = tempfile.mkdtemp()
231233
# copy all downloaded files to artifact folder
232-
for root, dirs, files in os.walk(local_dir):
233-
for file in files:
234-
src_path = os.path.join(root, file)
235-
shutil.copy(src_path, artifact_dir)
234+
for file in allow_patterns:
235+
shutil.copy(local_dir + "/" + file, artifact_dir)
236236
237237
# initialize EmbeddingONNXModel instance and prepare score.py, runtime.yaml and openapi.json files.
238238
embedding_onnx_model = EmbeddingONNXModel(artifact_dir=artifact_dir)

0 commit comments

Comments
 (0)