
Commit 4f0b011

Hchnryefubao authored and committed

Add omniinfer as a new backend (FlagOpen#630)

Import a new backend called OmniInfer and add an installation script for it. Deployment and inference work the same way as with vLLM. Patch details:

1. Apply `dos2unix` to all files.
2. Make all shell scripts executable (`chmod a+x`).
3. Loosen the Python version restriction in `pyproject.toml`.
4. Complete the vLLM unpatching in `omni_infer`, since unpatching is currently tricky and depends on vLLM at commit 65334ef3.

1 parent f57719c · commit 4f0b011
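
Patch detail 3 is applied inside the omniinfer submodule, so it does not show up in the diff below. A hypothetical before/after sketch of what loosening the constraint might look like (illustrative version numbers only; the real values live in omniinfer's `pyproject.toml`):

# hypothetical illustration, not the actual omniinfer values
# before: requires-python = "==3.10.*"
# after:  requires-python = ">=3.9"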

File tree

4 files changed (+52 -0 lines)

.gitmodules

Lines changed: 3 additions & 0 deletions
@@ -16,3 +16,6 @@
 [submodule "third_party/sglang"]
 	path = third_party/sglang
 	url = https://github.com/sgl-project/sglang.git
+[submodule "third_party/omniinfer"]
+	path = third_party/omniinfer
+	url = https://gitee.com/omniai/omniinfer.git
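
After pulling this commit, fetching the new submodule is a standard git step (shown for completeness; nothing omniinfer-specific):

git submodule update --init third_party/omniinfer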

install/install-requirements.sh

Lines changed: 46 additions & 0 deletions
@@ -12,12 +12,14 @@ print_help() {
 # Initialize the variable
 env=""
 llama_cpp_backend="cpu"
+omni_infer="0"
 
 # Parse command-line options
 while [[ "$#" -gt 0 ]]; do
     case $1 in
         --env) env="$2"; shift ;; # Assign the value after '--env'
         --llama-cpp-backend) llama_cpp_backend="$2"; shift ;; # Assign the value after '--llama-cpp-backend'
+        --omni_infer) omni_infer="$2"; shift ;; # Assign the value after '--omni_infer'
         --help|-h) print_help; exit 0 ;;
         *) echo "Error: Unknown parameter passed."; print_help; exit 1 ;;
     esac
@@ -193,6 +195,7 @@ if [ "${env}" == "inference" ]; then
     # Unpatch
     python tools/patch/unpatch.py --backend vllm
     python tools/patch/unpatch.py --backend llama.cpp
+    python tools/patch/unpatch.py --backend omniinfer
 
     # Build vllm
     # Navigate to requirements directory and install inference dependencies
@@ -257,6 +260,49 @@ if [ "${env}" == "inference" ]; then
         ;;
     esac
 
+    cd ../..
+    # Build omniinfer
+    if [ "${omni_infer}" == "1" ]; then
+        # Normalize line endings and make all shell scripts executable
+        find ./third_party/omniinfer -type f -exec dos2unix {} +
+        find ./third_party/omniinfer -type f -path '*.sh' -exec chmod a+x {} \;
+
+        # Unpatch vllm: clone it and pin the commit the patches depend on
+        cd ./third_party/omniinfer/infer_engines/
+        git clone https://github.com/vllm-project/vllm.git
+        (cd vllm && git checkout 65334ef3)
+        bash bash_install_code.sh
+        cd ../../..
+
+        # Install omniinfer dependencies
+        pip install -r ./third_party/omniinfer/tests/requirements.txt
+
+        # Build the vllm wheel (no device-specific kernels)
+        mkdir -p ./third_party/omniinfer/build/dist
+        cd ./third_party/omniinfer/infer_engines/vllm
+        VLLM_TARGET_DEVICE=empty python setup.py bdist_wheel
+        mv dist/vllm* ../../build/dist
+
+        # Build the omniinfer wheel
+        cd ../..
+        pip install build
+        python -m build
+        mv dist/omni_i* ./build/dist
+
+        # Build the omni_placement wheel
+        cd ./omni/accelerators/placement
+        python setup.py bdist_wheel
+        mv dist/omni_placement* ../../../build/dist
+
+        # Install the three wheels
+        cd ../../../build/dist
+        pip install omni_i*.whl
+        pip install vllm*.whl
+        pip install omni_placement*.whl
+
+        cd ../../../..
+    fi
+
     # For FlagRelease
     pip install --no-build-isolation git+https://github.com/FlagOpen/FlagGems.git@release_v1.0.0
 fi
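
The omniinfer block is opt-in: it only runs when the script is invoked for the inference environment with the new flag set. A sketch of the invocation, based on the options parsed above:

# build the inference stack plus the omniinfer backend
bash install/install-requirements.sh --env inference --omni_infer 1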

third_party/omniinfer

Submodule omniinfer added at 6f79695

tools/patch/patch.py

Lines changed: 2 additions & 0 deletions
@@ -455,6 +455,8 @@ def normalize_backend(backend):
         return "sglang"
     elif input_lower in ["llama.cpp", "llama_cpp"]:
         return "llama.cpp"
+    elif input_lower in ["omniinfer", "omni_infer", "OmniInfer"]:
+        return "omniinfer"
 
     raise ValueError(f'Unsupported backend {backend}')
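
The install script above calls `tools/patch/unpatch.py --backend omniinfer`; assuming `unpatch.py` routes through this same `normalize_backend` helper (an assumption; only `patch.py` appears in this diff), either spelling resolves to the same backend:

# both spellings normalize to "omniinfer"
python tools/patch/unpatch.py --backend omniinfer
python tools/patch/unpatch.py --backend omni_infer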
