From 7e363f34ed352ab11f23522136420052a833246f Mon Sep 17 00:00:00 2001
From: kondratyevd <kondratyev.d.95@gmail.com>
Date: Thu, 13 Mar 2025 13:28:32 -0400
Subject: [PATCH 01/20] don't build dependencies when installing from repo

---
 installer-plugin/installer.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/installer-plugin/installer.py b/installer-plugin/installer.py
index 606b4b2..bfc8852 100755
--- a/installer-plugin/installer.py
+++ b/installer-plugin/installer.py
@@ -106,7 +106,8 @@ def main() -> None:
             repo_commands.append(["helm", "repo", "add", "grafana", "https://grafana.github.io/helm-charts"])
         if merged_values.get("opentelemetry-collector", {}).get("enabled", False):
             repo_commands.append(["helm", "repo", "add", "opentelemetry", "https://open-telemetry.github.io/opentelemetry-helm-charts"])
-        repo_commands.append(["helm", "dependency", "build", chart_source])
+        if args.local:
+            repo_commands.append(["helm", "dependency", "build", chart_source])
 
         for cmd in repo_commands:
             logger.info(f"\nExecuting: {' '.join(cmd)}")

From d9a4c4b278755b68c1521743dc83256b4bae3adb Mon Sep 17 00:00:00 2001
From: kondratyevd <kondratyev.d.95@gmail.com>
Date: Thu, 13 Mar 2025 13:32:58 -0400
Subject: [PATCH 02/20] bump release

---
 helm/supersonic/Chart.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/helm/supersonic/Chart.yaml b/helm/supersonic/Chart.yaml
index 0a774da..00440c2 100644
--- a/helm/supersonic/Chart.yaml
+++ b/helm/supersonic/Chart.yaml
@@ -2,7 +2,7 @@ apiVersion: v2
 name: supersonic
 description: Server infrastructure for inference-as-a-service in large scientific experiments.
 icon: https://github.com/fastmachinelearning/SuperSONIC/blob/main/docs/img/SuperSONIC_small_512.png?raw=true
-version: 0.2.1
+version: 0.2.1 
 type: application
 home: https://fastmachinelearning.org/SuperSONIC/
 annotations:

From 9d892a5a0d8249c6ec40a005e9daae0c84a174d5 Mon Sep 17 00:00:00 2001
From: kondratyevd <kondratyev.d.95@gmail.com>
Date: Fri, 23 May 2025 16:58:10 -0400
Subject: [PATCH 03/20] first steps towards dynamic loading: configure
 model-specific routing in Envoy

---
 helm/supersonic/cfg/envoy-filter.lua          | 55 +++++++++++++++++--
 .../templates/envoy/configmaps.yaml           | 29 +++++++++-
 helm/supersonic/templates/triton/service.yaml | 31 +++++++++++
 3 files changed, 108 insertions(+), 7 deletions(-)

diff --git a/helm/supersonic/cfg/envoy-filter.lua b/helm/supersonic/cfg/envoy-filter.lua
index 0af267c..d3c349c 100644
--- a/helm/supersonic/cfg/envoy-filter.lua
+++ b/helm/supersonic/cfg/envoy-filter.lua
@@ -2,10 +2,6 @@ function envoy_on_request(request_handle)
     local path = request_handle:headers():get(":path")
     local contentType = request_handle:headers():get("content-type")
 
-    -- Any other request except model index
-    request_handle:streamInfo():dynamicMetadata():set("envoy.lua", "accept_request", true)
-
-    -- Model index requested?
     if path == "/inference.GRPCInferenceService/RepositoryIndex" and contentType == "application/grpc" then
         request_handle:streamInfo():dynamicMetadata():set("envoy.lua", "accept_request", false)
 
@@ -55,15 +51,62 @@ function envoy_on_request(request_handle)
             end
         else
             request_handle:logErr("Failed to parse metric value from Prometheus response.")
+            ---- Temporary ---- 
+            request_handle:logErr("Accepting request regardless of metric value.")
+            request_handle:streamInfo():dynamicMetadata():set("envoy.lua", "accept_request", true)
+        end
+    end
+
+    ---- Extract model_name from ModelInferRequest ----
+    if contentType == "application/grpc" then
+        if path == "/inference.GRPCInferenceService/ModelInfer" then
+            -- grab entire request body (you may need to configure the filter to buffer bodies)
+            local body = request_handle:body():getBytes(0, request_handle:body():length())
+            if body and #body > 5 then
+                -- strip the 5-byte gRPC header (1-byte flag + 4-byte msg-len)
+                local msg = body:sub(6)
+
+                -- protobuf wire format for field 1, wire type 2: tag = 0x0A
+                if msg:byte(1) == 0x0A then
+                    -- next byte is a varint length (assumes <128 bytes)
+                    local name_len = msg:byte(2)
+                    -- extract UTF-8 model name
+                    local model_name = msg:sub(3, 2 + name_len)
+
+                    -- log and propagate via dynamic metadata
+                    request_handle:logInfo("ModelInfer model_name = " .. model_name)
+                    if model_name then
+                        local hostHeader = model_name .. ".cms.svc.cluster.local:8001"
+                        request_handle:logInfo("x-model-host = " .. hostHeader)
+                        request_handle:headers():add("x-model-host", hostHeader)
+                    end
+                    for k, v in pairs(request_handle:headers()) do
+                        request_handle:logInfo("Header " .. k .. ": " .. v)
+                    end
+                else
+                    request_handle:logErr("Unexpected protobuf tag: " .. string.format("0x%02X", msg:byte(1)))
+                end
+            end
+        else
+            --- for non-inference calls, for now just forward to default service
+            request_handle:headers():add("x-model-host", "supersonic-test-triton.cms.svc.cluster.local:8001")
         end
     end
 end
 
 function envoy_on_response(response_handle)
-    -- Send error back if request was not accepted
+    local md = response_handle:streamInfo():dynamicMetadata():get("envoy.lua")
+
+    if not md or md.accept_request == nil then
+      return
+    end
+
     if not response_handle:streamInfo():dynamicMetadata():get("envoy.lua")["accept_request"] then
         response_handle:logInfo("Sending error as a response.")
-        response_handle:body():setBytes("")
+        local body = response_handle:body()
+        if body then
+          body:setBytes("")
+        end
         response_handle:headers():replace("grpc-status", "1")
     end
 end
diff --git a/helm/supersonic/templates/envoy/configmaps.yaml b/helm/supersonic/templates/envoy/configmaps.yaml
index a96eaec..59770c7 100644
--- a/helm/supersonic/templates/envoy/configmaps.yaml
+++ b/helm/supersonic/templates/envoy/configmaps.yaml
@@ -57,9 +57,14 @@ static_resources:
                       routes:
                         - match:
                             prefix: "/"
+                          typed_per_filter_config:
+                            envoy.filters.http.dynamic_forward_proxy:
+                              "@type": type.googleapis.com/envoy.extensions.filters.http.dynamic_forward_proxy.v3.PerRouteConfig
+                              host_rewrite_header: "x-model-host"
                           route:
-                            cluster: triton_grpc_service
+                            cluster: dynamic_forward_proxy_cluster
                             timeout: {{ .envoy.grpc_route_timeout }}
+
                 http_filters:
                   {{- with .envoy.rate_limiter.prometheus_based }}
                   {{- if .enabled }}
@@ -94,6 +99,14 @@ static_resources:
                           provider_name: provider_icecube
                   {{- end }}
                   {{- end }}
+                  - name: envoy.filters.http.dynamic_forward_proxy
+                    typed_config:
+                      "@type": type.googleapis.com/envoy.extensions.filters.http.dynamic_forward_proxy.v3.FilterConfig
+                      dns_cache_config:
+                        name: dynamic_cache
+                        dns_lookup_family: ALL
+                        dns_cache_circuit_breaker:
+                          max_pending_requests: 1024
                   - name: envoy.filters.http.router
                     typed_config:
                       "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
@@ -176,6 +189,20 @@ static_resources:
                     socket_address:
                       address: {{ .tritonName }}
                       port_value: {{ .tritonGrpcPort }}
+    - name: dynamic_forward_proxy_cluster
+      connect_timeout: 2s
+      lb_policy: CLUSTER_PROVIDED
+      http2_protocol_options:
+        max_concurrent_streams: 1000
+      cluster_type:
+        name: envoy.clusters.dynamic_forward_proxy
+        typed_config:
+          "@type": type.googleapis.com/envoy.extensions.clusters.dynamic_forward_proxy.v3.ClusterConfig
+          dns_cache_config:
+            name: dynamic_cache
+            dns_lookup_family: ALL
+            dns_cache_circuit_breaker:
+              max_pending_requests: 1024
 {{- end }}
 {{- end }}
 
diff --git a/helm/supersonic/templates/triton/service.yaml b/helm/supersonic/templates/triton/service.yaml
index 57a4879..9f590df 100644
--- a/helm/supersonic/templates/triton/service.yaml
+++ b/helm/supersonic/templates/triton/service.yaml
@@ -13,6 +13,37 @@ metadata:
     {{- if .Values.triton.service.annotations }}
 {{ toYaml .Values.triton.service.annotations | nindent 4 }}
     {{- end }}
+spec:
+  clusterIP: None
+  ports:
+  {{- range .Values.triton.service.ports }}
+  - name: {{ .name }}
+    port: {{ .port }}
+    targetPort: {{ .targetPort }}
+    protocol: {{ .protocol }}
+  {{- end }}
+  selector:
+    app.kubernetes.io/name: {{ .Chart.Name }}
+    app.kubernetes.io/instance: {{ include "supersonic.name" . }}
+    app.kubernetes.io/component: triton
+
+---
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: deepmet
+  labels:
+    app.kubernetes.io/name: {{ .Chart.Name }}
+    app.kubernetes.io/instance: {{ include "supersonic.name" . }}
+    app.kubernetes.io/component: triton
+    {{- if .Values.triton.service.labels }}
+{{ toYaml .Values.triton.service.labels | nindent 4 }}
+    {{- end }}
+  annotations:
+    {{- if .Values.triton.service.annotations }}
+{{ toYaml .Values.triton.service.annotations | nindent 4 }}
+    {{- end }}
 spec:
   clusterIP: None
   ports:

From 4c33ce63c7e71bf8f2d25460ef888a31981ae2d0 Mon Sep 17 00:00:00 2001
From: kondratyevd <kondratyev.d.95@gmail.com>
Date: Fri, 30 May 2025 13:00:19 -0400
Subject: [PATCH 04/20] remove unnecessary diffs

---
 helm/supersonic/Chart.yaml    | 2 +-
 installer-plugin/installer.py | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/helm/supersonic/Chart.yaml b/helm/supersonic/Chart.yaml
index 00440c2..0a774da 100644
--- a/helm/supersonic/Chart.yaml
+++ b/helm/supersonic/Chart.yaml
@@ -2,7 +2,7 @@ apiVersion: v2
 name: supersonic
 description: Server infrastructure for inference-as-a-service in large scientific experiments.
 icon: https://github.com/fastmachinelearning/SuperSONIC/blob/main/docs/img/SuperSONIC_small_512.png?raw=true
-version: 0.2.1 
+version: 0.2.1
 type: application
 home: https://fastmachinelearning.org/SuperSONIC/
 annotations:
diff --git a/installer-plugin/installer.py b/installer-plugin/installer.py
index bfc8852..606b4b2 100755
--- a/installer-plugin/installer.py
+++ b/installer-plugin/installer.py
@@ -106,8 +106,7 @@ def main() -> None:
             repo_commands.append(["helm", "repo", "add", "grafana", "https://grafana.github.io/helm-charts"])
         if merged_values.get("opentelemetry-collector", {}).get("enabled", False):
             repo_commands.append(["helm", "repo", "add", "opentelemetry", "https://open-telemetry.github.io/opentelemetry-helm-charts"])
-        if args.local:
-            repo_commands.append(["helm", "dependency", "build", chart_source])
+        repo_commands.append(["helm", "dependency", "build", chart_source])
 
         for cmd in repo_commands:
             logger.info(f"\nExecuting: {' '.join(cmd)}")

From 704084dfb8cab63ffedc005a744d3587376f4dcd Mon Sep 17 00:00:00 2001
From: kondratyevd <kondratyev.d.95@gmail.com>
Date: Fri, 30 May 2025 13:01:38 -0400
Subject: [PATCH 05/20] restore original Lua filter

---
 helm/supersonic/cfg/envoy-filter-dynamic.lua | 118 +++++++++++++++++++
 helm/supersonic/cfg/envoy-filter.lua         |  55 +--------
 2 files changed, 124 insertions(+), 49 deletions(-)
 create mode 100644 helm/supersonic/cfg/envoy-filter-dynamic.lua

diff --git a/helm/supersonic/cfg/envoy-filter-dynamic.lua b/helm/supersonic/cfg/envoy-filter-dynamic.lua
new file mode 100644
index 0000000..d3c349c
--- /dev/null
+++ b/helm/supersonic/cfg/envoy-filter-dynamic.lua
@@ -0,0 +1,118 @@
+function envoy_on_request(request_handle)
+    -- Request hook: gates RepositoryIndex calls on a Prometheus metric (decision stored in
+    -- dynamic metadata) and sets "x-model-host" on gRPC calls (per model for ModelInfer).
+    local path = request_handle:headers():get(":path")
+    local contentType = request_handle:headers():get("content-type")
+
+    if path == "/inference.GRPCInferenceService/RepositoryIndex" and contentType == "application/grpc" then
+        -- Reject by default; only the metric check below flips this to true.
+        request_handle:streamInfo():dynamicMetadata():set("envoy.lua", "accept_request", false)
+        local query = SERVER_LOAD_METRIC
+        local metric_threshold = tonumber(SERVER_LOAD_THRESHOLD)
+        local query_response_template = '"value":%[%d+%.%d+,"([%d%.]+)"%]'
+        local encoded_query = encode_query(query)
+        request_handle:logInfo("Prometheus scheme: " .. "PROMETHEUS_SCHEME")
+        request_handle:logInfo("Prometheus host: " .. "PROMETHEUS_HOST")
+        request_handle:logInfo("Prometheus port: " .. "PROMETHEUS_PORT")
+        request_handle:logInfo("Query: " .. query)
+        request_handle:logInfo("Encoded query: " .. encoded_query)
+
+        local headers, body = request_handle:httpCall(
+            "prometheus_cluster",
+            {
+                [":method"] = "GET",
+                [":path"] = "/api/v1/query?query=" .. encoded_query,
+                [":scheme"] = "PROMETHEUS_SCHEME",
+                [":authority"] = "PROMETHEUS_HOST" .. ":" .. "PROMETHEUS_PORT"
+            },
+            "",
+            5000
+        )
+        if not headers then
+            request_handle:logErr("HTTP call to Prometheus failed.")
+            return
+        end
+        if not body or body == "" then
+            request_handle:logErr("Prometheus could not be reached or returned no data.")
+            return
+        end
+
+        request_handle:logInfo("Query response body: " .. body)
+        local metric_value_str = string.match(body, query_response_template)
+        -- tostring(): the match is nil on an unexpected body, and concatenating nil would raise before the fallback branch below runs.
+        request_handle:logInfo("Extracted metric: " .. tostring(metric_value_str))
+        if metric_value_str then
+            local metric_value = tonumber(metric_value_str)
+            if metric_value > metric_threshold then
+                request_handle:logInfo("Metric value exceeds threshold: " .. metric_value .. " > " .. metric_threshold)
+            else
+                request_handle:logInfo("Metric value below threshold: " .. metric_value .. " < " .. metric_threshold)
+                request_handle:streamInfo():dynamicMetadata():set("envoy.lua", "accept_request", true)
+            end
+        else
+            request_handle:logErr("Failed to parse metric value from Prometheus response.")
+            ---- Temporary ----
+            request_handle:logErr("Accepting request regardless of metric value.")
+            request_handle:streamInfo():dynamicMetadata():set("envoy.lua", "accept_request", true)
+        end
+    end
+
+    ---- Extract model_name from ModelInferRequest ----
+    if contentType == "application/grpc" then
+        if path == "/inference.GRPCInferenceService/ModelInfer" then
+            -- grab entire request body (you may need to configure the filter to buffer bodies); guard against a missing body
+            local buffer = request_handle:body()
+            local body = buffer and buffer:getBytes(0, buffer:length())
+            if body and #body > 5 then
+                -- strip the 5-byte gRPC header (1-byte flag + 4-byte msg-len)
+                local msg = body:sub(6)
+                -- protobuf wire format for field 1, wire type 2: tag = 0x0A
+                if msg:byte(1) == 0x0A then
+                    -- next byte is the length prefix; only single-byte varints (1..127) with all name bytes present are handled
+                    local name_len = msg:byte(2)
+                    if name_len and name_len > 0 and name_len < 128 and #msg >= 2 + name_len then
+                        local model_name = msg:sub(3, 2 + name_len)
+                        local hostHeader = model_name .. ".cms.svc.cluster.local:8001"
+                        request_handle:logInfo("ModelInfer model_name = " .. model_name)
+                        request_handle:logInfo("x-model-host = " .. hostHeader)
+                        -- replace() rather than add(): a client-supplied x-model-host must not survive next to ours
+                        request_handle:headers():replace("x-model-host", hostHeader)
+                    else
+                        request_handle:logErr("Cannot read model_name: length prefix is missing, zero, truncated or larger than 127 bytes.")
+                    end
+                    for k, v in pairs(request_handle:headers()) do
+                        request_handle:logInfo("Header " .. k .. ": " .. v)
+                    end
+                else
+                    request_handle:logErr("Unexpected protobuf tag: " .. string.format("0x%02X", msg:byte(1)))
+                end
+            end
+        else
+            --- for non-inference calls, for now just forward to default service
+            request_handle:headers():replace("x-model-host", "supersonic-test-triton.cms.svc.cluster.local:8001")
+        end
+    end
+end
+
+function envoy_on_response(response_handle)
+    -- Response hook: if the request hook stored accept_request = false under "envoy.lua",
+    -- blank the response body and set grpc-status to 1; otherwise leave the response alone.
+    local decision = response_handle:streamInfo():dynamicMetadata():get("envoy.lua")
+    if not decision or decision.accept_request == nil then
+        return -- nothing was recorded for this stream
+    end
+    if decision.accept_request then
+        return -- request was accepted
+    end
+
+    response_handle:logInfo("Sending error as a response.")
+    local payload = response_handle:body()
+    if payload then payload:setBytes("") end
+    response_handle:headers():replace("grpc-status", "1")
+end
+
+function encode_query(query)
+    -- Percent-encode every character except alphanumerics, space, "_", "%", "-", "." and "~"; then turn spaces into "+".
+    local escaped = query:gsub("([^%w _%%%-%.~])", function(ch) return string.format("%%%02X", string.byte(ch)) end)
+    return escaped:gsub(" ", "+")
+end
\ No newline at end of file
diff --git a/helm/supersonic/cfg/envoy-filter.lua b/helm/supersonic/cfg/envoy-filter.lua
index d3c349c..0af267c 100644
--- a/helm/supersonic/cfg/envoy-filter.lua
+++ b/helm/supersonic/cfg/envoy-filter.lua
@@ -2,6 +2,10 @@ function envoy_on_request(request_handle)
     local path = request_handle:headers():get(":path")
     local contentType = request_handle:headers():get("content-type")
 
+    -- Any other request except model index
+    request_handle:streamInfo():dynamicMetadata():set("envoy.lua", "accept_request", true)
+
+    -- Model index requested?
     if path == "/inference.GRPCInferenceService/RepositoryIndex" and contentType == "application/grpc" then
         request_handle:streamInfo():dynamicMetadata():set("envoy.lua", "accept_request", false)
 
@@ -51,62 +55,15 @@ function envoy_on_request(request_handle)
             end
         else
             request_handle:logErr("Failed to parse metric value from Prometheus response.")
-            ---- Temporary ---- 
-            request_handle:logErr("Accepting request regardless of metric value.")
-            request_handle:streamInfo():dynamicMetadata():set("envoy.lua", "accept_request", true)
-        end
-    end
-
-    ---- Extract model_name from ModelInferRequest ----
-    if contentType == "application/grpc" then
-        if path == "/inference.GRPCInferenceService/ModelInfer" then
-            -- grab entire request body (you may need to configure the filter to buffer bodies)
-            local body = request_handle:body():getBytes(0, request_handle:body():length())
-            if body and #body > 5 then
-                -- strip the 5-byte gRPC header (1-byte flag + 4-byte msg-len)
-                local msg = body:sub(6)
-
-                -- protobuf wire format for field 1, wire type 2: tag = 0x0A
-                if msg:byte(1) == 0x0A then
-                    -- next byte is a varint length (assumes <128 bytes)
-                    local name_len = msg:byte(2)
-                    -- extract UTF-8 model name
-                    local model_name = msg:sub(3, 2 + name_len)
-
-                    -- log and propagate via dynamic metadata
-                    request_handle:logInfo("ModelInfer model_name = " .. model_name)
-                    if model_name then
-                        local hostHeader = model_name .. ".cms.svc.cluster.local:8001"
-                        request_handle:logInfo("x-model-host = " .. hostHeader)
-                        request_handle:headers():add("x-model-host", hostHeader)
-                    end
-                    for k, v in pairs(request_handle:headers()) do
-                        request_handle:logInfo("Header " .. k .. ": " .. v)
-                    end
-                else
-                    request_handle:logErr("Unexpected protobuf tag: " .. string.format("0x%02X", msg:byte(1)))
-                end
-            end
-        else
-            --- for non-inference calls, for now just forward to default service
-            request_handle:headers():add("x-model-host", "supersonic-test-triton.cms.svc.cluster.local:8001")
         end
     end
 end
 
 function envoy_on_response(response_handle)
-    local md = response_handle:streamInfo():dynamicMetadata():get("envoy.lua")
-
-    if not md or md.accept_request == nil then
-      return
-    end
-
+    -- Send error back if request was not accepted
     if not response_handle:streamInfo():dynamicMetadata():get("envoy.lua")["accept_request"] then
         response_handle:logInfo("Sending error as a response.")
-        local body = response_handle:body()
-        if body then
-          body:setBytes("")
-        end
+        response_handle:body():setBytes("")
         response_handle:headers():replace("grpc-status", "1")
     end
 end

From f76a0db615ddc7a84fc45f900022282d646d000e Mon Sep 17 00:00:00 2001
From: kondratyevd <kondratyev.d.95@gmail.com>
Date: Fri, 30 May 2025 13:07:52 -0400
Subject: [PATCH 06/20] values parameter to enable dynamic routing in envoy

---
 helm/supersonic/templates/envoy/configmaps.yaml | 10 ++++++++++
 helm/supersonic/values.yaml                     |  4 ++++
 2 files changed, 14 insertions(+)

diff --git a/helm/supersonic/templates/envoy/configmaps.yaml b/helm/supersonic/templates/envoy/configmaps.yaml
index 59770c7..6513d26 100644
--- a/helm/supersonic/templates/envoy/configmaps.yaml
+++ b/helm/supersonic/templates/envoy/configmaps.yaml
@@ -57,6 +57,7 @@ static_resources:
                       routes:
                         - match:
                             prefix: "/"
+                          {{- if .envoy.dynamic_routing.enabled }}
                           typed_per_filter_config:
                             envoy.filters.http.dynamic_forward_proxy:
                               "@type": type.googleapis.com/envoy.extensions.filters.http.dynamic_forward_proxy.v3.PerRouteConfig
@@ -64,6 +65,11 @@ static_resources:
                           route:
                             cluster: dynamic_forward_proxy_cluster
                             timeout: {{ .envoy.grpc_route_timeout }}
+                          {{- else }}
+                          route:
+                            cluster: triton_grpc_service
+                            timeout: {{ .envoy.grpc_route_timeout }}
+                          {{- end }}
 
                 http_filters:
                   {{- with .envoy.rate_limiter.prometheus_based }}
@@ -99,6 +105,7 @@ static_resources:
                           provider_name: provider_icecube
                   {{- end }}
                   {{- end }}
+                  {{- if .envoy.dynamic_routing.enabled }}
                   - name: envoy.filters.http.dynamic_forward_proxy
                     typed_config:
                       "@type": type.googleapis.com/envoy.extensions.filters.http.dynamic_forward_proxy.v3.FilterConfig
@@ -107,6 +114,7 @@ static_resources:
                         dns_lookup_family: ALL
                         dns_cache_circuit_breaker:
                           max_pending_requests: 1024
+                  {{- end }}
                   - name: envoy.filters.http.router
                     typed_config:
                       "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
@@ -189,6 +197,7 @@ static_resources:
                     socket_address:
                       address: {{ .tritonName }}
                       port_value: {{ .tritonGrpcPort }}
+    {{- if .envoy.dynamic_routing.enabled }}
     - name: dynamic_forward_proxy_cluster
       connect_timeout: 2s
       lb_policy: CLUSTER_PROVIDED
@@ -203,6 +212,7 @@ static_resources:
             dns_lookup_family: ALL
             dns_cache_circuit_breaker:
               max_pending_requests: 1024
+    {{- end }}
 {{- end }}
 {{- end }}
 
diff --git a/helm/supersonic/values.yaml b/helm/supersonic/values.yaml
index c261f19..eb85b96 100644
--- a/helm/supersonic/values.yaml
+++ b/helm/supersonic/values.yaml
@@ -162,6 +162,10 @@ envoy:
   # Options: ROUND_ROBIN, LEAST_REQUEST, RING_HASH, RANDOM, MAGLEV
   loadBalancerPolicy: "LEAST_REQUEST"
 
+  # -- Enable dynamic routing in Envoy proxy.
+  dynamic_routing:
+    enabled: false
+
   auth:
     # -- Enable authentication in Envoy proxy
     enabled: false

From 6523046368e2f0ab2f477018b33c3adae0e6fe20 Mon Sep 17 00:00:00 2001
From: GitHub Actions <actions@github.com>
Date: Fri, 30 May 2025 17:08:11 +0000
Subject: [PATCH 07/20] Update JSON schema

---
 helm/supersonic/values.schema.json | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/helm/supersonic/values.schema.json b/helm/supersonic/values.schema.json
index 4ad91bd..5dbf5b9 100644
--- a/helm/supersonic/values.schema.json
+++ b/helm/supersonic/values.schema.json
@@ -389,6 +389,17 @@
         "loadBalancerPolicy": {
           "type": "string"
         },
+        "dynamic_routing": {
+          "type": "object",
+          "properties": {
+            "enabled": {
+              "type": "boolean"
+            }
+          },
+          "required": [
+            "enabled"
+          ]
+        },
         "auth": {
           "type": "object",
           "properties": {
@@ -424,6 +435,7 @@
       "required": [
         "args",
         "auth",
+        "dynamic_routing",
         "enabled",
         "grpc_route_timeout",
         "image",

From e6ab63533cc637b088d1fdd635c709e143c51a02 Mon Sep 17 00:00:00 2001
From: GitHub Actions <actions@github.com>
Date: Fri, 30 May 2025 17:08:26 +0000
Subject: [PATCH 08/20] Update helm docs

---
 docs/.values-table.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/.values-table.md b/docs/.values-table.md
index 0847bf8..923a4d6 100644
--- a/docs/.values-table.md
+++ b/docs/.values-table.md
@@ -37,6 +37,7 @@
 | envoy.rate_limiter.prometheus_based | object | `{"enabled":false,"luaConfig":"cfg/envoy-filter.lua"}` | This rate limiter rejects new connections based on metric extracted from Prometheus (e.g. inference queue latency). The metric is taken from parameter ``prometheus.serverLoadMetric``, and the threshold is set by ``prometheus.serverLoadThreshold``. These parameters are the same as those used by the KEDA autoscaler. |
 | envoy.rate_limiter.prometheus_based.enabled | bool | `false` | Enable rate limiter |
 | envoy.loadBalancerPolicy | string | `"LEAST_REQUEST"` | Envoy load balancer policy. Options: ROUND_ROBIN, LEAST_REQUEST, RING_HASH, RANDOM, MAGLEV |
+| envoy.dynamic_routing | object | `{"enabled":false}` | Enable dynamic routing in Envoy proxy. |
 | envoy.auth.enabled | bool | `false` | Enable authentication in Envoy proxy |
 | envoy.auth.jwt_issuer | string | `""` |  |
 | envoy.auth.jwt_remote_jwks_uri | string | `""` |  |

From ac861258e9d5fac604b8b7da7a30e06b3fbf41fb Mon Sep 17 00:00:00 2001
From: kondratyevd <kondratyev.d.95@gmail.com>
Date: Fri, 30 May 2025 13:08:58 -0400
Subject: [PATCH 09/20] remove extra service

---
 helm/supersonic/templates/triton/service.yaml | 31 -------------------
 1 file changed, 31 deletions(-)

diff --git a/helm/supersonic/templates/triton/service.yaml b/helm/supersonic/templates/triton/service.yaml
index 9f590df..57a4879 100644
--- a/helm/supersonic/templates/triton/service.yaml
+++ b/helm/supersonic/templates/triton/service.yaml
@@ -13,37 +13,6 @@ metadata:
     {{- if .Values.triton.service.annotations }}
 {{ toYaml .Values.triton.service.annotations | nindent 4 }}
     {{- end }}
-spec:
-  clusterIP: None
-  ports:
-  {{- range .Values.triton.service.ports }}
-  - name: {{ .name }}
-    port: {{ .port }}
-    targetPort: {{ .targetPort }}
-    protocol: {{ .protocol }}
-  {{- end }}
-  selector:
-    app.kubernetes.io/name: {{ .Chart.Name }}
-    app.kubernetes.io/instance: {{ include "supersonic.name" . }}
-    app.kubernetes.io/component: triton
-
----
-
-apiVersion: v1
-kind: Service
-metadata:
-  name: deepmet
-  labels:
-    app.kubernetes.io/name: {{ .Chart.Name }}
-    app.kubernetes.io/instance: {{ include "supersonic.name" . }}
-    app.kubernetes.io/component: triton
-    {{- if .Values.triton.service.labels }}
-{{ toYaml .Values.triton.service.labels | nindent 4 }}
-    {{- end }}
-  annotations:
-    {{- if .Values.triton.service.annotations }}
-{{ toYaml .Values.triton.service.annotations | nindent 4 }}
-    {{- end }}
 spec:
   clusterIP: None
   ports:

From db5e9135b88b2624bfaa4e15d11022d8567ef5c9 Mon Sep 17 00:00:00 2001
From: kondratyevd <kondratyev.d.95@gmail.com>
Date: Fri, 30 May 2025 13:13:52 -0400
Subject: [PATCH 10/20] make path to lua script more configurable

---
 .../templates/envoy/configmaps.yaml           |  6 ++--
 helm/supersonic/values.yaml                   |  5 +++-
 values/values-geddes-cms.yaml                 | 29 ++++++++++++++-----
 3 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/helm/supersonic/templates/envoy/configmaps.yaml b/helm/supersonic/templates/envoy/configmaps.yaml
index 6513d26..f02d14d 100644
--- a/helm/supersonic/templates/envoy/configmaps.yaml
+++ b/helm/supersonic/templates/envoy/configmaps.yaml
@@ -72,7 +72,7 @@ static_resources:
                           {{- end }}
 
                 http_filters:
-                  {{- with .envoy.rate_limiter.prometheus_based }}
+                  {{- with .envoy.lua_filter }}
                   {{- if .enabled }}
                   - name: envoy.filters.http.lua
                     typed_config:
@@ -263,7 +263,7 @@ data:
 {{ include "envoy.configuration.yaml" $envoyContext | indent 4 }}
 ---
 
-{{- if .Values.envoy.rate_limiter.prometheus_based.enabled }}
+{{- if .Values.envoy.lua_filter.enabled }}
 {{- /* Create a ConfigMap for the Lua filter */}}
 apiVersion: v1
 kind: ConfigMap
@@ -276,7 +276,7 @@ metadata:
 data:
   envoy-filter.lua: |-
     {{- /* Read and process the Lua configuration file */}}
-    {{- $luaConfig := $.Files.Get .Values.envoy.rate_limiter.prometheus_based.luaConfig | nindent 4 }}
+    {{- $luaConfig := $.Files.Get .Values.envoy.lua_filter.lua_config | nindent 4 }}
     {{- $luaConfig = $luaConfig | replace "SERVER_LOAD_METRIC" (include "supersonic.defaultMetric" . | quote) }}
     {{- $luaConfig = $luaConfig | replace "SERVER_LOAD_THRESHOLD" (quote .Values.serverLoadThreshold) }}
     {{- $luaConfig = $luaConfig | replace "PROMETHEUS_SCHEME" (include "supersonic.prometheusScheme" .) }}
diff --git a/helm/supersonic/values.yaml b/helm/supersonic/values.yaml
index eb85b96..042a9f1 100644
--- a/helm/supersonic/values.yaml
+++ b/helm/supersonic/values.yaml
@@ -156,12 +156,15 @@ envoy:
     prometheus_based:
       # -- Enable rate limiter
       enabled: false
-      luaConfig: "cfg/envoy-filter.lua" 
 
   # -- Envoy load balancer policy.
   # Options: ROUND_ROBIN, LEAST_REQUEST, RING_HASH, RANDOM, MAGLEV
   loadBalancerPolicy: "LEAST_REQUEST"
 
+  lua_filter:
+    enabled: false
+    lua_config: "cfg/envoy-filter.lua"
+
   # -- Enable dynamic routing in Envoy proxy.
   dynamic_routing:
     enabled: false
diff --git a/values/values-geddes-cms.yaml b/values/values-geddes-cms.yaml
index 453ae6e..4e87ac0 100644
--- a/values/values-geddes-cms.yaml
+++ b/values/values-geddes-cms.yaml
@@ -11,10 +11,12 @@ triton:
       --model-repository=/cvmfs/cms.cern.ch/el9_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre7/external/el9_amd64_gcc12/data/RecoEgamma/EgammaPhotonProducers/data/models/ \
       --model-repository=/cvmfs/cms.cern.ch/el9_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre7/external/el9_amd64_gcc12/data/RecoTauTag/TrainingFiles/data/DeepTauIdSONIC/ \
       --model-repository=/cvmfs/cms.cern.ch/el9_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre7/external/el9_amd64_gcc12/data/RecoMET/METPUSubtraction/data/models/ \
+      --model-control-mode=explicit \
       --allow-gpu-metrics=true \
       --log-verbose=0 \
       --strict-model-config=false \
       --exit-timeout-secs=60
+
   resources:
     limits: { nvidia.com/gpu: 1, cpu: 2, memory: 4G}
     requests: { nvidia.com/gpu: 1, cpu: 2, memory: 4G}
@@ -37,6 +39,14 @@ envoy:
     enabled: true
     hostName: sonic-cms.geddes.rcac.purdue.edu
     ingressClassName: public
+  rate_limiter:
+    prometheus_based:
+      enabled: false
+  dynamic_routing:
+    enabled: true
+  lua_filter:
+    enabled: true
+    lua_config: "cfg/envoy-filter-dynamic.lua"
 
 autoscaler:
   enabled: true
@@ -55,15 +65,20 @@ tolerations:
     effect: NoSchedule
 
 prometheus:
-  enabled: true
-  server:
-    ingress:
-      enabled: true
-      hostName: prometheus-cms.geddes.rcac.purdue.edu
-      ingressClassName: public
+  # enabled: false
+  external:
+    enabled: true
+    url: "prometheus-cms.geddes.rcac.purdue.edu"
+    port: 443
+    scheme: https
+  # server:
+  #   ingress:
+  #     enabled: true
+  #     hostName: prometheus-cms.geddes.rcac.purdue.edu
+  #     ingressClassName: public
 
 grafana:
-  enabled: true
+  enabled: false
   ingress:
     enabled: true
     hostName: grafana-cms.geddes.rcac.purdue.edu

From 205077be87ef598e9f2e3e16105738721303f2ee Mon Sep 17 00:00:00 2001
From: GitHub Actions <actions@github.com>
Date: Fri, 30 May 2025 17:14:09 +0000
Subject: [PATCH 11/20] Update JSON schema

---
 helm/supersonic/values.schema.json | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/helm/supersonic/values.schema.json b/helm/supersonic/values.schema.json
index 5dbf5b9..fb3d1c7 100644
--- a/helm/supersonic/values.schema.json
+++ b/helm/supersonic/values.schema.json
@@ -370,14 +370,10 @@
               "properties": {
                 "enabled": {
                   "type": "boolean"
-                },
-                "luaConfig": {
-                  "type": "string"
                 }
               },
               "required": [
-                "enabled",
-                "luaConfig"
+                "enabled"
               ]
             }
           },
@@ -389,6 +385,21 @@
         "loadBalancerPolicy": {
           "type": "string"
         },
+        "lua_filter": {
+          "type": "object",
+          "properties": {
+            "enabled": {
+              "type": "boolean"
+            },
+            "lua_config": {
+              "type": "string"
+            }
+          },
+          "required": [
+            "enabled",
+            "lua_config"
+          ]
+        },
         "dynamic_routing": {
           "type": "object",
           "properties": {
@@ -441,6 +452,7 @@
         "image",
         "ingress",
         "loadBalancerPolicy",
+        "lua_filter",
         "rate_limiter",
         "replicas",
         "resources",

From aa6129f5c746976dd3a4c413d38bd0da7ed0677a Mon Sep 17 00:00:00 2001
From: GitHub Actions <actions@github.com>
Date: Fri, 30 May 2025 17:14:29 +0000
Subject: [PATCH 12/20] Update helm docs

---
 docs/.values-table.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/docs/.values-table.md b/docs/.values-table.md
index 923a4d6..1116c8c 100644
--- a/docs/.values-table.md
+++ b/docs/.values-table.md
@@ -34,9 +34,11 @@
 | envoy.rate_limiter.listener_level.max_tokens | int | `5` | Maximum number of simultaneous connections to the Envoy Proxy. Each new connection takes a "token" from the "bucket" which initially contains ``max_tokens`` tokens. |
 | envoy.rate_limiter.listener_level.tokens_per_fill | int | `1` | ``tokens_per_fill`` tokens are added to the "bucket" every ``fill_interval``, allowing new connections to be established. |
 | envoy.rate_limiter.listener_level.fill_interval | string | `"12s"` | For example, adding a new token every 12 seconds allows 5 new connections every minute. |
-| envoy.rate_limiter.prometheus_based | object | `{"enabled":false,"luaConfig":"cfg/envoy-filter.lua"}` | This rate limiter rejects new connections based on metric extracted from Prometheus (e.g. inference queue latency). The metric is taken from parameter ``prometheus.serverLoadMetric``, and the threshold is set by ``prometheus.serverLoadThreshold``. These parameters are the same as those used by the KEDA autoscaler. |
+| envoy.rate_limiter.prometheus_based | object | `{"enabled":false}` | This rate limiter rejects new connections based on metric extracted from Prometheus (e.g. inference queue latency). The metric is taken from parameter ``prometheus.serverLoadMetric``, and the threshold is set by ``prometheus.serverLoadThreshold``. These parameters are the same as those used by the KEDA autoscaler. |
 | envoy.rate_limiter.prometheus_based.enabled | bool | `false` | Enable rate limiter |
 | envoy.loadBalancerPolicy | string | `"LEAST_REQUEST"` | Envoy load balancer policy. Options: ROUND_ROBIN, LEAST_REQUEST, RING_HASH, RANDOM, MAGLEV |
+| envoy.lua_filter.enabled | bool | `false` |  |
+| envoy.lua_filter.lua_config | string | `"cfg/envoy-filter.lua"` |  |
 | envoy.dynamic_routing | object | `{"enabled":false}` | Enable dynamic routing in Envoy proxy. |
 | envoy.auth.enabled | bool | `false` | Enable authentication in Envoy proxy |
 | envoy.auth.jwt_issuer | string | `""` |  |

From 03fda7888d9306f2859d155c142d0a3da35ad14a Mon Sep 17 00:00:00 2001
From: kondratyevd <kondratyev.d.95@gmail.com>
Date: Fri, 30 May 2025 13:25:44 -0400
Subject: [PATCH 13/20] rename header

---
 helm/supersonic/cfg/envoy-filter-dynamic.lua    | 6 +++---
 helm/supersonic/templates/envoy/configmaps.yaml | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/helm/supersonic/cfg/envoy-filter-dynamic.lua b/helm/supersonic/cfg/envoy-filter-dynamic.lua
index d3c349c..1a04a76 100644
--- a/helm/supersonic/cfg/envoy-filter-dynamic.lua
+++ b/helm/supersonic/cfg/envoy-filter-dynamic.lua
@@ -77,8 +77,8 @@ function envoy_on_request(request_handle)
                     request_handle:logInfo("ModelInfer model_name = " .. model_name)
                     if model_name then
                         local hostHeader = model_name .. ".cms.svc.cluster.local:8001"
-                        request_handle:logInfo("x-model-host = " .. hostHeader)
-                        request_handle:headers():add("x-model-host", hostHeader)
+                        request_handle:logInfo("route-to = " .. hostHeader)
+                        request_handle:headers():add("route-to", hostHeader)
                     end
                     for k, v in pairs(request_handle:headers()) do
                         request_handle:logInfo("Header " .. k .. ": " .. v)
@@ -89,7 +89,7 @@ function envoy_on_request(request_handle)
             end
         else
             --- for non-inference calls, for now just forward to default service
-            request_handle:headers():add("x-model-host", "supersonic-test-triton.cms.svc.cluster.local:8001")
+            request_handle:headers():add("route-to", "supersonic-test-triton.cms.svc.cluster.local:8001")
         end
     end
 end
diff --git a/helm/supersonic/templates/envoy/configmaps.yaml b/helm/supersonic/templates/envoy/configmaps.yaml
index f02d14d..fa97697 100644
--- a/helm/supersonic/templates/envoy/configmaps.yaml
+++ b/helm/supersonic/templates/envoy/configmaps.yaml
@@ -61,7 +61,7 @@ static_resources:
                           typed_per_filter_config:
                             envoy.filters.http.dynamic_forward_proxy:
                               "@type": type.googleapis.com/envoy.extensions.filters.http.dynamic_forward_proxy.v3.PerRouteConfig
-                              host_rewrite_header: "x-model-host"
+                              host_rewrite_header: "route-to"
                           route:
                             cluster: dynamic_forward_proxy_cluster
                             timeout: {{ .envoy.grpc_route_timeout }}

From 8cbb8081afb09f42b62b9f1e13fdd6cdec4e24c5 Mon Sep 17 00:00:00 2001
From: kondratyevd <kondratyev.d.95@gmail.com>
Date: Fri, 30 May 2025 13:30:16 -0400
Subject: [PATCH 14/20] clean up dynamic lua filter

---
 helm/supersonic/cfg/envoy-filter-dynamic.lua  | 82 +------------------
 .../templates/envoy/configmaps.yaml           |  2 +
 2 files changed, 4 insertions(+), 80 deletions(-)

diff --git a/helm/supersonic/cfg/envoy-filter-dynamic.lua b/helm/supersonic/cfg/envoy-filter-dynamic.lua
index 1a04a76..9dd117d 100644
--- a/helm/supersonic/cfg/envoy-filter-dynamic.lua
+++ b/helm/supersonic/cfg/envoy-filter-dynamic.lua
@@ -2,61 +2,6 @@ function envoy_on_request(request_handle)
     local path = request_handle:headers():get(":path")
     local contentType = request_handle:headers():get("content-type")
 
-    if path == "/inference.GRPCInferenceService/RepositoryIndex" and contentType == "application/grpc" then
-        request_handle:streamInfo():dynamicMetadata():set("envoy.lua", "accept_request", false)
-
-        local query = SERVER_LOAD_METRIC
-        local metric_threshold = tonumber(SERVER_LOAD_THRESHOLD)
-        local query_response_template = '"value":%[%d+%.%d+,"([%d%.]+)"%]'
-        local encoded_query = encode_query(query)
-
-        request_handle:logInfo("Prometheus scheme: " .. "PROMETHEUS_SCHEME")
-        request_handle:logInfo("Prometheus host: " .. "PROMETHEUS_HOST")
-        request_handle:logInfo("Prometheus port: " .. "PROMETHEUS_PORT")
-        request_handle:logInfo("Query: " .. query)
-        request_handle:logInfo("Encoded query: " .. encoded_query)
-
-        local headers, body = request_handle:httpCall(
-            "prometheus_cluster",
-            {
-                [":method"] = "GET",
-                [":path"] = "/api/v1/query?query=" .. encoded_query,
-                [":scheme"] = "PROMETHEUS_SCHEME",
-                [":authority"] = "PROMETHEUS_HOST" .. ":" .. "PROMETHEUS_PORT"
-            },
-            "",
-            5000
-        )
-        if not headers then
-            request_handle:logErr("HTTP call to Prometheus failed.")
-            return
-        end
-
-        if not body or body == "" then
-            request_handle:logErr("Prometheus could not be reached or returned no data.")
-            return
-        end
-
-        request_handle:logInfo("Query response body: " .. body)
-        local metric_value_str = string.match(body, query_response_template)
-        request_handle:logInfo("Extracted metric: " .. metric_value_str)
-
-        if metric_value_str then
-            local metric_value = tonumber(metric_value_str)
-            if metric_value > metric_threshold then
-                request_handle:logInfo("Metric value exceeds threshold: " .. metric_value .. " > " .. metric_threshold)
-            else
-                request_handle:logInfo("Metric value below threshold: " .. metric_value .. " < " .. metric_threshold)
-                request_handle:streamInfo():dynamicMetadata():set("envoy.lua", "accept_request", true)
-            end
-        else
-            request_handle:logErr("Failed to parse metric value from Prometheus response.")
-            ---- Temporary ---- 
-            request_handle:logErr("Accepting request regardless of metric value.")
-            request_handle:streamInfo():dynamicMetadata():set("envoy.lua", "accept_request", true)
-        end
-    end
-
     ---- Extract model_name from ModelInferRequest ----
     if contentType == "application/grpc" then
         if path == "/inference.GRPCInferenceService/ModelInfer" then
@@ -76,7 +21,7 @@ function envoy_on_request(request_handle)
                     -- log and propagate via dynamic metadata
                     request_handle:logInfo("ModelInfer model_name = " .. model_name)
                     if model_name then
-                        local hostHeader = model_name .. ".cms.svc.cluster.local:8001"
+                        local hostHeader = model_name .. ".NAMESPACE.svc.cluster.local:8001"
                         request_handle:logInfo("route-to = " .. hostHeader)
                         request_handle:headers():add("route-to", hostHeader)
                     end
@@ -89,30 +34,7 @@ function envoy_on_request(request_handle)
             end
         else
             --- for non-inference calls, for now just forward to default service
-            request_handle:headers():add("route-to", "supersonic-test-triton.cms.svc.cluster.local:8001")
-        end
-    end
-end
-
-function envoy_on_response(response_handle)
-    local md = response_handle:streamInfo():dynamicMetadata():get("envoy.lua")
-
-    if not md or md.accept_request == nil then
-      return
-    end
-
-    if not response_handle:streamInfo():dynamicMetadata():get("envoy.lua")["accept_request"] then
-        response_handle:logInfo("Sending error as a response.")
-        local body = response_handle:body()
-        if body then
-          body:setBytes("")
+            request_handle:headers():add("route-to", "RELEASE-triton.NAMESPACE.svc.cluster.local:8001")
         end
-        response_handle:headers():replace("grpc-status", "1")
     end
-end
-
-function encode_query(query)
-    return query:gsub("([^%w _%%%-%.~])", function(c)
-        return string.format("%%%02X", string.byte(c))
-    end):gsub(" ", "+")
 end
\ No newline at end of file
diff --git a/helm/supersonic/templates/envoy/configmaps.yaml b/helm/supersonic/templates/envoy/configmaps.yaml
index fa97697..dd2f187 100644
--- a/helm/supersonic/templates/envoy/configmaps.yaml
+++ b/helm/supersonic/templates/envoy/configmaps.yaml
@@ -282,6 +282,8 @@ data:
     {{- $luaConfig = $luaConfig | replace "PROMETHEUS_SCHEME" (include "supersonic.prometheusScheme" .) }}
     {{- $luaConfig = $luaConfig | replace "PROMETHEUS_HOST" (include "supersonic.prometheusHost" .) }}
     {{- $luaConfig = $luaConfig | replace "PROMETHEUS_PORT" (include "supersonic.prometheusPort" .) }}
+    {{- $luaConfig = $luaConfig | replace "RELEASE" .Release.Name }}
+    {{- $luaConfig = $luaConfig | replace "NAMESPACE" .Release.Namespace }}
     {{ $luaConfig | indent 4 }}
 
 ---

From 62a4fea4b6cacbcb23ccd04bf4241a0bc8cce558 Mon Sep 17 00:00:00 2001
From: kondratyevd <kondratyev.d.95@gmail.com>
Date: Fri, 30 May 2025 13:37:07 -0400
Subject: [PATCH 15/20] add some comments

---
 helm/supersonic/cfg/envoy-filter-dynamic.lua | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/helm/supersonic/cfg/envoy-filter-dynamic.lua b/helm/supersonic/cfg/envoy-filter-dynamic.lua
index 9dd117d..0cef8bb 100644
--- a/helm/supersonic/cfg/envoy-filter-dynamic.lua
+++ b/helm/supersonic/cfg/envoy-filter-dynamic.lua
@@ -12,6 +12,9 @@ function envoy_on_request(request_handle)
                 local msg = body:sub(6)
 
                 -- protobuf wire format for field 1, wire type 2: tag = 0x0A
+                -- field 1 is the model name - we know it from here:
+                -- https://github.com/kserve/open-inference-protocol/blob/main/specification/protocol/inference_grpc.md#inference
+                -- wire type 2 means that the field is length-delimited
                 if msg:byte(1) == 0x0A then
                     -- next byte is a varint length (assumes <128 bytes)
                     local name_len = msg:byte(2)
@@ -23,11 +26,12 @@ function envoy_on_request(request_handle)
                     if model_name then
                         local hostHeader = model_name .. ".NAMESPACE.svc.cluster.local:8001"
                         request_handle:logInfo("route-to = " .. hostHeader)
+                        -- add header
                         request_handle:headers():add("route-to", hostHeader)
                     end
-                    for k, v in pairs(request_handle:headers()) do
-                        request_handle:logInfo("Header " .. k .. ": " .. v)
-                    end
+                    -- for k, v in pairs(request_handle:headers()) do
+                    --     request_handle:logInfo("Header " .. k .. ": " .. v)
+                    -- end
                 else
                     request_handle:logErr("Unexpected protobuf tag: " .. string.format("0x%02X", msg:byte(1)))
                 end

From 29d22031665faf846acbf6e100d9e9a0acf213b6 Mon Sep 17 00:00:00 2001
From: kondratyevd <kondratyev.d.95@gmail.com>
Date: Fri, 30 May 2025 13:40:45 -0400
Subject: [PATCH 16/20] correctly mount lua config

---
 helm/supersonic/templates/envoy/deployment.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/helm/supersonic/templates/envoy/deployment.yaml b/helm/supersonic/templates/envoy/deployment.yaml
index 7b2666c..5a48944 100644
--- a/helm/supersonic/templates/envoy/deployment.yaml
+++ b/helm/supersonic/templates/envoy/deployment.yaml
@@ -47,7 +47,7 @@ spec:
         volumeMounts:
         - name: {{ include "supersonic.name" . }}-envoy-config
           mountPath: /etc/envoy
-        {{- if .Values.envoy.rate_limiter.prometheus_based.enabled }}
+        {{- if .Values.envoy.lua_filter.enabled }}
         - name: {{ include "supersonic.name" . }}-lua-volume
           mountPath: /etc/envoy/lua
           readOnly: true
@@ -58,7 +58,7 @@ spec:
       - name: {{ include "supersonic.name" . }}-envoy-config
         configMap:
           name: {{ include "supersonic.name" . }}-envoy-config
-      {{- if .Values.envoy.rate_limiter.prometheus_based.enabled }}
+      {{- if .Values.envoy.lua_filter.enabled }}
       - name: {{ include "supersonic.name" . }}-lua-volume
         configMap:
           name: {{ include "supersonic.name" . }}-lua-config

From 987549b99ba438bc86d7e5541e37f7dccface720 Mon Sep 17 00:00:00 2001
From: kondratyevd <kondratyev.d.95@gmail.com>
Date: Fri, 30 May 2025 13:44:50 -0400
Subject: [PATCH 17/20] change log level for testing

---
 helm/supersonic/cfg/envoy-filter-dynamic.lua | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/helm/supersonic/cfg/envoy-filter-dynamic.lua b/helm/supersonic/cfg/envoy-filter-dynamic.lua
index 0cef8bb..ee7f28d 100644
--- a/helm/supersonic/cfg/envoy-filter-dynamic.lua
+++ b/helm/supersonic/cfg/envoy-filter-dynamic.lua
@@ -22,10 +22,10 @@ function envoy_on_request(request_handle)
                     local model_name = msg:sub(3, 2 + name_len)
 
                     -- log and propagate via dynamic metadata
-                    request_handle:logInfo("ModelInfer model_name = " .. model_name)
+                    request_handle:logWarn("ModelInfer model_name = " .. model_name)
                     if model_name then
                         local hostHeader = model_name .. ".NAMESPACE.svc.cluster.local:8001"
-                        request_handle:logInfo("route-to = " .. hostHeader)
+                        request_handle:logWarn("route-to = " .. hostHeader)
                         -- add header
                         request_handle:headers():add("route-to", hostHeader)
                     end

From a946f300e38770765c866f8cb97c0d36bb67d805 Mon Sep 17 00:00:00 2001
From: kondratyevd <kondratyev.d.95@gmail.com>
Date: Fri, 30 May 2025 13:53:07 -0400
Subject: [PATCH 18/20] undo

---
 helm/supersonic/cfg/envoy-filter-dynamic.lua | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/helm/supersonic/cfg/envoy-filter-dynamic.lua b/helm/supersonic/cfg/envoy-filter-dynamic.lua
index ee7f28d..0cef8bb 100644
--- a/helm/supersonic/cfg/envoy-filter-dynamic.lua
+++ b/helm/supersonic/cfg/envoy-filter-dynamic.lua
@@ -22,10 +22,10 @@ function envoy_on_request(request_handle)
                     local model_name = msg:sub(3, 2 + name_len)
 
                     -- log and propagate via dynamic metadata
-                    request_handle:logWarn("ModelInfer model_name = " .. model_name)
+                    request_handle:logInfo("ModelInfer model_name = " .. model_name)
                     if model_name then
                         local hostHeader = model_name .. ".NAMESPACE.svc.cluster.local:8001"
-                        request_handle:logWarn("route-to = " .. hostHeader)
+                        request_handle:logInfo("route-to = " .. hostHeader)
                         -- add header
                         request_handle:headers():add("route-to", hostHeader)
                     end

From 45108812392a2042498cd8ac2d936d3c4d5d5081 Mon Sep 17 00:00:00 2001
From: kondratyevd <kondratyev.d.95@gmail.com>
Date: Fri, 30 May 2025 14:26:33 -0400
Subject: [PATCH 19/20] extract model version from gRPC body

---
 helm/supersonic/cfg/envoy-filter-dynamic.lua | 23 +++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/helm/supersonic/cfg/envoy-filter-dynamic.lua b/helm/supersonic/cfg/envoy-filter-dynamic.lua
index 0cef8bb..2171322 100644
--- a/helm/supersonic/cfg/envoy-filter-dynamic.lua
+++ b/helm/supersonic/cfg/envoy-filter-dynamic.lua
@@ -13,21 +13,34 @@ function envoy_on_request(request_handle)
 
                 -- protobuf wire format for field 1, wire type 2: tag = 0x0A
                 -- field 1 is the model name - we know it from here:
-                -- https://github.com/kserve/open-inference-protocol/blob/main/specification/protocol/inference_grpc.md#inference
                 -- wire type 2 means that the field is length-delimited
                 if msg:byte(1) == 0x0A then
                     -- next byte is a varint length (assumes <128 bytes)
                     local name_len = msg:byte(2)
                     -- extract UTF-8 model name
                     local model_name = msg:sub(3, 2 + name_len)
+                    local offset = 3 + name_len
+
+                    -- Extract model version (field 2, wire type 2, tag 0x12)
+                    local model_version = ""
+                    if msg:byte(offset) == 0x12 then
+                        local ver_len = msg:byte(offset + 1)
+                        model_version = msg:sub(offset + 2, offset + 1 + ver_len)
+                        -- request_handle:logInfo("ModelInfer model_version = " .. model_version)
+                        offset = offset + 2 + ver_len
+                    else
+                        request_handle:logWarn(string.format("No model_version field (expected tag 0x12 at offset %d, got 0x%02X)", 
+                            offset, msg:byte(offset)))
+                    end
 
                     -- log and propagate via dynamic metadata
-                    request_handle:logInfo("ModelInfer model_name = " .. model_name)
+                    -- request_handle:logInfo("ModelInfer model_name = " .. model_name)
                     if model_name then
-                        local hostHeader = model_name .. ".NAMESPACE.svc.cluster.local:8001"
-                        request_handle:logInfo("route-to = " .. hostHeader)
+                        local svc_name = "RELEASE-" .. model_name .. "-v" .. model_version
+                        local header_value = svc_name .. ".NAMESPACE.svc.cluster.local:8001"
+                        -- request_handle:logInfo("route-to = " .. header_value)
                         -- add header
-                        request_handle:headers():add("route-to", hostHeader)
+                        request_handle:headers():add("route-to", header_value)
                     end
                     -- for k, v in pairs(request_handle:headers()) do
                     --     request_handle:logInfo("Header " .. k .. ": " .. v)

From 3261947b2858ffa4f025860abc56591ac37f7ccd Mon Sep 17 00:00:00 2001
From: kondratyevd <kondratyev.d.95@gmail.com>
Date: Fri, 30 May 2025 15:04:16 -0400
Subject: [PATCH 20/20] improve lua script

---
 helm/supersonic/cfg/envoy-filter-dynamic.lua | 93 +++++++++++---------
 1 file changed, 50 insertions(+), 43 deletions(-)

diff --git a/helm/supersonic/cfg/envoy-filter-dynamic.lua b/helm/supersonic/cfg/envoy-filter-dynamic.lua
index 2171322..0740052 100644
--- a/helm/supersonic/cfg/envoy-filter-dynamic.lua
+++ b/helm/supersonic/cfg/envoy-filter-dynamic.lua
@@ -2,56 +2,63 @@ function envoy_on_request(request_handle)
     local path = request_handle:headers():get(":path")
     local contentType = request_handle:headers():get("content-type")
 
+
     ---- Extract model_name from ModelInferRequest ----
     if contentType == "application/grpc" then
+        -- request_handle:logInfo("path = " .. path)
         if path == "/inference.GRPCInferenceService/ModelInfer" then
-            -- grab entire request body (you may need to configure the filter to buffer bodies)
-            local body = request_handle:body():getBytes(0, request_handle:body():length())
-            if body and #body > 5 then
-                -- strip the 5-byte gRPC header (1-byte flag + 4-byte msg-len)
-                local msg = body:sub(6)
-
-                -- protobuf wire format for field 1, wire type 2: tag = 0x0A
-                -- field 1 is the model name - we know it from here:
-                -- wire type 2 means that the field is length-delimited
-                if msg:byte(1) == 0x0A then
-                    -- next byte is a varint length (assumes <128 bytes)
-                    local name_len = msg:byte(2)
-                    -- extract UTF-8 model name
-                    local model_name = msg:sub(3, 2 + name_len)
-                    local offset = 3 + name_len
-
-                    -- Extract model version (field 2, wire type 2, tag 0x12)
-                    local model_version = ""
-                    if msg:byte(offset) == 0x12 then
-                        local ver_len = msg:byte(offset + 1)
-                        model_version = msg:sub(offset + 2, offset + 1 + ver_len)
-                        -- request_handle:logInfo("ModelInfer model_version = " .. model_version)
-                        offset = offset + 2 + ver_len
-                    else
-                        request_handle:logWarn(string.format("No model_version field (expected tag 0x12 at offset %d, got 0x%02X)", 
-                            offset, msg:byte(offset)))
-                    end
-
-                    -- log and propagate via dynamic metadata
-                    -- request_handle:logInfo("ModelInfer model_name = " .. model_name)
-                    if model_name then
-                        local svc_name = "RELEASE-" .. model_name .. "-v" .. model_version
-                        local header_value = svc_name .. ".NAMESPACE.svc.cluster.local:8001"
-                        -- request_handle:logInfo("route-to = " .. header_value)
-                        -- add header
-                        request_handle:headers():add("route-to", header_value)
-                    end
-                    -- for k, v in pairs(request_handle:headers()) do
-                    --     request_handle:logInfo("Header " .. k .. ": " .. v)
-                    -- end
-                else
-                    request_handle:logErr("Unexpected protobuf tag: " .. string.format("0x%02X", msg:byte(1)))
-                end
+
+            -- The helper returns "" for any field it cannot decode.
+            local model_name, model_version = extract_model_name_and_version(request_handle)
+
+            -- Strings are always truthy in Lua, so compare against "" explicitly;
+            -- model_version may be empty when the request does not carry one.
+            if model_name ~= "" then
+                local svc_name = "RELEASE-" .. model_name .. "-v" .. model_version
+                local header_value = svc_name .. ".NAMESPACE.svc.cluster.local:8001"
+                request_handle:logInfo("route-to = " .. header_value)
+                request_handle:headers():add("route-to", header_value)
             end
         else
             --- for non-inference calls, for now just forward to default service
             request_handle:headers():add("route-to", "RELEASE-triton.NAMESPACE.svc.cluster.local:8001")
         end
     end
+end
+
+--- Decode model_name and model_version from a gRPC ModelInferRequest body.
+-- Only single-byte length prefixes are handled (fields shorter than 128 bytes).
+-- @param request_handle Envoy Lua stream handle for the request
+-- @return model_name, model_version; each is "" when it could not be decoded
+function extract_model_name_and_version(request_handle)
+    local model_name, model_version = "", ""
+    local buffer = request_handle:body()
+    -- need the 5-byte gRPC header (1-byte flag + 4-byte msg-len) plus a payload;
+    -- the nil check keeps a request without a body from raising an error
+    if not buffer or buffer:length() <= 5 then
+        return model_name, model_version
+    end
+    local msg = buffer:getBytes(0, buffer:length()):sub(6)
+
+    -- field 1 (model_name), wire type 2 (length-delimited) => tag 0x0A, see
+    -- https://github.com/kserve/open-inference-protocol/blob/main/specification/protocol/inference_grpc.md#inference
+    if msg:byte(1) ~= 0x0A then
+        request_handle:logErr("Unexpected protobuf tag: " .. string.format("0x%02X", msg:byte(1)))
+        return model_name, model_version
+    end
+    -- next byte is a varint length (assumes <128 bytes)
+    local name_len = msg:byte(2) or 0
+    model_name = msg:sub(3, 2 + name_len)
+    local offset = 3 + name_len
+
+    -- field 2 (model_version), wire type 2 => tag 0x12; a nil tag means end of message
+    local tag = msg:byte(offset)
+    if tag == 0x12 then
+        local ver_len = msg:byte(offset + 1) or 0
+        model_version = msg:sub(offset + 2, offset + 1 + ver_len)
+    else
+        request_handle:logWarn(string.format("No model_version field (expected tag 0x12 at offset %d, got %s)",
+            offset, tag and string.format("0x%02X", tag) or "end of message"))
+    end
+    return model_name, model_version
 end
\ No newline at end of file