From 7e363f34ed352ab11f23522136420052a833246f Mon Sep 17 00:00:00 2001 From: kondratyevd Date: Thu, 13 Mar 2025 13:28:32 -0400 Subject: [PATCH 01/20] don't build dependencies when installing from repo --- installer-plugin/installer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/installer-plugin/installer.py b/installer-plugin/installer.py index 606b4b2..bfc8852 100755 --- a/installer-plugin/installer.py +++ b/installer-plugin/installer.py @@ -106,7 +106,8 @@ def main() -> None: repo_commands.append(["helm", "repo", "add", "grafana", "https://grafana.github.io/helm-charts"]) if merged_values.get("opentelemetry-collector", {}).get("enabled", False): repo_commands.append(["helm", "repo", "add", "opentelemetry", "https://open-telemetry.github.io/opentelemetry-helm-charts"]) - repo_commands.append(["helm", "dependency", "build", chart_source]) + if args.local: + repo_commands.append(["helm", "dependency", "build", chart_source]) for cmd in repo_commands: logger.info(f"\nExecuting: {' '.join(cmd)}") From d9a4c4b278755b68c1521743dc83256b4bae3adb Mon Sep 17 00:00:00 2001 From: kondratyevd Date: Thu, 13 Mar 2025 13:32:58 -0400 Subject: [PATCH 02/20] bump release --- helm/supersonic/Chart.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/helm/supersonic/Chart.yaml b/helm/supersonic/Chart.yaml index 0a774da..00440c2 100644 --- a/helm/supersonic/Chart.yaml +++ b/helm/supersonic/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: supersonic description: Server infrastructure for inference-as-a-service in large scientific experiments. icon: https://github.com/fastmachinelearning/SuperSONIC/blob/main/docs/img/SuperSONIC_small_512.png?raw=true -version: 0.2.1 +version: 0.2.1 type: application home: https://fastmachinelearning.org/SuperSONIC/ annotations: From 9d892a5a0d8249c6ec40a005e9daae0c84a174d5 Mon Sep 17 00:00:00 2001 From: kondratyevd Date: Fri, 23 May 2025 16:58:10 -0400 Subject: [PATCH 03/20] first steps towards dynamic loading: configure model-specific routing in Envoy --- helm/supersonic/cfg/envoy-filter.lua | 55 +++++++++++++++++-- .../templates/envoy/configmaps.yaml | 29 +++++++++- helm/supersonic/templates/triton/service.yaml | 31 +++++++++++ 3 files changed, 108 insertions(+), 7 deletions(-) diff --git a/helm/supersonic/cfg/envoy-filter.lua b/helm/supersonic/cfg/envoy-filter.lua index 0af267c..d3c349c 100644 --- a/helm/supersonic/cfg/envoy-filter.lua +++ b/helm/supersonic/cfg/envoy-filter.lua @@ -2,10 +2,6 @@ function envoy_on_request(request_handle) local path = request_handle:headers():get(":path") local contentType = request_handle:headers():get("content-type") - -- Any other request except model index - request_handle:streamInfo():dynamicMetadata():set("envoy.lua", "accept_request", true) - - -- Model index requested? if path == "/inference.GRPCInferenceService/RepositoryIndex" and contentType == "application/grpc" then request_handle:streamInfo():dynamicMetadata():set("envoy.lua", "accept_request", false) @@ -55,15 +51,62 @@ function envoy_on_request(request_handle) end else request_handle:logErr("Failed to parse metric value from Prometheus response.") + ---- Temporary ---- + request_handle:logErr("Accepting request regardless of metric value.") + request_handle:streamInfo():dynamicMetadata():set("envoy.lua", "accept_request", true) + end + end + + ---- Extract model_name from ModelInferRequest ---- + if contentType == "application/grpc" then + if path == "/inference.GRPCInferenceService/ModelInfer" then + -- grab entire request body (you may need to configure the filter to buffer bodies) + local body = request_handle:body():getBytes(0, request_handle:body():length()) + if body and #body > 5 then + -- strip the 5-byte gRPC header (1-byte flag + 4-byte msg-len) + local msg = body:sub(6) + + -- protobuf wire format for field 1, wire type 2: tag = 0x0A + if msg:byte(1) == 0x0A then + -- next byte is a varint length (assumes <128 bytes) + local name_len = msg:byte(2) + -- extract UTF-8 model name + local model_name = msg:sub(3, 2 + name_len) + + -- log and propagate via dynamic metadata + request_handle:logInfo("ModelInfer model_name = " .. model_name) + if model_name then + local hostHeader = model_name .. ".cms.svc.cluster.local:8001" + request_handle:logInfo("x-model-host = " .. hostHeader) + request_handle:headers():add("x-model-host", hostHeader) + end + for k, v in pairs(request_handle:headers()) do + request_handle:logInfo("Header " .. k .. ": " .. v) + end + else + request_handle:logErr("Unexpected protobuf tag: " .. string.format("0x%02X", msg:byte(1))) + end + end + else + --- for non-inference calls, for now just forward to default service + request_handle:headers():add("x-model-host", "supersonic-test-triton.cms.svc.cluster.local:8001") end end end function envoy_on_response(response_handle) - -- Send error back if request was not accepted + local md = response_handle:streamInfo():dynamicMetadata():get("envoy.lua") + + if not md or md.accept_request == nil then + return + end + if not response_handle:streamInfo():dynamicMetadata():get("envoy.lua")["accept_request"] then response_handle:logInfo("Sending error as a response.") - response_handle:body():setBytes("") + local body = response_handle:body() + if body then + body:setBytes("") + end response_handle:headers():replace("grpc-status", "1") end end diff --git a/helm/supersonic/templates/envoy/configmaps.yaml b/helm/supersonic/templates/envoy/configmaps.yaml index a96eaec..59770c7 100644 --- a/helm/supersonic/templates/envoy/configmaps.yaml +++ b/helm/supersonic/templates/envoy/configmaps.yaml @@ -57,9 +57,14 @@ static_resources: routes: - match: prefix: "/" + typed_per_filter_config: + envoy.filters.http.dynamic_forward_proxy: + "@type": type.googleapis.com/envoy.extensions.filters.http.dynamic_forward_proxy.v3.PerRouteConfig + host_rewrite_header: "x-model-host" route: - cluster: triton_grpc_service + cluster: dynamic_forward_proxy_cluster timeout: {{ .envoy.grpc_route_timeout }} + http_filters: {{- with .envoy.rate_limiter.prometheus_based }} {{- if .enabled }} @@ -94,6 +99,14 @@ static_resources: provider_name: provider_icecube {{- end }} {{- end }} + - name: envoy.filters.http.dynamic_forward_proxy + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.dynamic_forward_proxy.v3.FilterConfig + dns_cache_config: + name: dynamic_cache + dns_lookup_family: ALL + dns_cache_circuit_breaker: + max_pending_requests: 1024 - name: envoy.filters.http.router typed_config: "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router @@ -176,6 +189,20 @@ static_resources: socket_address: address: {{ .tritonName }} port_value: {{ .tritonGrpcPort }} + - name: dynamic_forward_proxy_cluster + connect_timeout: 2s + lb_policy: CLUSTER_PROVIDED + http2_protocol_options: + max_concurrent_streams: 1000 + cluster_type: + name: envoy.clusters.dynamic_forward_proxy + typed_config: + "@type": type.googleapis.com/envoy.extensions.clusters.dynamic_forward_proxy.v3.ClusterConfig + dns_cache_config: + name: dynamic_cache + dns_lookup_family: ALL + dns_cache_circuit_breaker: + max_pending_requests: 1024 {{- end }} {{- end }} diff --git a/helm/supersonic/templates/triton/service.yaml b/helm/supersonic/templates/triton/service.yaml index 57a4879..9f590df 100644 --- a/helm/supersonic/templates/triton/service.yaml +++ b/helm/supersonic/templates/triton/service.yaml @@ -13,6 +13,37 @@ metadata: {{- if .Values.triton.service.annotations }} {{ toYaml .Values.triton.service.annotations | nindent 4 }} {{- end }} +spec: + clusterIP: None + ports: + {{- range .Values.triton.service.ports }} + - name: {{ .name }} + port: {{ .port }} + targetPort: {{ .targetPort }} + protocol: {{ .protocol }} + {{- end }} + selector: + app.kubernetes.io/name: {{ .Chart.Name }} + app.kubernetes.io/instance: {{ include "supersonic.name" . }} + app.kubernetes.io/component: triton + +--- + +apiVersion: v1 +kind: Service +metadata: + name: deepmet + labels: + app.kubernetes.io/name: {{ .Chart.Name }} + app.kubernetes.io/instance: {{ include "supersonic.name" . }} + app.kubernetes.io/component: triton + {{- if .Values.triton.service.labels }} +{{ toYaml .Values.triton.service.labels | nindent 4 }} + {{- end }} + annotations: + {{- if .Values.triton.service.annotations }} +{{ toYaml .Values.triton.service.annotations | nindent 4 }} + {{- end }} spec: clusterIP: None ports: From 4c33ce63c7e71bf8f2d25460ef888a31981ae2d0 Mon Sep 17 00:00:00 2001 From: kondratyevd Date: Fri, 30 May 2025 13:00:19 -0400 Subject: [PATCH 04/20] remove unnecessary diffs --- helm/supersonic/Chart.yaml | 2 +- installer-plugin/installer.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/helm/supersonic/Chart.yaml b/helm/supersonic/Chart.yaml index 00440c2..0a774da 100644 --- a/helm/supersonic/Chart.yaml +++ b/helm/supersonic/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: supersonic description: Server infrastructure for inference-as-a-service in large scientific experiments. icon: https://github.com/fastmachinelearning/SuperSONIC/blob/main/docs/img/SuperSONIC_small_512.png?raw=true -version: 0.2.1 +version: 0.2.1 type: application home: https://fastmachinelearning.org/SuperSONIC/ annotations: diff --git a/installer-plugin/installer.py b/installer-plugin/installer.py index bfc8852..606b4b2 100755 --- a/installer-plugin/installer.py +++ b/installer-plugin/installer.py @@ -106,8 +106,7 @@ def main() -> None: repo_commands.append(["helm", "repo", "add", "grafana", "https://grafana.github.io/helm-charts"]) if merged_values.get("opentelemetry-collector", {}).get("enabled", False): repo_commands.append(["helm", "repo", "add", "opentelemetry", "https://open-telemetry.github.io/opentelemetry-helm-charts"]) - if args.local: - repo_commands.append(["helm", "dependency", "build", chart_source]) + repo_commands.append(["helm", "dependency", "build", chart_source]) for cmd in repo_commands: logger.info(f"\nExecuting: {' '.join(cmd)}") From 704084dfb8cab63ffedc005a744d3587376f4dcd Mon Sep 17 00:00:00 2001 From: kondratyevd Date: Fri, 30 May 2025 13:01:38 -0400 Subject: [PATCH 05/20] restore original Lua filter --- helm/supersonic/cfg/envoy-filter-dynamic.lua | 118 +++++++++++++++++++ helm/supersonic/cfg/envoy-filter.lua | 55 +-------- 2 files changed, 124 insertions(+), 49 deletions(-) create mode 100644 helm/supersonic/cfg/envoy-filter-dynamic.lua diff --git a/helm/supersonic/cfg/envoy-filter-dynamic.lua b/helm/supersonic/cfg/envoy-filter-dynamic.lua new file mode 100644 index 0000000..d3c349c --- /dev/null +++ b/helm/supersonic/cfg/envoy-filter-dynamic.lua @@ -0,0 +1,118 @@ +function envoy_on_request(request_handle) + local path = request_handle:headers():get(":path") + local contentType = request_handle:headers():get("content-type") + + if path == "/inference.GRPCInferenceService/RepositoryIndex" and contentType == "application/grpc" then + request_handle:streamInfo():dynamicMetadata():set("envoy.lua", "accept_request", false) + + local query = SERVER_LOAD_METRIC + local metric_threshold = tonumber(SERVER_LOAD_THRESHOLD) + local query_response_template = '"value":%[%d+%.%d+,"([%d%.]+)"%]' + local encoded_query = encode_query(query) + + request_handle:logInfo("Prometheus scheme: " .. "PROMETHEUS_SCHEME") + request_handle:logInfo("Prometheus host: " .. "PROMETHEUS_HOST") + request_handle:logInfo("Prometheus port: " .. "PROMETHEUS_PORT") + request_handle:logInfo("Query: " .. query) + request_handle:logInfo("Encoded query: " .. encoded_query) + + local headers, body = request_handle:httpCall( + "prometheus_cluster", + { + [":method"] = "GET", + [":path"] = "/api/v1/query?query=" .. encoded_query, + [":scheme"] = "PROMETHEUS_SCHEME", + [":authority"] = "PROMETHEUS_HOST" .. ":" .. "PROMETHEUS_PORT" + }, + "", + 5000 + ) + if not headers then + request_handle:logErr("HTTP call to Prometheus failed.") + return + end + + if not body or body == "" then + request_handle:logErr("Prometheus could not be reached or returned no data.") + return + end + + request_handle:logInfo("Query response body: " .. body) + local metric_value_str = string.match(body, query_response_template) + request_handle:logInfo("Extracted metric: " .. metric_value_str) + + if metric_value_str then + local metric_value = tonumber(metric_value_str) + if metric_value > metric_threshold then + request_handle:logInfo("Metric value exceeds threshold: " .. metric_value .. " > " .. metric_threshold) + else + request_handle:logInfo("Metric value below threshold: " .. metric_value .. " < " .. metric_threshold) + request_handle:streamInfo():dynamicMetadata():set("envoy.lua", "accept_request", true) + end + else + request_handle:logErr("Failed to parse metric value from Prometheus response.") + ---- Temporary ---- + request_handle:logErr("Accepting request regardless of metric value.") + request_handle:streamInfo():dynamicMetadata():set("envoy.lua", "accept_request", true) + end + end + + ---- Extract model_name from ModelInferRequest ---- + if contentType == "application/grpc" then + if path == "/inference.GRPCInferenceService/ModelInfer" then + -- grab entire request body (you may need to configure the filter to buffer bodies) + local body = request_handle:body():getBytes(0, request_handle:body():length()) + if body and #body > 5 then + -- strip the 5-byte gRPC header (1-byte flag + 4-byte msg-len) + local msg = body:sub(6) + + -- protobuf wire format for field 1, wire type 2: tag = 0x0A + if msg:byte(1) == 0x0A then + -- next byte is a varint length (assumes <128 bytes) + local name_len = msg:byte(2) + -- extract UTF-8 model name + local model_name = msg:sub(3, 2 + name_len) + + -- log and propagate via dynamic metadata + request_handle:logInfo("ModelInfer model_name = " .. model_name) + if model_name then + local hostHeader = model_name .. ".cms.svc.cluster.local:8001" + request_handle:logInfo("x-model-host = " .. hostHeader) + request_handle:headers():add("x-model-host", hostHeader) + end + for k, v in pairs(request_handle:headers()) do + request_handle:logInfo("Header " .. k .. ": " .. v) + end + else + request_handle:logErr("Unexpected protobuf tag: " .. string.format("0x%02X", msg:byte(1))) + end + end + else + --- for non-inference calls, for now just forward to default service + request_handle:headers():add("x-model-host", "supersonic-test-triton.cms.svc.cluster.local:8001") + end + end +end + +function envoy_on_response(response_handle) + local md = response_handle:streamInfo():dynamicMetadata():get("envoy.lua") + + if not md or md.accept_request == nil then + return + end + + if not response_handle:streamInfo():dynamicMetadata():get("envoy.lua")["accept_request"] then + response_handle:logInfo("Sending error as a response.") + local body = response_handle:body() + if body then + body:setBytes("") + end + response_handle:headers():replace("grpc-status", "1") + end +end + +function encode_query(query) + return query:gsub("([^%w _%%%-%.~])", function(c) + return string.format("%%%02X", string.byte(c)) + end):gsub(" ", "+") +end \ No newline at end of file diff --git a/helm/supersonic/cfg/envoy-filter.lua b/helm/supersonic/cfg/envoy-filter.lua index d3c349c..0af267c 100644 --- a/helm/supersonic/cfg/envoy-filter.lua +++ b/helm/supersonic/cfg/envoy-filter.lua @@ -2,6 +2,10 @@ function envoy_on_request(request_handle) local path = request_handle:headers():get(":path") local contentType = request_handle:headers():get("content-type") + -- Any other request except model index + request_handle:streamInfo():dynamicMetadata():set("envoy.lua", "accept_request", true) + + -- Model index requested? if path == "/inference.GRPCInferenceService/RepositoryIndex" and contentType == "application/grpc" then request_handle:streamInfo():dynamicMetadata():set("envoy.lua", "accept_request", false) @@ -51,62 +55,15 @@ function envoy_on_request(request_handle) end else request_handle:logErr("Failed to parse metric value from Prometheus response.") - ---- Temporary ---- - request_handle:logErr("Accepting request regardless of metric value.") - request_handle:streamInfo():dynamicMetadata():set("envoy.lua", "accept_request", true) - end - end - - ---- Extract model_name from ModelInferRequest ---- - if contentType == "application/grpc" then - if path == "/inference.GRPCInferenceService/ModelInfer" then - -- grab entire request body (you may need to configure the filter to buffer bodies) - local body = request_handle:body():getBytes(0, request_handle:body():length()) - if body and #body > 5 then - -- strip the 5-byte gRPC header (1-byte flag + 4-byte msg-len) - local msg = body:sub(6) - - -- protobuf wire format for field 1, wire type 2: tag = 0x0A - if msg:byte(1) == 0x0A then - -- next byte is a varint length (assumes <128 bytes) - local name_len = msg:byte(2) - -- extract UTF-8 model name - local model_name = msg:sub(3, 2 + name_len) - - -- log and propagate via dynamic metadata - request_handle:logInfo("ModelInfer model_name = " .. model_name) - if model_name then - local hostHeader = model_name .. ".cms.svc.cluster.local:8001" - request_handle:logInfo("x-model-host = " .. hostHeader) - request_handle:headers():add("x-model-host", hostHeader) - end - for k, v in pairs(request_handle:headers()) do - request_handle:logInfo("Header " .. k .. ": " .. v) - end - else - request_handle:logErr("Unexpected protobuf tag: " .. string.format("0x%02X", msg:byte(1))) - end - end - else - --- for non-inference calls, for now just forward to default service - request_handle:headers():add("x-model-host", "supersonic-test-triton.cms.svc.cluster.local:8001") end end end function envoy_on_response(response_handle) - local md = response_handle:streamInfo():dynamicMetadata():get("envoy.lua") - - if not md or md.accept_request == nil then - return - end - + -- Send error back if request was not accepted if not response_handle:streamInfo():dynamicMetadata():get("envoy.lua")["accept_request"] then response_handle:logInfo("Sending error as a response.") - local body = response_handle:body() - if body then - body:setBytes("") - end + response_handle:body():setBytes("") response_handle:headers():replace("grpc-status", "1") end end From f76a0db615ddc7a84fc45f900022282d646d000e Mon Sep 17 00:00:00 2001 From: kondratyevd Date: Fri, 30 May 2025 13:07:52 -0400 Subject: [PATCH 06/20] values parameter to enable dynamic routing in envoy --- helm/supersonic/templates/envoy/configmaps.yaml | 10 ++++++++++ helm/supersonic/values.yaml | 4 ++++ 2 files changed, 14 insertions(+) diff --git a/helm/supersonic/templates/envoy/configmaps.yaml b/helm/supersonic/templates/envoy/configmaps.yaml index 59770c7..6513d26 100644 --- a/helm/supersonic/templates/envoy/configmaps.yaml +++ b/helm/supersonic/templates/envoy/configmaps.yaml @@ -57,6 +57,7 @@ static_resources: routes: - match: prefix: "/" + {{- if .envoy.dynamic_routing.enabled }} typed_per_filter_config: envoy.filters.http.dynamic_forward_proxy: "@type": type.googleapis.com/envoy.extensions.filters.http.dynamic_forward_proxy.v3.PerRouteConfig @@ -64,6 +65,11 @@ static_resources: route: cluster: dynamic_forward_proxy_cluster timeout: {{ .envoy.grpc_route_timeout }} + {{- else }} + route: + cluster: triton_grpc_service + timeout: {{ .envoy.grpc_route_timeout }} + {{- end }} http_filters: {{- with .envoy.rate_limiter.prometheus_based }} @@ -99,6 +105,7 @@ static_resources: provider_name: provider_icecube {{- end }} {{- end }} + {{- if .envoy.dynamic_routing.enabled }} - name: envoy.filters.http.dynamic_forward_proxy typed_config: "@type": type.googleapis.com/envoy.extensions.filters.http.dynamic_forward_proxy.v3.FilterConfig @@ -107,6 +114,7 @@ static_resources: dns_lookup_family: ALL dns_cache_circuit_breaker: max_pending_requests: 1024 + {{- end }} - name: envoy.filters.http.router typed_config: "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router @@ -189,6 +197,7 @@ static_resources: socket_address: address: {{ .tritonName }} port_value: {{ .tritonGrpcPort }} + {{- if .envoy.dynamic_routing.enabled }} - name: dynamic_forward_proxy_cluster connect_timeout: 2s lb_policy: CLUSTER_PROVIDED @@ -203,6 +212,7 @@ static_resources: dns_lookup_family: ALL dns_cache_circuit_breaker: max_pending_requests: 1024 + {{- end }} {{- end }} {{- end }} diff --git a/helm/supersonic/values.yaml b/helm/supersonic/values.yaml index c261f19..eb85b96 100644 --- a/helm/supersonic/values.yaml +++ b/helm/supersonic/values.yaml @@ -162,6 +162,10 @@ envoy: # Options: ROUND_ROBIN, LEAST_REQUEST, RING_HASH, RANDOM, MAGLEV loadBalancerPolicy: "LEAST_REQUEST" + # -- Enable dynamic routing in Envoy proxy. + dynamic_routing: + enabled: false + auth: # -- Enable authentication in Envoy proxy enabled: false From 6523046368e2f0ab2f477018b33c3adae0e6fe20 Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Fri, 30 May 2025 17:08:11 +0000 Subject: [PATCH 07/20] Update JSON schema --- helm/supersonic/values.schema.json | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/helm/supersonic/values.schema.json b/helm/supersonic/values.schema.json index 4ad91bd..5dbf5b9 100644 --- a/helm/supersonic/values.schema.json +++ b/helm/supersonic/values.schema.json @@ -389,6 +389,17 @@ "loadBalancerPolicy": { "type": "string" }, + "dynamic_routing": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + } + }, + "required": [ + "enabled" + ] + }, "auth": { "type": "object", "properties": { @@ -424,6 +435,7 @@ "required": [ "args", "auth", + "dynamic_routing", "enabled", "grpc_route_timeout", "image", From e6ab63533cc637b088d1fdd635c709e143c51a02 Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Fri, 30 May 2025 17:08:26 +0000 Subject: [PATCH 08/20] Update helm docs --- docs/.values-table.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/.values-table.md b/docs/.values-table.md index 0847bf8..923a4d6 100644 --- a/docs/.values-table.md +++ b/docs/.values-table.md @@ -37,6 +37,7 @@ | envoy.rate_limiter.prometheus_based | object | `{"enabled":false,"luaConfig":"cfg/envoy-filter.lua"}` | This rate limiter rejects new connections based on metric extracted from Prometheus (e.g. inference queue latency). The metric is taken from parameter ``prometheus.serverLoadMetric``, and the threshold is set by ``prometheus.serverLoadThreshold``. These parameters are the same as those used by the KEDA autoscaler. | | envoy.rate_limiter.prometheus_based.enabled | bool | `false` | Enable rate limiter | | envoy.loadBalancerPolicy | string | `"LEAST_REQUEST"` | Envoy load balancer policy. Options: ROUND_ROBIN, LEAST_REQUEST, RING_HASH, RANDOM, MAGLEV | +| envoy.dynamic_routing | object | `{"enabled":false}` | Enable dynamic routing in Envoy proxy. | | envoy.auth.enabled | bool | `false` | Enable authentication in Envoy proxy | | envoy.auth.jwt_issuer | string | `""` | | | envoy.auth.jwt_remote_jwks_uri | string | `""` | | From ac861258e9d5fac604b8b7da7a30e06b3fbf41fb Mon Sep 17 00:00:00 2001 From: kondratyevd Date: Fri, 30 May 2025 13:08:58 -0400 Subject: [PATCH 09/20] remove extra service --- helm/supersonic/templates/triton/service.yaml | 31 ------------------- 1 file changed, 31 deletions(-) diff --git a/helm/supersonic/templates/triton/service.yaml b/helm/supersonic/templates/triton/service.yaml index 9f590df..57a4879 100644 --- a/helm/supersonic/templates/triton/service.yaml +++ b/helm/supersonic/templates/triton/service.yaml @@ -13,37 +13,6 @@ metadata: {{- if .Values.triton.service.annotations }} {{ toYaml .Values.triton.service.annotations | nindent 4 }} {{- end }} -spec: - clusterIP: None - ports: - {{- range .Values.triton.service.ports }} - - name: {{ .name }} - port: {{ .port }} - targetPort: {{ .targetPort }} - protocol: {{ .protocol }} - {{- end }} - selector: - app.kubernetes.io/name: {{ .Chart.Name }} - app.kubernetes.io/instance: {{ include "supersonic.name" . }} - app.kubernetes.io/component: triton - ---- - -apiVersion: v1 -kind: Service -metadata: - name: deepmet - labels: - app.kubernetes.io/name: {{ .Chart.Name }} - app.kubernetes.io/instance: {{ include "supersonic.name" . }} - app.kubernetes.io/component: triton - {{- if .Values.triton.service.labels }} -{{ toYaml .Values.triton.service.labels | nindent 4 }} - {{- end }} - annotations: - {{- if .Values.triton.service.annotations }} -{{ toYaml .Values.triton.service.annotations | nindent 4 }} - {{- end }} spec: clusterIP: None ports: From db5e9135b88b2624bfaa4e15d11022d8567ef5c9 Mon Sep 17 00:00:00 2001 From: kondratyevd Date: Fri, 30 May 2025 13:13:52 -0400 Subject: [PATCH 10/20] make path to lua script more configurable --- .../templates/envoy/configmaps.yaml | 6 ++-- helm/supersonic/values.yaml | 5 +++- values/values-geddes-cms.yaml | 29 ++++++++++++++----- 3 files changed, 29 insertions(+), 11 deletions(-) diff --git a/helm/supersonic/templates/envoy/configmaps.yaml b/helm/supersonic/templates/envoy/configmaps.yaml index 6513d26..f02d14d 100644 --- a/helm/supersonic/templates/envoy/configmaps.yaml +++ b/helm/supersonic/templates/envoy/configmaps.yaml @@ -72,7 +72,7 @@ static_resources: {{- end }} http_filters: - {{- with .envoy.rate_limiter.prometheus_based }} + {{- with .envoy.lua_filter }} {{- if .enabled }} - name: envoy.filters.http.lua typed_config: @@ -263,7 +263,7 @@ data: {{ include "envoy.configuration.yaml" $envoyContext | indent 4 }} --- -{{- if .Values.envoy.rate_limiter.prometheus_based.enabled }} +{{- if .Values.envoy.lua_filter.enabled }} {{- /* Create a ConfigMap for the Lua filter */}} apiVersion: v1 kind: ConfigMap @@ -276,7 +276,7 @@ metadata: data: envoy-filter.lua: |- {{- /* Read and process the Lua configuration file */}} - {{- $luaConfig := $.Files.Get .Values.envoy.rate_limiter.prometheus_based.luaConfig | nindent 4 }} + {{- $luaConfig := $.Files.Get .Values.envoy.lua_filter.lua_config | nindent 4 }} {{- $luaConfig = $luaConfig | replace "SERVER_LOAD_METRIC" (include "supersonic.defaultMetric" . | quote) }} {{- $luaConfig = $luaConfig | replace "SERVER_LOAD_THRESHOLD" (quote .Values.serverLoadThreshold) }} {{- $luaConfig = $luaConfig | replace "PROMETHEUS_SCHEME" (include "supersonic.prometheusScheme" .) }} diff --git a/helm/supersonic/values.yaml b/helm/supersonic/values.yaml index eb85b96..042a9f1 100644 --- a/helm/supersonic/values.yaml +++ b/helm/supersonic/values.yaml @@ -156,12 +156,15 @@ envoy: prometheus_based: # -- Enable rate limiter enabled: false - luaConfig: "cfg/envoy-filter.lua" # -- Envoy load balancer policy. # Options: ROUND_ROBIN, LEAST_REQUEST, RING_HASH, RANDOM, MAGLEV loadBalancerPolicy: "LEAST_REQUEST" + lua_filter: + enabled: false + lua_config: "cfg/envoy-filter.lua" + # -- Enable dynamic routing in Envoy proxy. dynamic_routing: enabled: false diff --git a/values/values-geddes-cms.yaml b/values/values-geddes-cms.yaml index 453ae6e..4e87ac0 100644 --- a/values/values-geddes-cms.yaml +++ b/values/values-geddes-cms.yaml @@ -11,10 +11,12 @@ triton: --model-repository=/cvmfs/cms.cern.ch/el9_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre7/external/el9_amd64_gcc12/data/RecoEgamma/EgammaPhotonProducers/data/models/ \ --model-repository=/cvmfs/cms.cern.ch/el9_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre7/external/el9_amd64_gcc12/data/RecoTauTag/TrainingFiles/data/DeepTauIdSONIC/ \ --model-repository=/cvmfs/cms.cern.ch/el9_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre7/external/el9_amd64_gcc12/data/RecoMET/METPUSubtraction/data/models/ \ + --model-control-mode=explicit \ --allow-gpu-metrics=true \ --log-verbose=0 \ --strict-model-config=false \ --exit-timeout-secs=60 + resources: limits: { nvidia.com/gpu: 1, cpu: 2, memory: 4G} requests: { nvidia.com/gpu: 1, cpu: 2, memory: 4G} @@ -37,6 +39,14 @@ envoy: enabled: true hostName: sonic-cms.geddes.rcac.purdue.edu ingressClassName: public + rate_limiter: + prometheus_based: + enabled: false + dynamic_routing: + enabled: true + lua_filter: + enabled: true + lua_config: "cfg/envoy-filter-dynamic.lua" autoscaler: enabled: true @@ -55,15 +65,20 @@ tolerations: effect: NoSchedule prometheus: - enabled: true - server: - ingress: - enabled: true - hostName: prometheus-cms.geddes.rcac.purdue.edu - ingressClassName: public + # enabled: false + external: + enabled: true + url: "prometheus-cms.geddes.rcac.purdue.edu" + port: 443 + scheme: https + # server: + # ingress: + # enabled: true + # hostName: prometheus-cms.geddes.rcac.purdue.edu + # ingressClassName: public grafana: - enabled: true + enabled: false ingress: enabled: true hostName: grafana-cms.geddes.rcac.purdue.edu From 205077be87ef598e9f2e3e16105738721303f2ee Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Fri, 30 May 2025 17:14:09 +0000 Subject: [PATCH 11/20] Update JSON schema --- helm/supersonic/values.schema.json | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/helm/supersonic/values.schema.json b/helm/supersonic/values.schema.json index 5dbf5b9..fb3d1c7 100644 --- a/helm/supersonic/values.schema.json +++ b/helm/supersonic/values.schema.json @@ -370,14 +370,10 @@ "properties": { "enabled": { "type": "boolean" - }, - "luaConfig": { - "type": "string" } }, "required": [ - "enabled", - "luaConfig" + "enabled" ] } }, @@ -389,6 +385,21 @@ "loadBalancerPolicy": { "type": "string" }, + "lua_filter": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + }, + "lua_config": { + "type": "string" + } + }, + "required": [ + "enabled", + "lua_config" + ] + }, "dynamic_routing": { "type": "object", "properties": { @@ -441,6 +452,7 @@ "image", "ingress", "loadBalancerPolicy", + "lua_filter", "rate_limiter", "replicas", "resources", From aa6129f5c746976dd3a4c413d38bd0da7ed0677a Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Fri, 30 May 2025 17:14:29 +0000 Subject: [PATCH 12/20] Update helm docs --- docs/.values-table.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/.values-table.md b/docs/.values-table.md index 923a4d6..1116c8c 100644 --- a/docs/.values-table.md +++ b/docs/.values-table.md @@ -34,9 +34,11 @@ | envoy.rate_limiter.listener_level.max_tokens | int | `5` | Maximum number of simultaneous connections to the Envoy Proxy. Each new connection takes a "token" from the "bucket" which initially contains ``max_tokens`` tokens. | | envoy.rate_limiter.listener_level.tokens_per_fill | int | `1` | ``tokens_per_fill`` tokens are added to the "bucket" every ``fill_interval``, allowing new connections to be established. | | envoy.rate_limiter.listener_level.fill_interval | string | `"12s"` | For example, adding a new token every 12 seconds allows 5 new connections every minute. | -| envoy.rate_limiter.prometheus_based | object | `{"enabled":false,"luaConfig":"cfg/envoy-filter.lua"}` | This rate limiter rejects new connections based on metric extracted from Prometheus (e.g. inference queue latency). The metric is taken from parameter ``prometheus.serverLoadMetric``, and the threshold is set by ``prometheus.serverLoadThreshold``. These parameters are the same as those used by the KEDA autoscaler. | +| envoy.rate_limiter.prometheus_based | object | `{"enabled":false}` | This rate limiter rejects new connections based on metric extracted from Prometheus (e.g. inference queue latency). The metric is taken from parameter ``prometheus.serverLoadMetric``, and the threshold is set by ``prometheus.serverLoadThreshold``. These parameters are the same as those used by the KEDA autoscaler. | | envoy.rate_limiter.prometheus_based.enabled | bool | `false` | Enable rate limiter | | envoy.loadBalancerPolicy | string | `"LEAST_REQUEST"` | Envoy load balancer policy. Options: ROUND_ROBIN, LEAST_REQUEST, RING_HASH, RANDOM, MAGLEV | +| envoy.lua_filter.enabled | bool | `false` | | +| envoy.lua_filter.lua_config | string | `"cfg/envoy-filter.lua"` | | | envoy.dynamic_routing | object | `{"enabled":false}` | Enable dynamic routing in Envoy proxy. | | envoy.auth.enabled | bool | `false` | Enable authentication in Envoy proxy | | envoy.auth.jwt_issuer | string | `""` | | From 03fda7888d9306f2859d155c142d0a3da35ad14a Mon Sep 17 00:00:00 2001 From: kondratyevd Date: Fri, 30 May 2025 13:25:44 -0400 Subject: [PATCH 13/20] rename header --- helm/supersonic/cfg/envoy-filter-dynamic.lua | 6 +++--- helm/supersonic/templates/envoy/configmaps.yaml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/helm/supersonic/cfg/envoy-filter-dynamic.lua b/helm/supersonic/cfg/envoy-filter-dynamic.lua index d3c349c..1a04a76 100644 --- a/helm/supersonic/cfg/envoy-filter-dynamic.lua +++ b/helm/supersonic/cfg/envoy-filter-dynamic.lua @@ -77,8 +77,8 @@ function envoy_on_request(request_handle) request_handle:logInfo("ModelInfer model_name = " .. model_name) if model_name then local hostHeader = model_name .. ".cms.svc.cluster.local:8001" - request_handle:logInfo("x-model-host = " .. hostHeader) - request_handle:headers():add("x-model-host", hostHeader) + request_handle:logInfo("route-to = " .. hostHeader) + request_handle:headers():add("route-to", hostHeader) end for k, v in pairs(request_handle:headers()) do request_handle:logInfo("Header " .. k .. ": " .. v) @@ -89,7 +89,7 @@ function envoy_on_request(request_handle) end else --- for non-inference calls, for now just forward to default service - request_handle:headers():add("x-model-host", "supersonic-test-triton.cms.svc.cluster.local:8001") + request_handle:headers():add("route-to", "supersonic-test-triton.cms.svc.cluster.local:8001") end end end diff --git a/helm/supersonic/templates/envoy/configmaps.yaml b/helm/supersonic/templates/envoy/configmaps.yaml index f02d14d..fa97697 100644 --- a/helm/supersonic/templates/envoy/configmaps.yaml +++ b/helm/supersonic/templates/envoy/configmaps.yaml @@ -61,7 +61,7 @@ static_resources: typed_per_filter_config: envoy.filters.http.dynamic_forward_proxy: "@type": type.googleapis.com/envoy.extensions.filters.http.dynamic_forward_proxy.v3.PerRouteConfig - host_rewrite_header: "x-model-host" + host_rewrite_header: "route-to" route: cluster: dynamic_forward_proxy_cluster timeout: {{ .envoy.grpc_route_timeout }} From 8cbb8081afb09f42b62b9f1e13fdd6cdec4e24c5 Mon Sep 17 00:00:00 2001 From: kondratyevd Date: Fri, 30 May 2025 13:30:16 -0400 Subject: [PATCH 14/20] clean up dynamic lua filter --- helm/supersonic/cfg/envoy-filter-dynamic.lua | 82 +------------------ .../templates/envoy/configmaps.yaml | 2 + 2 files changed, 4 insertions(+), 80 deletions(-) diff --git a/helm/supersonic/cfg/envoy-filter-dynamic.lua b/helm/supersonic/cfg/envoy-filter-dynamic.lua index 1a04a76..9dd117d 100644 --- a/helm/supersonic/cfg/envoy-filter-dynamic.lua +++ b/helm/supersonic/cfg/envoy-filter-dynamic.lua @@ -2,61 +2,6 @@ function envoy_on_request(request_handle) local path = request_handle:headers():get(":path") local contentType = request_handle:headers():get("content-type") - if path == "/inference.GRPCInferenceService/RepositoryIndex" and contentType == "application/grpc" then - request_handle:streamInfo():dynamicMetadata():set("envoy.lua", "accept_request", false) - - local query = SERVER_LOAD_METRIC - local metric_threshold = tonumber(SERVER_LOAD_THRESHOLD) - local query_response_template = '"value":%[%d+%.%d+,"([%d%.]+)"%]' - local encoded_query = encode_query(query) - - request_handle:logInfo("Prometheus scheme: " .. "PROMETHEUS_SCHEME") - request_handle:logInfo("Prometheus host: " .. "PROMETHEUS_HOST") - request_handle:logInfo("Prometheus port: " .. "PROMETHEUS_PORT") - request_handle:logInfo("Query: " .. query) - request_handle:logInfo("Encoded query: " .. encoded_query) - - local headers, body = request_handle:httpCall( - "prometheus_cluster", - { - [":method"] = "GET", - [":path"] = "/api/v1/query?query=" .. encoded_query, - [":scheme"] = "PROMETHEUS_SCHEME", - [":authority"] = "PROMETHEUS_HOST" .. ":" .. "PROMETHEUS_PORT" - }, - "", - 5000 - ) - if not headers then - request_handle:logErr("HTTP call to Prometheus failed.") - return - end - - if not body or body == "" then - request_handle:logErr("Prometheus could not be reached or returned no data.") - return - end - - request_handle:logInfo("Query response body: " .. body) - local metric_value_str = string.match(body, query_response_template) - request_handle:logInfo("Extracted metric: " .. metric_value_str) - - if metric_value_str then - local metric_value = tonumber(metric_value_str) - if metric_value > metric_threshold then - request_handle:logInfo("Metric value exceeds threshold: " .. metric_value .. " > " .. metric_threshold) - else - request_handle:logInfo("Metric value below threshold: " .. metric_value .. " < " .. metric_threshold) - request_handle:streamInfo():dynamicMetadata():set("envoy.lua", "accept_request", true) - end - else - request_handle:logErr("Failed to parse metric value from Prometheus response.") - ---- Temporary ---- - request_handle:logErr("Accepting request regardless of metric value.") - request_handle:streamInfo():dynamicMetadata():set("envoy.lua", "accept_request", true) - end - end - ---- Extract model_name from ModelInferRequest ---- if contentType == "application/grpc" then if path == "/inference.GRPCInferenceService/ModelInfer" then @@ -76,7 +21,7 @@ function envoy_on_request(request_handle) -- log and propagate via dynamic metadata request_handle:logInfo("ModelInfer model_name = " .. model_name) if model_name then - local hostHeader = model_name .. ".cms.svc.cluster.local:8001" + local hostHeader = model_name .. ".NAMESPACE.svc.cluster.local:8001" request_handle:logInfo("route-to = " .. hostHeader) request_handle:headers():add("route-to", hostHeader) end @@ -89,30 +34,7 @@ function envoy_on_request(request_handle) end else --- for non-inference calls, for now just forward to default service - request_handle:headers():add("route-to", "supersonic-test-triton.cms.svc.cluster.local:8001") - end - end -end - -function envoy_on_response(response_handle) - local md = response_handle:streamInfo():dynamicMetadata():get("envoy.lua") - - if not md or md.accept_request == nil then - return - end - - if not response_handle:streamInfo():dynamicMetadata():get("envoy.lua")["accept_request"] then - response_handle:logInfo("Sending error as a response.") - local body = response_handle:body() - if body then - body:setBytes("") + request_handle:headers():add("route-to", "RELEASE-triton.NAMESPACE.svc.cluster.local:8001") end - response_handle:headers():replace("grpc-status", "1") end -end - -function encode_query(query) - return query:gsub("([^%w _%%%-%.~])", function(c) - return string.format("%%%02X", string.byte(c)) - end):gsub(" ", "+") end \ No newline at end of file diff --git a/helm/supersonic/templates/envoy/configmaps.yaml b/helm/supersonic/templates/envoy/configmaps.yaml index fa97697..dd2f187 100644 --- a/helm/supersonic/templates/envoy/configmaps.yaml +++ b/helm/supersonic/templates/envoy/configmaps.yaml @@ -282,6 +282,8 @@ data: {{- $luaConfig = $luaConfig | replace "PROMETHEUS_SCHEME" (include "supersonic.prometheusScheme" .) }} {{- $luaConfig = $luaConfig | replace "PROMETHEUS_HOST" (include "supersonic.prometheusHost" .) }} {{- $luaConfig = $luaConfig | replace "PROMETHEUS_PORT" (include "supersonic.prometheusPort" .) }} + {{- $luaConfig = $luaConfig | replace "RELEASE" .Release.Name }} + {{- $luaConfig = $luaConfig | replace "NAMESPACE" .Release.Namespace }} {{ $luaConfig | indent 4 }} --- From 62a4fea4b6cacbcb23ccd04bf4241a0bc8cce558 Mon Sep 17 00:00:00 2001 From: kondratyevd Date: Fri, 30 May 2025 13:37:07 -0400 Subject: [PATCH 15/20] add some comments --- helm/supersonic/cfg/envoy-filter-dynamic.lua | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/helm/supersonic/cfg/envoy-filter-dynamic.lua b/helm/supersonic/cfg/envoy-filter-dynamic.lua index 9dd117d..0cef8bb 100644 --- a/helm/supersonic/cfg/envoy-filter-dynamic.lua +++ b/helm/supersonic/cfg/envoy-filter-dynamic.lua @@ -12,6 +12,9 @@ function envoy_on_request(request_handle) local msg = body:sub(6) -- protobuf wire format for field 1, wire type 2: tag = 0x0A + -- field 1 is the model name - we know it from here: + -- https://github.com/kserve/open-inference-protocol/blob/main/specification/protocol/inference_grpc.md#inference + -- wire type 2 means that the field is length-delimited if msg:byte(1) == 0x0A then -- next byte is a varint length (assumes <128 bytes) local name_len = msg:byte(2) @@ -23,11 +26,12 @@ function envoy_on_request(request_handle) if model_name then local hostHeader = model_name .. ".NAMESPACE.svc.cluster.local:8001" request_handle:logInfo("route-to = " .. hostHeader) + -- add header request_handle:headers():add("route-to", hostHeader) end - for k, v in pairs(request_handle:headers()) do - request_handle:logInfo("Header " .. k .. ": " .. v) - end + -- for k, v in pairs(request_handle:headers()) do + -- request_handle:logInfo("Header " .. k .. ": " .. v) + -- end else request_handle:logErr("Unexpected protobuf tag: " .. string.format("0x%02X", msg:byte(1))) end From 29d22031665faf846acbf6e100d9e9a0acf213b6 Mon Sep 17 00:00:00 2001 From: kondratyevd Date: Fri, 30 May 2025 13:40:45 -0400 Subject: [PATCH 16/20] correctly mount lua config --- helm/supersonic/templates/envoy/deployment.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/helm/supersonic/templates/envoy/deployment.yaml b/helm/supersonic/templates/envoy/deployment.yaml index 7b2666c..5a48944 100644 --- a/helm/supersonic/templates/envoy/deployment.yaml +++ b/helm/supersonic/templates/envoy/deployment.yaml @@ -47,7 +47,7 @@ spec: volumeMounts: - name: {{ include "supersonic.name" . }}-envoy-config mountPath: /etc/envoy - {{- if .Values.envoy.rate_limiter.prometheus_based.enabled }} + {{- if .Values.envoy.lua_filter.enabled }} - name: {{ include "supersonic.name" . }}-lua-volume mountPath: /etc/envoy/lua readOnly: true @@ -58,7 +58,7 @@ spec: - name: {{ include "supersonic.name" . }}-envoy-config configMap: name: {{ include "supersonic.name" . }}-envoy-config - {{- if .Values.envoy.rate_limiter.prometheus_based.enabled }} + {{- if .Values.envoy.lua_filter.enabled }} - name: {{ include "supersonic.name" . }}-lua-volume configMap: name: {{ include "supersonic.name" . }}-lua-config From 987549b99ba438bc86d7e5541e37f7dccface720 Mon Sep 17 00:00:00 2001 From: kondratyevd Date: Fri, 30 May 2025 13:44:50 -0400 Subject: [PATCH 17/20] change log level for testing --- helm/supersonic/cfg/envoy-filter-dynamic.lua | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/helm/supersonic/cfg/envoy-filter-dynamic.lua b/helm/supersonic/cfg/envoy-filter-dynamic.lua index 0cef8bb..ee7f28d 100644 --- a/helm/supersonic/cfg/envoy-filter-dynamic.lua +++ b/helm/supersonic/cfg/envoy-filter-dynamic.lua @@ -22,10 +22,10 @@ function envoy_on_request(request_handle) local model_name = msg:sub(3, 2 + name_len) -- log and propagate via dynamic metadata - request_handle:logInfo("ModelInfer model_name = " .. model_name) + request_handle:logWarn("ModelInfer model_name = " .. model_name) if model_name then local hostHeader = model_name .. ".NAMESPACE.svc.cluster.local:8001" - request_handle:logInfo("route-to = " .. hostHeader) + request_handle:logWarn("route-to = " .. hostHeader) -- add header request_handle:headers():add("route-to", hostHeader) end From a946f300e38770765c866f8cb97c0d36bb67d805 Mon Sep 17 00:00:00 2001 From: kondratyevd Date: Fri, 30 May 2025 13:53:07 -0400 Subject: [PATCH 18/20] undo --- helm/supersonic/cfg/envoy-filter-dynamic.lua | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/helm/supersonic/cfg/envoy-filter-dynamic.lua b/helm/supersonic/cfg/envoy-filter-dynamic.lua index ee7f28d..0cef8bb 100644 --- a/helm/supersonic/cfg/envoy-filter-dynamic.lua +++ b/helm/supersonic/cfg/envoy-filter-dynamic.lua @@ -22,10 +22,10 @@ function envoy_on_request(request_handle) local model_name = msg:sub(3, 2 + name_len) -- log and propagate via dynamic metadata - request_handle:logWarn("ModelInfer model_name = " .. model_name) + request_handle:logInfo("ModelInfer model_name = " .. model_name) if model_name then local hostHeader = model_name .. ".NAMESPACE.svc.cluster.local:8001" - request_handle:logWarn("route-to = " .. hostHeader) + request_handle:logInfo("route-to = " .. hostHeader) -- add header request_handle:headers():add("route-to", hostHeader) end From 45108812392a2042498cd8ac2d936d3c4d5d5081 Mon Sep 17 00:00:00 2001 From: kondratyevd Date: Fri, 30 May 2025 14:26:33 -0400 Subject: [PATCH 19/20] extract model version from gRPC body --- helm/supersonic/cfg/envoy-filter-dynamic.lua | 23 +++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/helm/supersonic/cfg/envoy-filter-dynamic.lua b/helm/supersonic/cfg/envoy-filter-dynamic.lua index 0cef8bb..2171322 100644 --- a/helm/supersonic/cfg/envoy-filter-dynamic.lua +++ b/helm/supersonic/cfg/envoy-filter-dynamic.lua @@ -13,21 +13,34 @@ function envoy_on_request(request_handle) -- protobuf wire format for field 1, wire type 2: tag = 0x0A -- field 1 is the model name - we know it from here: - -- https://github.com/kserve/open-inference-protocol/blob/main/specification/protocol/inference_grpc.md#inference -- wire type 2 means that the field is length-delimited if msg:byte(1) == 0x0A then -- next byte is a varint length (assumes <128 bytes) local name_len = msg:byte(2) -- extract UTF-8 model name local model_name = msg:sub(3, 2 + name_len) + local offset = 3 + name_len + + -- Extract model version (field 2, wire type 2, tag 0x12) + local model_version = "" + if msg:byte(offset) == 0x12 then + local ver_len = msg:byte(offset + 1) + model_version = msg:sub(offset + 2, offset + 1 + ver_len) + -- request_handle:logInfo("ModelInfer model_version = " .. model_version) + offset = offset + 2 + ver_len + else + request_handle:logWarn(string.format("No model_version field (expected tag 0x12 at offset %d, got 0x%02X)", + offset, msg:byte(offset))) + end -- log and propagate via dynamic metadata - request_handle:logInfo("ModelInfer model_name = " .. model_name) + -- request_handle:logInfo("ModelInfer model_name = " .. model_name) if model_name then - local hostHeader = model_name .. ".NAMESPACE.svc.cluster.local:8001" - request_handle:logInfo("route-to = " .. hostHeader) + local svc_name = "RELEASE-" .. model_name .. "-v" .. model_version + local header_value = svc_name .. ".NAMESPACE.svc.cluster.local:8001" + -- request_handle:logInfo("route-to = " .. header_value) -- add header - request_handle:headers():add("route-to", hostHeader) + request_handle:headers():add("route-to", header_value) end -- for k, v in pairs(request_handle:headers()) do -- request_handle:logInfo("Header " .. k .. ": " .. v) From 3261947b2858ffa4f025860abc56591ac37f7ccd Mon Sep 17 00:00:00 2001 From: kondratyevd Date: Fri, 30 May 2025 15:04:16 -0400 Subject: [PATCH 20/20] imrprove lua script --- helm/supersonic/cfg/envoy-filter-dynamic.lua | 93 +++++++++++--------- 1 file changed, 50 insertions(+), 43 deletions(-) diff --git a/helm/supersonic/cfg/envoy-filter-dynamic.lua b/helm/supersonic/cfg/envoy-filter-dynamic.lua index 2171322..0740052 100644 --- a/helm/supersonic/cfg/envoy-filter-dynamic.lua +++ b/helm/supersonic/cfg/envoy-filter-dynamic.lua @@ -2,56 +2,63 @@ function envoy_on_request(request_handle) local path = request_handle:headers():get(":path") local contentType = request_handle:headers():get("content-type") + ---- Extract model_name from ModelInferRequest ---- if contentType == "application/grpc" then + -- request_handle:logInfo("path = " .. path) if path == "/inference.GRPCInferenceService/ModelInfer" then - -- grab entire request body (you may need to configure the filter to buffer bodies) - local body = request_handle:body():getBytes(0, request_handle:body():length()) - if body and #body > 5 then - -- strip the 5-byte gRPC header (1-byte flag + 4-byte msg-len) - local msg = body:sub(6) - - -- protobuf wire format for field 1, wire type 2: tag = 0x0A - -- field 1 is the model name - we know it from here: - -- wire type 2 means that the field is length-delimited - if msg:byte(1) == 0x0A then - -- next byte is a varint length (assumes <128 bytes) - local name_len = msg:byte(2) - -- extract UTF-8 model name - local model_name = msg:sub(3, 2 + name_len) - local offset = 3 + name_len - - -- Extract model version (field 2, wire type 2, tag 0x12) - local model_version = "" - if msg:byte(offset) == 0x12 then - local ver_len = msg:byte(offset + 1) - model_version = msg:sub(offset + 2, offset + 1 + ver_len) - -- request_handle:logInfo("ModelInfer model_version = " .. model_version) - offset = offset + 2 + ver_len - else - request_handle:logWarn(string.format("No model_version field (expected tag 0x12 at offset %d, got 0x%02X)", - offset, msg:byte(offset))) - end - - -- log and propagate via dynamic metadata - -- request_handle:logInfo("ModelInfer model_name = " .. model_name) - if model_name then - local svc_name = "RELEASE-" .. model_name .. "-v" .. model_version - local header_value = svc_name .. ".NAMESPACE.svc.cluster.local:8001" - -- request_handle:logInfo("route-to = " .. header_value) - -- add header - request_handle:headers():add("route-to", header_value) - end - -- for k, v in pairs(request_handle:headers()) do - -- request_handle:logInfo("Header " .. k .. ": " .. v) - -- end - else - request_handle:logErr("Unexpected protobuf tag: " .. string.format("0x%02X", msg:byte(1))) - end + + local model_name, model_version = extract_model_name_and_version(request_handle, body) + -- request_handle:logInfo("ModelInfer model_name = " .. model_name .. " model_version = " .. model_version) + + -- log and propagate via dynamic metadata + if model_name and model_version then + local svc_name = "RELEASE-" .. model_name .. "-v" .. model_version + local header_value = svc_name .. ".NAMESPACE.svc.cluster.local:8001" + request_handle:logInfo("route-to = " .. header_value) + -- add header + request_handle:headers():add("route-to", header_value) end else --- for non-inference calls, for now just forward to default service request_handle:headers():add("route-to", "RELEASE-triton.NAMESPACE.svc.cluster.local:8001") end end +end + +function extract_model_name_and_version(request_handle) + local model_name = "" + local model_version = "" + local body = request_handle:body():getBytes(0, request_handle:body():length()) + + if body and #body > 5 then + -- strip the 5-byte gRPC header (1-byte flag + 4-byte msg-len) + local msg = body:sub(6) + + -- protobuf wire format for field 1, wire type 2: tag = 0x0A + -- field 1 is the model name - we know it from here: + -- wire type 2 means that the field is length-delimited + if msg:byte(1) == 0x0A then + -- next byte is a varint length (assumes <128 bytes) + local name_len = msg:byte(2) + -- extract UTF-8 model name + model_name = msg:sub(3, 2 + name_len) + -- request_handle:logInfo("ModelInfer model_name = " .. model_name) + local offset = 3 + name_len + + -- Extract model version (field 2, wire type 2, tag 0x12) + if msg:byte(offset) == 0x12 then + local ver_len = msg:byte(offset + 1) + model_version = msg:sub(offset + 2, offset + 1 + ver_len) + -- request_handle:logInfo("ModelInfer model_version = " .. model_version) + offset = offset + 2 + ver_len + else + request_handle:logWarn(string.format("No model_version field (expected tag 0x12 at offset %d, got 0x%02X)", + offset, msg:byte(offset))) + end + else + request_handle:logErr("Unexpected protobuf tag: " .. string.format("0x%02X", msg:byte(1))) + end + end + return model_name, model_version end \ No newline at end of file