Skip to content

Commit 5a3e339

Browse files
Option to read Redis URL from AWS Secret (#526)
Add an option to have the pods read Redis auth info from an AWS secret. Note: there are two places the Redis auth info needs to be added, since Redis is used both as the message queue for model endpoint creation requests and as a cache for endpoint info. The secret is formatted as follows: it must contain the keys host and port, and may contain the keys scheme (optional, defaults to redis://), auth_token (optional), and query_params (optional). These control which Redis gets used as the message queue for the endpoint builder. It must also contain a key cache-url, the full Redis URL of the Redis to be used as a cache.
1 parent 9a0e071 commit 5a3e339

File tree

4 files changed

+51
-3
lines changed

4 files changed

+51
-3
lines changed

charts/model-engine/values_sample.yaml

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -156,18 +156,37 @@ config:
156156
ml_account_id: "000000000000"
157157
# docker_repo_prefix [required] is the prefix for AWS ECR repositories
158158
docker_repo_prefix: "000000000000.dkr.ecr.us-east-1.amazonaws.com"
159-
# redis_host [required] is the hostname of the redis cluster you wish to connect
159+
# redis_host [required if redis_aws_secret_name not present] is the hostname of the redis cluster you wish to connect to
160160
redis_host: llm-engine-prod-cache.use1.cache.amazonaws.com
161+
# redis_aws_secret_name [optional] is the AWS secret that contains the connection info of the Redis cluster.
162+
# The information provided should be as follows:
163+
# scheme: either redis:// or rediss://, will default to redis://
164+
# auth_token (optional): an auth token for the Redis cluster
165+
# host: the hostname of the Redis cluster
166+
# port: the port of the Redis cluster
167+
# query_params (optional): additional query parameters for the Redis cluster, will default to ""
168+
# The url will be built as follows:
169+
# {scheme}{host}:{port}/{db_index}{query_params} if auth_token is not provided,
170+
# {scheme}:{auth_token}@{host}:{port}/{db_index}{query_params} if auth_token is provided
171+
# db_index will be filled in by LLM Engine.
172+
# This secret must be accessible by the default LLM Engine AWS role
173+
# e.g. what is set by profile_ml_worker if provided
174+
# redis_aws_secret_name: sample-prod/redis-credentials
161175
# s3_bucket [required] is the S3 bucket you wish to connect
162176
s3_bucket: "llm-engine"
163177
launch:
164178
# endpoint_namespace [required] is K8s namespace the endpoints will be created in
165179
endpoint_namespace: llm-engine
166180
# cache_redis_aws_url is the full url for the redis cluster you wish to connect,
167181
# cache_redis_azure_host is the redis cluster host when using cloud_provider azure
168-
# one of cache_redis_aws_url and cache_redis_azure_host must be provided
182+
# cache_redis_aws_secret_name is an AWS secret that contains the Redis credentials.
183+
# It has a field "cache-url" with the full URL of the Redis cluster (including db number).
184+
# Other fields are ignored; e.g. you can use the secret for multiple purposes.
185+
# This secret must be accessible by the default LLM Engine AWS role
186+
# exactly one of cache_redis_aws_url, cache_redis_azure_host, or cache_redis_aws_secret_name must be provided
169187
cache_redis_aws_url: redis://llm-engine-prod-cache.use1.cache.amazonaws.com:6379/15
170188
cache_redis_azure_host: llm-engine-cache.redis.cache.windows.net:6380
189+
cache_redis_aws_secret_name: sample-prod/redis-credentials
171190
# s3_file_llm_fine_tuning_job_repository [required] is the S3 URI for the S3 bucket/key that you wish to save fine-tuned assets to
172191
s3_file_llm_fine_tuning_job_repository: "s3://llm-engine/llm-ft-job-repository"
173192
# dd_trace_enabled specifies whether to enable datadog tracing, datadog must be installed in the cluster

model-engine/model_engine_server/common/config.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
import yaml
1010
from azure.identity import DefaultAzureCredential
11+
from model_engine_server.core.aws.secrets import get_key_file
1112
from model_engine_server.core.config import infra_config
1213
from model_engine_server.core.loggers import logger_name, make_logger
1314

@@ -68,8 +69,12 @@ class HostedModelInferenceServiceConfig:
6869
user_inference_tensorflow_repository: str
6970
docker_image_layer_cache_repository: str
7071
sensitive_log_mode: bool
72+
# Exactly one of the following three must be specified
7173
cache_redis_aws_url: Optional[str] = None # also using this to store sync autoscaling metrics
7274
cache_redis_azure_host: Optional[str] = None
75+
cache_redis_aws_secret_name: Optional[
76+
str
77+
] = None # Not an env var because the redis cache info is already here
7378

7479
@classmethod
7580
def from_yaml(cls, yaml_path):
@@ -80,7 +85,18 @@ def from_yaml(cls, yaml_path):
8085
@property
8186
def cache_redis_url(self) -> str:
8287
if self.cache_redis_aws_url:
88+
assert infra_config().cloud_provider == "aws", "cache_redis_aws_url is only for AWS"
89+
if self.cache_redis_aws_secret_name:
90+
logger.warning(
91+
"Both cache_redis_aws_url and cache_redis_aws_secret_name are set. Using cache_redis_aws_url"
92+
)
8393
return self.cache_redis_aws_url
94+
elif self.cache_redis_aws_secret_name:
95+
assert (
96+
infra_config().cloud_provider == "aws"
97+
), "cache_redis_aws_secret_name is only for AWS"
98+
creds = get_key_file(self.cache_redis_aws_secret_name) # Use default role
99+
return creds["cache-url"]
84100

85101
assert self.cache_redis_azure_host and infra_config().cloud_provider == "azure"
86102
username = os.getenv("AZURE_OBJECT_ID")

model-engine/model_engine_server/core/celery/app.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from celery.app.control import Inspect
1111
from celery.result import AsyncResult
1212
from model_engine_server.core.aws.roles import session
13+
from model_engine_server.core.aws.secrets import get_key_file
1314
from model_engine_server.core.config import infra_config
1415
from model_engine_server.core.loggers import (
1516
CustomJSONFormatter,
@@ -195,6 +196,17 @@ def get_redis_host_port():
195196

196197

197198
def get_redis_endpoint(db_index: int = 0) -> str:
199+
if infra_config().redis_aws_secret_name is not None:
200+
logger.info("Using infra_config().redis_aws_secret_name for Redis endpoint")
201+
creds = get_key_file(infra_config().redis_aws_secret_name) # Use default role
202+
scheme = creds.get("scheme", "redis://")
203+
host = creds["host"]
204+
port = creds["port"]
205+
query_params = creds.get("query_params", "")
206+
auth_token = creds.get("auth_token", None)
207+
if auth_token is not None:
208+
return f"{scheme}:{auth_token}@{host}:{port}/{db_index}{query_params}"
209+
return f"{scheme}{host}:{port}/{db_index}{query_params}"
198210
host, port = get_redis_host_port()
199211
auth_token = os.getenv("REDIS_AUTH_TOKEN")
200212
if auth_token:

model-engine/model_engine_server/core/config.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,9 @@ class InfraConfig:
3838
default_region: str
3939
ml_account_id: str
4040
docker_repo_prefix: str
41-
redis_host: str
4241
s3_bucket: str
42+
redis_host: Optional[str] = None
43+
redis_aws_secret_name: Optional[str] = None
4344
profile_ml_worker: str = "default"
4445
profile_ml_inference_worker: str = "default"
4546
identity_service_url: Optional[str] = None

0 commit comments

Comments
 (0)