Commit 5de7187: enable sensitive log mode (#415)

1 parent: f3acf03

7 files changed (+39, -18 lines)

charts/model-engine/values_circleci.yaml
1 addition, 0 deletions

@@ -146,6 +146,7 @@ config:
 s3_file_llm_fine_tune_repository: "s3://$CIRCLECI_AWS_S3_BUCKET/fine_tune_repository"
 dd_trace_enabled: false
 istio_enabled: true
+sensitive_log_mode: false
 tgi_repository: "text-generation-inference"
 vllm_repository: "vllm"
 lightllm_repository: "lightllm"

charts/model-engine/values_sample.yaml
1 addition, 0 deletions

@@ -163,6 +163,7 @@ config:
 # dd_trace_enabled specifies whether to enable datadog tracing, datadog must be installed in the cluster
 dd_trace_enabled: false
 istio_enabled: true
+sensitive_log_mode: false

 # Asynchronous endpoints configs (coming soon)
 sqs_profile: default

model-engine/model_engine_server/api/llms_v1.py
9 additions, 6 deletions

@@ -12,6 +12,7 @@
     get_external_interfaces_read_only,
     verify_authentication,
 )
+from model_engine_server.common.config import hmi_config
 from model_engine_server.common.dtos.llms import (
     CancelFineTuneResponse,
     CompletionStreamV1Request,
@@ -307,9 +308,10 @@ async def create_completion_sync_task(
     """
     Runs a sync prompt completion on an LLM.
     """
-    logger.info(
-        f"POST /completion_sync with {request} to endpoint {model_endpoint_name} for {auth}"
-    )
+    if not hmi_config.sensitive_log_mode:
+        logger.info(
+            f"POST /completion_sync with {request} to endpoint {model_endpoint_name} for {auth}"
+        )
     try:
         use_case = CompletionSyncV1UseCase(
             model_endpoint_service=external_interfaces.model_endpoint_service,
@@ -369,9 +371,10 @@ async def create_completion_stream_task(
     """
     Runs a stream prompt completion on an LLM.
     """
-    logger.info(
-        f"POST /completion_stream with {request} to endpoint {model_endpoint_name} for {auth}"
-    )
+    if not hmi_config.sensitive_log_mode:  # pragma: no cover
+        logger.info(
+            f"POST /completion_stream with {request} to endpoint {model_endpoint_name} for {auth}"
+        )
     use_case = CompletionStreamV1UseCase(
         model_endpoint_service=external_interfaces.model_endpoint_service,
         llm_model_endpoint_service=external_interfaces.llm_model_endpoint_service,
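
The handler change gates the verbose request log line, which interpolates the full request body (and therefore any user prompt), behind the new flag: with sensitive_log_mode on, the line is simply never emitted. A minimal sketch of that guard, assuming only a config object exposing a boolean sensitive_log_mode; the FakeConfig class and log_request helper are illustrative, not part of the codebase:

    import logging

    logger = logging.getLogger(__name__)

    class FakeConfig:
        # Stand-in for hmi_config; only the field added in this commit is modeled.
        sensitive_log_mode: bool = True

    hmi_config = FakeConfig()

    def log_request(route: str, payload: dict) -> None:
        # With sensitive log mode enabled, the payload (which may contain user
        # prompts) is never written to the logs; otherwise behavior is unchanged.
        if not hmi_config.sensitive_log_mode:
            logger.info(f"POST {route} with {payload}")

    log_request("/v1/llm/completions-sync", {"prompt": "hello"})  # emits nothing here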

model-engine/model_engine_server/common/config.py
1 addition, 0 deletions

@@ -62,6 +62,7 @@ class HostedModelInferenceServiceConfig:
     user_inference_pytorch_repository: str
     user_inference_tensorflow_repository: str
     docker_image_layer_cache_repository: str
+    sensitive_log_mode: bool

     @classmethod
     def from_yaml(cls, yaml_path):
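
HostedModelInferenceServiceConfig is built from a service config YAML via from_yaml, so a new required field has to appear in every config file, which is presumably why the values and service-config YAMLs in this commit all add sensitive_log_mode: false. The loading details below are a hypothetical sketch; the real from_yaml body is not part of this diff:

    from dataclasses import dataclass

    import yaml

    @dataclass
    class HostedModelInferenceServiceConfig:
        # Only the field added in this commit is modeled here.
        sensitive_log_mode: bool

        @classmethod
        def from_yaml(cls, yaml_path):
            # Assumed behavior: parse the YAML and map keys onto the dataclass fields.
            with open(yaml_path, "r") as f:
                raw = yaml.safe_load(f)
            return cls(sensitive_log_mode=raw["sensitive_log_mode"])

    # An operator turns the feature on by setting `sensitive_log_mode: true`
    # in the deployed service config.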

model-engine/model_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py
3 additions, 0 deletions

@@ -682,6 +682,9 @@ async def create_vllm_bundle(
         else:
             raise InvalidRequestException(f"Quantization {quantize} is not supported by vLLM.")

+        if hmi_config.sensitive_log_mode:  # pragma: no cover
+            subcommands[-1] = subcommands[-1] + " --disable-log-requests"
+
         command = [
             "/bin/bash",
             "-c",

model-engine/service_configs/service_config_circleci.yaml
1 addition, 0 deletions

@@ -54,6 +54,7 @@ s3_file_llm_fine_tune_repository: "s3://model-engine-integration-tests/fine_tune

 dd_trace_enabled: false
 istio_enabled: true
+sensitive_log_mode: false
 tgi_repository: "text-generation-inference"
 vllm_repository: "vllm"
 lightllm_repository: "lightllm"

model-engine/tests/unit/api/test_llms.py
23 additions, 12 deletions

@@ -1,6 +1,6 @@
 import json
-import re
 from typing import Any, Dict, Tuple
+from unittest import mock

 import pytest
 from model_engine_server.common.dtos.llms import GetLLMModelEndpointV1Response
@@ -156,12 +156,14 @@ def test_completion_sync_endpoint_not_found_returns_404(
     assert response_1.status_code == 404


+# When enabling this test, other tests fail with "RuntimeError got Future <Future pending> attached to a different loop"
+# https://github.com/encode/starlette/issues/1315#issuecomment-980784457
 @pytest.mark.skip(reason="Need to figure out FastAPI test client asyncio funkiness")
 def test_completion_stream_success(
     llm_model_endpoint_streaming: ModelEndpoint,
     completion_stream_request: Dict[str, Any],
     get_test_client_wrapper,
-):
+):  # pragma: no cover
     client = get_test_client_wrapper(
         fake_docker_repository_image_always_exists=True,
         fake_model_bundle_repository_contents={},
@@ -175,19 +177,28 @@ def test_completion_stream_success(
         fake_batch_job_progress_gateway_contents={},
         fake_docker_image_batch_job_bundle_repository_contents={},
     )
-    response_1 = client.post(
-        f"/v1/llm/completions-stream?model_endpoint_name={llm_model_endpoint_streaming.record.name}",
-        auth=("no_user", ""),
-        json=completion_stream_request,
-        stream=True,
-    )
+    with mock.patch(
+        "model_engine_server.domain.use_cases.llm_model_endpoint_use_cases.count_tokens",
+        return_value=5,
+    ):
+        response_1 = client.post(
+            f"/v1/llm/completions-stream?model_endpoint_name={llm_model_endpoint_streaming.record.name}",
+            auth=("no_user", ""),
+            json=completion_stream_request,
+            stream=True,
+        )
     assert response_1.status_code == 200
     count = 0
     for message in response_1:
-        assert re.fullmatch(
-            'data: {"request_id"}: ".*", "output": null}\r\n\r\n',
-            message.decode("utf-8"),
-        )
+        decoded_message = message.decode("utf-8")
+        assert decoded_message.startswith("data: "), "SSE does not start with 'data: '"
+
+        # strip 'data: ' prefix from Server-sent events format
+        json_str = decoded_message[len("data: ") :]
+        parsed_data = json.loads(json_str.strip())
+        assert parsed_data["request_id"] is not None
+        assert parsed_data["output"] is None
+        assert parsed_data["error"] is None
         count += 1
     assert count == 1
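
The rewritten (still skipped) assertion drops the brittle re.fullmatch against a malformed pattern and instead parses each server-sent event: strip the "data: " prefix, json.loads the remainder, and check the request_id, output, and error fields. The same parsing as a standalone snippet, using a fabricated example message:

    import json

    message = b'data: {"request_id": "abc123", "output": null, "error": null}\r\n\r\n'

    decoded_message = message.decode("utf-8")
    assert decoded_message.startswith("data: ")

    # Drop the Server-sent events prefix, then parse the JSON payload.
    parsed_data = json.loads(decoded_message[len("data: "):].strip())
    assert parsed_data["request_id"] is not None
    assert parsed_data["output"] is None
    assert parsed_data["error"] is None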
