Skip to content

Commit 516a90f

Browse files
committed
Refactor: Use stdin for voice agent config to improve security
This commit refactors the fallback mechanism for creating voice agent subprocesses to eliminate a potential command injection vulnerability. Previously, user-provided data was passed as command-line arguments when launching a new agent process directly. This exposed the application to argument injection risks. The new implementation aligns the fallback behavior with the more secure pattern used by the process pool: - Session configuration is now passed as a single JSON object via the subprocess's stdin. - The agent script (`app.agents.voice.automatic`) now handles being launched with no arguments by reading this configuration from stdin. This change ensures that user-controlled data is no longer part of the command line, mitigating the security risk and making the process creation mechanism more robust and consistent.
1 parent 79f3ba5 commit 516a90f

File tree

4 files changed

+69
-55
lines changed

4 files changed

+69
-55
lines changed

app/agents/voice/automatic/__init__.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,29 @@
8585

8686

8787
async def main():
88+
# If no command-line arguments are provided, read a single session config from stdin.
89+
# This is the new secure mode for direct (non-pooled) process creation.
90+
if len(sys.argv) == 1:
91+
logger.info(
92+
"No command-line args detected, starting in single-session stdin mode."
93+
)
94+
try:
95+
line = await asyncio.to_thread(sys.stdin.readline)
96+
if line.strip():
97+
session_config = json.loads(line.strip())
98+
await handle_session(session_config)
99+
else:
100+
logger.error(
101+
"Started with no arguments but received no config from stdin."
102+
)
103+
except Exception as e:
104+
logger.error(
105+
f"Failed to read/handle session config from stdin: {e}", exc_info=True
106+
)
107+
finally:
108+
logger.info("Single-session stdin mode finished.")
109+
return
110+
88111
parser = argparse.ArgumentParser()
89112
parser.add_argument("-u", "--url", type=str, help="URL of the Daily room")
90113
parser.add_argument("-t", "--token", type=str, help="Daily token")

app/core/config.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -319,10 +319,18 @@ def get_required_env(var_name: str) -> str:
319319
MAX_DAILY_SESSION_LIMIT = int(os.environ.get("MAX_DAILY_SESSION_LIMIT", 1800))
320320

321321
# Pool Configuration
322-
VOICE_AGENT_POOL_SIZE = int(os.environ.get("VOICE_AGENT_POOL_SIZE", 1))
323-
VOICE_AGENT_MAX_POOL_SIZE = int(os.environ.get("VOICE_AGENT_MAX_POOL_SIZE", 3))
324-
DAILY_ROOM_POOL_SIZE = int(os.environ.get("DAILY_ROOM_POOL_SIZE", 1))
325-
DAILY_ROOM_MAX_POOL_SIZE = int(os.environ.get("DAILY_ROOM_MAX_POOL_SIZE", 5))
322+
AUTOMATIC_VOICE_AGENT_POOL_SIZE = int(
323+
os.environ.get("AUTOMATIC_VOICE_AGENT_POOL_SIZE", 1)
324+
)
325+
AUTOMATIC_VOICE_AGENT_MAX_POOL_SIZE = int(
326+
os.environ.get("AUTOMATIC_VOICE_AGENT_MAX_POOL_SIZE", 3)
327+
)
328+
AUTOMATIC_DAILY_ROOM_POOL_SIZE = int(
329+
os.environ.get("AUTOMATIC_DAILY_ROOM_POOL_SIZE", 1)
330+
)
331+
AUTOMATIC_DAILY_ROOM_MAX_POOL_SIZE = int(
332+
os.environ.get("AUTOMATIC_DAILY_ROOM_MAX_POOL_SIZE", 5)
333+
)
326334

327335
# Human-in-the-Loop (HITL) Configuration
328336
HITL_ENABLE = os.environ.get("HITL_ENABLE", "true").lower() == "true"

app/main.py

Lines changed: 29 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -16,16 +16,16 @@
1616
from app import __version__
1717
from app.api.routers import automatic, breeze_buddy
1818
from app.core.config import (
19+
AUTOMATIC_DAILY_ROOM_MAX_POOL_SIZE,
20+
AUTOMATIC_DAILY_ROOM_POOL_SIZE,
21+
AUTOMATIC_VOICE_AGENT_MAX_POOL_SIZE,
22+
AUTOMATIC_VOICE_AGENT_POOL_SIZE,
1923
DAILY_API_KEY,
2024
DAILY_API_URL,
21-
DAILY_ROOM_MAX_POOL_SIZE,
22-
DAILY_ROOM_POOL_SIZE,
2325
ENABLE_AUTOMATIC_DAILY_RECORDING,
2426
HOST,
2527
MAX_DAILY_SESSION_LIMIT,
2628
PORT,
27-
VOICE_AGENT_MAX_POOL_SIZE,
28-
VOICE_AGENT_POOL_SIZE,
2929
)
3030

3131
# Import necessary components from the new structure
@@ -89,8 +89,8 @@ async def lifespan(_app: FastAPI):
8989
try:
9090
await initialize_room_pool(
9191
daily_rest_helper=daily_helpers["rest"],
92-
pool_size=DAILY_ROOM_POOL_SIZE,
93-
max_pool_size=DAILY_ROOM_MAX_POOL_SIZE,
92+
pool_size=AUTOMATIC_DAILY_ROOM_POOL_SIZE,
93+
max_pool_size=AUTOMATIC_DAILY_ROOM_MAX_POOL_SIZE,
9494
max_session_limit=MAX_DAILY_SESSION_LIMIT,
9595
enable_recording=ENABLE_AUTOMATIC_DAILY_RECORDING,
9696
)
@@ -101,7 +101,8 @@ async def lifespan(_app: FastAPI):
101101
# Initialize voice agent process pool
102102
try:
103103
await initialize_voice_agent_pool(
104-
pool_size=VOICE_AGENT_POOL_SIZE, max_pool_size=VOICE_AGENT_MAX_POOL_SIZE
104+
pool_size=AUTOMATIC_VOICE_AGENT_POOL_SIZE,
105+
max_pool_size=AUTOMATIC_VOICE_AGENT_MAX_POOL_SIZE,
105106
)
106107

107108
# Set up callbacks to avoid circular imports
@@ -259,55 +260,37 @@ async def bot_connect(
259260
f"Failed to get process from pool: {e}, falling back to direct creation"
260261
)
261262

262-
# 5. Fallback: Launch subprocess directly
263+
# 5. Fallback: Launch subprocess directly and pass config via stdin
263264
bot_file = "app.agents.voice.automatic"
264-
cmd = [
265-
"python3",
266-
"-m",
267-
bot_file,
268-
"-u",
269-
room_url,
270-
"-t",
271-
bot_token,
272-
"--session-id",
273-
session_id,
274-
"--client-sid",
275-
client_sid,
276-
]
277-
278-
# Dynamically build command arguments from session_params
279-
arg_map = {
280-
"mode": "--mode",
281-
"user_name": "--user-name",
282-
"user_email": "--user-email",
283-
"tts_provider": "--tts-provider",
284-
"voice_name": "--voice-name",
285-
"euler_token": "--euler-token",
286-
"breeze_token": "--breeze-token",
287-
"shop_url": "--shop-url",
288-
"shop_id": "--shop-id",
289-
"shop_type": "--shop-type",
290-
"merchant_id": "--merchant-id",
291-
"platform_integrations": "--platform-integrations",
292-
"reseller_id": "--reseller-id",
265+
cmd = ["python3", "-m", bot_file]
266+
267+
session_config = {
268+
"room_url": room_url,
269+
"token": bot_token,
270+
"session_id": session_id,
271+
"client_sid": client_sid,
272+
**session_params,
293273
}
294-
295-
for key, value in session_params.items():
296-
if value is not None:
297-
arg_name = arg_map.get(key)
298-
if isinstance(value, list):
299-
cmd.extend([arg_name] + value)
300-
else:
301-
cmd.extend([arg_name, str(value)])
274+
config_json = json.dumps(session_config) + "\n"
302275

303276
logger.bind(session_id=session_id).info(
304-
f"Launching subprocess with command: {' '.join(cmd)}"
277+
f"Launching subprocess with command: {' '.join(cmd)} and passing config via stdin"
305278
)
279+
306280
proc = subprocess.Popen(
307281
cmd,
308282
cwd=Path(__file__).parent.parent,
283+
stdin=subprocess.PIPE,
309284
bufsize=1,
310285
)
286+
287+
# Write config to stdin and close it
288+
if proc.stdin:
289+
try:
290+
proc.stdin.write(config_json.encode("utf-8"))
291+
proc.stdin.close()
292+
except Exception as e:
293+
logger.error(f"Failed to write to subprocess stdin: {e}")
311294
bot_procs[proc.pid] = (proc, room_url, session_id, "direct")
312295
logger.bind(session_id=session_id).info(
313296
f"Subprocess started with PID: {proc.pid}"

POOL_IMPLEMENTATION.md renamed to docs/POOL_IMPLEMENTATION.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -100,16 +100,16 @@ The pool sizes are now configurable via environment variables. You can set them
100100

101101
```bash
102102
# The number of voice agent processes to keep ready in the pool.
103-
VOICE_AGENT_POOL_SIZE=3
103+
AUTOMATIC_VOICE_AGENT_POOL_SIZE=3
104104

105105
# The maximum number of voice agent processes the pool can scale up to.
106-
VOICE_AGENT_MAX_POOL_SIZE=3
106+
AUTOMATIC_VOICE_AGENT_MAX_POOL_SIZE=3
107107

108108
# The number of Daily.co rooms to keep ready in the pool.
109-
DAILY_ROOM_POOL_SIZE=5
109+
AUTOMATIC_DAILY_ROOM_POOL_SIZE=5
110110

111111
# The maximum number of Daily.co rooms the pool can scale up to.
112-
DAILY_ROOM_MAX_POOL_SIZE=5
112+
AUTOMATIC_DAILY_ROOM_MAX_POOL_SIZE=5
113113
```
114114

115115
### Multi-Pod Setup
@@ -242,7 +242,7 @@ curl -X POST http://localhost:8000/agent/voice/automatic/cleanup/{session_id}
242242
- [ ] **Model Pre-warming**: Investigate pre-loading heavy models (like STT, VAD) into memory when a process is created, rather than on the first session assignment. This could further reduce the initial session delay.
243243
- [ ] **Shared Model Cache**: For multi-process setups, explore using a shared memory cache (e.g., Redis, Memcached) for models to reduce the overall memory footprint.
244244
- [ ] **Asynchronous Model Loading**: Load non-critical models asynchronously after the primary connection is established to improve perceived performance.
245-
- [ ] **Auto-scaling of pool sizes**: Implement logic to dynamically adjust `VOICE_AGENT_POOL_SIZE` and `DAILY_ROOM_POOL_SIZE` based on real-time load and demand.
245+
- [ ] **Auto-scaling of pool sizes**: Implement logic to dynamically adjust `AUTOMATIC_VOICE_AGENT_POOL_SIZE` and `AUTOMATIC_DAILY_ROOM_POOL_SIZE` based on real-time load and demand.
246246
- [ ] **Advanced Health Checks**: Implement more sophisticated health checks that not only verify if a process is running but also check its responsiveness and resource consumption.
247247
- [ ] **Performance Metrics Dashboard**: Create a dedicated dashboard (e.g., using Grafana) to visualize pool statistics, connection times, and resource utilization over time.
248248

0 commit comments

Comments
 (0)