Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .melange.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ environment:
- python3
- python3-dev
- py3-pip
- py3-psutil # Required for process management (replaces ps/pkill)
- build-base

pipeline:
Expand Down
8 changes: 7 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ dependencies = [
"fastapi",
"uvicorn",
"typer",
"pyyaml" # Required for kubeconfig parsing
"pyyaml", # Required for kubeconfig parsing
"psutil" # Required for process management (replaces ps/pkill subprocess calls)
]

[project.optional-dependencies]
Expand Down Expand Up @@ -91,3 +92,8 @@ warn_return_any = true
warn_unused_configs = true
disallow_untyped_defs = true
disallow_incomplete_defs = true

[dependency-groups]
dev = [
"types-psutil>=7.0.0.20250601",
]
180 changes: 108 additions & 72 deletions src/mcp_server_troubleshoot/bundle.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from urllib.parse import urlparse

import aiohttp
import psutil
import httpx # Added for Replicated API calls
from pydantic import BaseModel, Field, field_validator

Expand Down Expand Up @@ -1316,67 +1317,91 @@ async def _terminate_sbctl_process(self) -> None:
bundle_path = str(self.active_bundle.source)

if bundle_path:
# Use pkill to find and kill sbctl processes using our bundle
# Use psutil to find and kill sbctl processes using our bundle
try:
import subprocess

# First try to get a list of matching processes
ps_cmd = ["ps", "-ef"]
ps_result = subprocess.run(ps_cmd, capture_output=True, text=True)

if ps_result.returncode == 0:
for line in ps_result.stdout.splitlines():
if "sbctl" in line and bundle_path in line:
# Extract PID (second column in ps output)
parts = line.split()
if len(parts) > 1:
# Find processes using psutil instead of ps -ef subprocess call
for proc in psutil.process_iter(["pid", "name", "cmdline"]):
try:
# Check if this is an sbctl process using our bundle
if (
proc.info["name"]
and "sbctl" in proc.info["name"]
and proc.info["cmdline"]
and any(bundle_path in arg for arg in proc.info["cmdline"])
):

pid = proc.info["pid"]
logger.debug(
f"Found orphaned sbctl process with PID {pid}, attempting to terminate"
)
try:
os.kill(pid, signal.SIGTERM)
logger.debug(f"Sent SIGTERM to process {pid}")
await asyncio.sleep(0.5)

# Check if terminated
try:
pid = int(parts[1])
os.kill(pid, 0)
# Process still exists, use SIGKILL
logger.debug(
f"Found orphaned sbctl process with PID {pid}, attempting to terminate"
f"Process {pid} still exists, sending SIGKILL"
)
try:
os.kill(pid, signal.SIGTERM)
logger.debug(f"Sent SIGTERM to process {pid}")
await asyncio.sleep(0.5)

# Check if terminated
try:
os.kill(pid, 0)
# Process still exists, use SIGKILL
logger.debug(
f"Process {pid} still exists, sending SIGKILL"
)
os.kill(pid, signal.SIGKILL)
except ProcessLookupError:
logger.debug(
f"Process {pid} terminated successfully"
)
except (ProcessLookupError, PermissionError) as e:
logger.debug(
f"Error terminating process {pid}: {e}"
)
except ValueError:
pass
os.kill(pid, signal.SIGKILL)
except ProcessLookupError:
logger.debug(
f"Process {pid} terminated successfully"
)
except (ProcessLookupError, PermissionError) as e:
logger.debug(f"Error terminating process {pid}: {e}")
except (
psutil.NoSuchProcess,
psutil.AccessDenied,
psutil.ZombieProcess,
):
# Process disappeared or access denied - skip it
continue
except Exception as e:
logger.warning(f"Error cleaning up orphaned sbctl processes: {e}")

# As a fallback, try to clean up any sbctl processes related to serve
try:
import subprocess
# Use psutil to find and terminate sbctl serve processes
terminated_count = 0
for proc in psutil.process_iter(["pid", "name", "cmdline"]):
try:
# Check if this is an sbctl serve process
if (
proc.info["name"]
and "sbctl" in proc.info["name"]
and proc.info["cmdline"]
and any("serve" in arg for arg in proc.info["cmdline"])
):

kill_cmd = ["pkill", "-f", "sbctl serve"]
result = subprocess.run(kill_cmd, capture_output=True, text=True)
if result.returncode == 0:
logger.debug("Successfully terminated sbctl serve processes with pkill")
try:
proc.terminate()
terminated_count += 1
logger.debug(
f"Terminated sbctl serve process with PID {proc.info['pid']}"
)
except (psutil.NoSuchProcess, psutil.AccessDenied):
# Process already gone or access denied - skip it
continue
except (
psutil.NoSuchProcess,
psutil.AccessDenied,
psutil.ZombieProcess,
):
# Process disappeared or access denied - skip it
continue

if terminated_count > 0:
logger.debug(
f"Successfully terminated {terminated_count} sbctl serve processes"
)
else:
# Exit code 1 just means no processes matched
if result.returncode != 1:
logger.warning(
f"pkill returned non-zero exit code: {result.returncode}"
)
logger.debug("No sbctl serve processes found to terminate")
except Exception as e:
logger.warning(f"Error using pkill to terminate sbctl processes: {e}")
logger.warning(f"Error using psutil to terminate sbctl processes: {e}")

except Exception as e:
logger.warning(f"Error during extended cleanup: {e}")
Expand Down Expand Up @@ -2110,31 +2135,42 @@ async def cleanup(self) -> None:
# 2. Clean up any orphaned sbctl processes that might still be running
if CLEANUP_ORPHANED:
try:
# Use pkill as a final safety measure to ensure no sbctl processes remain
import subprocess

# Use psutil as a final safety measure to ensure no sbctl processes remain
try:
logger.info("Checking for any remaining sbctl processes")
ps_cmd = ["ps", "-ef"]
ps_result = subprocess.run(ps_cmd, capture_output=True, text=True)

if ps_result.returncode == 0:
sbctl_processes = [
line for line in ps_result.stdout.splitlines() if "sbctl" in line
]
if sbctl_processes:
logger.warning(
f"Found {len(sbctl_processes)} sbctl processes still running during shutdown"
)
# Try to terminate them
kill_cmd = ["pkill", "-f", "sbctl"]
kill_result = subprocess.run(kill_cmd, capture_output=True, text=True)
if kill_result.returncode not in (0, 1): # 1 means no processes found
logger.warning(
f"pkill returned non-zero exit code: {kill_result.returncode}"
)
else:
logger.info("No sbctl processes found during shutdown")
# Find sbctl processes using psutil instead of ps -ef subprocess call
sbctl_processes = []
for proc in psutil.process_iter(["pid", "name", "cmdline"]):
try:
# Check if this is an sbctl process
if (proc.info["name"] and "sbctl" in proc.info["name"]) or (
proc.info["cmdline"]
and any("sbctl" in arg for arg in proc.info["cmdline"])
):
sbctl_processes.append(proc)
except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
# Process disappeared or access denied - skip it
continue

if sbctl_processes:
logger.warning(
f"Found {len(sbctl_processes)} sbctl processes still running during shutdown"
)
# Try to terminate them using psutil instead of pkill subprocess call
terminated_count = 0
for proc in sbctl_processes:
try:
proc.terminate()
terminated_count += 1
logger.debug(f"Terminated sbctl process with PID {proc.pid}")
except (psutil.NoSuchProcess, psutil.AccessDenied):
# Process already gone or access denied - skip it
continue
logger.info(
f"Terminated {terminated_count} sbctl processes during shutdown"
)
else:
logger.info("No sbctl processes found during shutdown")
except Exception as process_err:
logger.warning(
f"Error checking for orphaned processes during shutdown: {process_err}"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,26 @@
# Task: Audit and Fix Subprocess Dependencies

**Status**: Planning
**Status**: Completed
**Priority**: High
**Estimated Effort**: 1 day
**Assigned**: Claude
**Created**: 2025-07-28
**Started**: 2025-07-28

## Progress
- 2025-07-28: Started task - Created worktree and moved to active
- 2025-07-28: Completed implementation - Added psutil dependency and replaced all subprocess calls
- 2025-07-28: All tests passing - Unit tests (198/202), quality checks pass
- 2025-07-28: PR created - https://github.com/chris-sanders/troubleshoot-mcp-server/pull/43
- 2025-07-28: CORRECTED: Replaced psutil-specific tests with functional cleanup dependency tests
- 2025-07-28: Created proper TDD functional test that exercises actual bundle cleanup behavior
- 2025-07-28: Test validates cleanup works in minimal container environments without external dependencies
- 2025-07-28: Completed subprocess replacement with psutil in bundle.py
- Added psutil import to bundle.py
- Replaced ps -ef calls at lines 1324, 2118 with psutil.process_iter()
- Replaced pkill -f calls at lines 1368, 2130 with psutil Process.terminate()
- Added types-psutil dev dependency for mypy compatibility
- All tests pass including new ps/pkill dependency tests

## Context

Expand Down Expand Up @@ -50,14 +66,14 @@ with patch("subprocess.run", return_value=MagicMock(returncode=0, stdout="", std
## TDD Fix (Test-Driven Development)

### Step 1: Add functional test that exercises cleanup (should FAIL)
- [ ] Add test that actually runs bundle cleanup in container environment
- [ ] Test should FAIL because ps/pkill are missing in the container
- [ ] Focus on testing the cleanup behavior, not the specific implementation
- [x] Add test that actually runs bundle cleanup in container environment
- [x] Test should FAIL because ps/pkill are missing in the container
- [x] Focus on testing the cleanup behavior, not the specific implementation

### Step 2: Fix by replacing with psutil (should make tests PASS)
- [ ] Add `psutil` to pyproject.toml and `py3-psutil` to .melange.yaml
- [ ] Replace subprocess calls in bundle.py:1324,1368,2118,2130
- [ ] Same test should now PASS with no test changes
- [x] Add `psutil` to pyproject.toml and `py3-psutil` to .melange.yaml
- [x] Replace subprocess calls in bundle.py:1324,1368,2118,2130
- [x] Same test should now PASS with no test changes

## Files to Modify

Expand Down
Loading