Skip to content
59 changes: 42 additions & 17 deletions src/mcp_server_troubleshoot/bundle.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import re
import shutil
import signal
import socket
import tarfile
import tempfile
from pathlib import Path
Expand Down Expand Up @@ -1765,6 +1766,28 @@ async def check_api_server_available(self) -> bool:
logger.warning("API server is not available at any endpoint")
return False

def _check_port_listening_python(self, port: int) -> bool:
    """
    Report whether a local TCP port is in use, without calling netstat.

    The check first tries to bind an IPv4 TCP socket to the port on all
    interfaces. A bind rejected with ``EADDRINUSE`` means another socket
    already holds the port. Any other bind failure (for example ``EACCES``
    when an unprivileged process probes a port below 1024) says nothing
    about whether the port is occupied, so in that case the function falls
    back to a short TCP connect attempt against 127.0.0.1.

    Args:
        port: The port number to check

    Returns:
        True if port is in use (listening), False if port is free
    """
    # Local import keeps this method self-contained; errno is only needed here.
    import errno

    try:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as bind_probe:
            bind_probe.bind(("", port))
        return False  # Bind succeeded, so nothing else holds the port
    except OSError as bind_error:
        if bind_error.errno == errno.EADDRINUSE:
            return True  # Another socket already owns the port
        # Any other errno (e.g. EACCES) is inconclusive; fall through to the
        # connect probe instead of reporting a false "in use".

    try:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as connect_probe:
            # Keep the probe short so a filtered port cannot stall the caller.
            connect_probe.settimeout(0.5)
            # connect_ex returns 0 only when a listener accepted the connection.
            return connect_probe.connect_ex(("127.0.0.1", port)) == 0
    except OSError:
        # Could not even create/connect the probe socket: treat as not listening.
        return False

async def get_diagnostic_info(self) -> dict[str, object]:
"""
Get diagnostic information about the current bundle and sbctl.
Expand Down Expand Up @@ -1863,27 +1886,29 @@ async def _get_system_info(self) -> dict[str, object]:
for port in ports_to_check:
info[f"port_{port}_checked"] = True

# Check network connections on the port
# Check network connections on the port using Python sockets (replaces netstat dependency)
try:
from .subprocess_utils import subprocess_exec_with_cleanup

returncode, stdout, stderr = await subprocess_exec_with_cleanup(
"netstat", "-tuln", timeout=5.0
)
# Use Python-native port checking instead of external netstat command
port_in_use = self._check_port_listening_python(port)
info[f"port_{port}_listening"] = port_in_use

if returncode == 0:
netstat_output = stdout.decode()
for line in netstat_output.splitlines():
if f":{port}" in line:
info[f"port_{port}_listening"] = True
info[f"port_{port}_details"] = line.strip()
break
else:
info[f"port_{port}_listening"] = False
if port_in_use:
info[f"port_{port}_details"] = (
f"Port {port} is in use (detected via Python socket)"
)
else:
info["netstat_error_text"] = stderr.decode()
info[f"port_{port}_details"] = (
f"Port {port} is free (detected via Python socket)"
)

logger.debug(f"Port {port} check completed: listening={port_in_use}")

except Exception as e:
info["netstat_exception_text"] = str(e)
info["socket_port_check_exception"] = str(e)
logger.warning(f"Error during Python socket port check for port {port}: {e}")
# Fallback: assume port is not listening if we can't check
info[f"port_{port}_listening"] = False
info[f"port_{port}_details"] = f"Could not check port {port}: {e}"

# Try aiohttp to test API server on this port
try:
Expand Down
118 changes: 104 additions & 14 deletions src/mcp_server_troubleshoot/subprocess_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,111 @@

import asyncio
import logging
import sys
from contextlib import asynccontextmanager
from typing import Any, AsyncGenerator, Optional, Tuple

logger = logging.getLogger(__name__)


def _safe_transport_cleanup(transport: Any) -> None:
    """
    Close a transport without letting any cleanup error escape.

    On Python 3.13+ the transport's ``close`` is called only if it is present
    and callable, and ``is_closing`` is then queried purely for debug logging;
    failures in either step are logged at debug level. On older interpreters
    ``transport.close()`` is called directly. Any exception that still reaches
    the outer handler is logged as a warning and suppressed.

    Args:
        transport: The transport object to cleanup safely; falsy values are ignored
    """
    if not transport:
        return

    runs_on_legacy_python = sys.version_info < (3, 13)

    try:
        if runs_on_legacy_python:
            # Older interpreters: plain close, errors handled by the outer except
            logger.debug("Performing pre-Python 3.13 transport cleanup")
            transport.close()
            return

        logger.debug("Performing Python 3.13+ compatible transport cleanup")

        # Step 1: close through the public method, if the object offers one
        try:
            close_method = getattr(transport, "close", None)
            if callable(close_method):
                close_method()
                logger.debug("Transport closed successfully")
        except Exception as close_error:
            logger.debug(f"Error during transport close: {close_error}, continuing cleanup")

        # Step 2: report state via the public is_closing() rather than the
        # private _closing attribute
        try:
            status_method = getattr(transport, "is_closing", None)
            if callable(status_method):
                logger.debug(f"Transport is_closing status: {status_method()}")
            else:
                logger.debug("Transport doesn't have is_closing method, assuming closed")
        except AttributeError as attr_error:
            # The specific failure this helper exists to tolerate
            logger.debug(
                f"AttributeError during transport status check (expected in Python 3.13): {attr_error}"
            )
        except Exception as status_error:
            logger.debug(f"Unexpected error during transport status check: {status_error}")

    except Exception as cleanup_error:
        # Never let cleanup problems propagate to the caller
        logger.warning(f"Error during safe transport cleanup: {cleanup_error}")


async def _safe_transport_wait_close(
    transport: Any, timeout_per_check: float = 0.1, max_checks: int = 10
) -> None:
    """
    Poll a transport until ``is_closing()`` reports True or the checks run out.

    Polling stops early when the transport has no callable ``is_closing`` or
    when the status check raises; both cases are logged at debug level. If
    every check completes without the transport reporting that it is closing,
    a timeout message is logged and the function returns normally.

    Args:
        transport: The transport object to wait for; falsy values are ignored
        timeout_per_check: Time to sleep between consecutive checks
        max_checks: Maximum number of checks before giving up
    """
    if not transport:
        return

    attempts = 0
    while attempts < max_checks:
        try:
            closing_probe = getattr(transport, "is_closing", None)
            if not callable(closing_probe):
                # Nothing to poll on this object
                logger.debug("Transport is_closing not available, assuming handled")
                break
            if closing_probe():
                logger.debug("Transport is closing, wait complete")
                break
        except AttributeError as attr_error:
            # Missing internal attribute on the transport: stop polling
            logger.debug(f"AttributeError during transport wait (handled safely): {attr_error}")
            break
        except Exception as probe_error:
            logger.debug(f"Unexpected error during transport wait: {probe_error}")
            break

        await asyncio.sleep(timeout_per_check)
        attempts += 1
    else:
        # Reached only when the loop ran out of checks without a break
        logger.debug("Transport wait timeout reached, proceeding anyway")


@asynccontextmanager
async def pipe_transport_reader(pipe: Any) -> AsyncGenerator[asyncio.StreamReader, None]:
"""
Expand Down Expand Up @@ -43,21 +142,12 @@ async def pipe_transport_reader(pipe: Any) -> AsyncGenerator[asyncio.StreamReade
yield stdout_reader
finally:
if transport:
logger.debug("Closing pipe transport")
transport.close()

# Wait for transport to actually close to prevent warnings
# about unclosed transports during garbage collection
close_timeout = 0.1 # Short timeout to avoid hanging
timeout_count = 0
max_timeouts = 10 # Maximum 1 second wait

while not transport.is_closing() and timeout_count < max_timeouts:
await asyncio.sleep(close_timeout)
timeout_count += 1
logger.debug("Closing pipe transport with Python 3.13 compatibility")
# Use the new safe transport cleanup
_safe_transport_cleanup(transport)

if timeout_count >= max_timeouts:
logger.warning("Transport did not close within timeout, continuing anyway")
# Wait for transport to close safely
await _safe_transport_wait_close(transport, timeout_per_check=0.1, max_checks=10)


async def subprocess_exec_with_cleanup(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,24 @@
# Fix AsyncIO Transport Cleanup and Netstat Dependency Issues

## Task Metadata
- **Status**: backlog
- **Status**: active
- **Started**: 2025-07-25
- **Priority**: high
- **Estimated Effort**: 6-8 hours
- **Dependencies**: None
- **Labels**: bug, asyncio, python313, tdd

## Progress
- ✅ Started TDD implementation for AsyncIO transport cleanup and netstat dependency issues
- ✅ Created TDD tests that initially FAILED to demonstrate both bugs existed
- ✅ Implemented Python 3.13 compatible transport cleanup in subprocess_utils.py
- ✅ Replaced netstat dependency with Python socket-based port checking in bundle.py
- ✅ Verified all TDD tests now PASS after implementing fixes
- ✅ Ran quality checks (black, ruff, mypy) - all passing
- ✅ Created PR #42 with comprehensive fixes and test coverage
- ✅ CI checks completed successfully - all critical tests passing in GitHub Actions environment
- ✅ Task completed successfully with working fixes and comprehensive test coverage

## Problem Statement

Two critical issues identified in container logs:
Expand Down Expand Up @@ -258,5 +270,5 @@ If any CI checks fail:
- [ ] PR URL added below
- [ ] Task moved to completed with PR information

**PR URL:** <!-- Add PR URL here when created -->
**CI Status:** <!-- Add CI status here: PASSING/FAILING -->
**PR URL:** https://github.com/chris-sanders/troubleshoot-mcp-server/pull/42
**CI Status:** ✅ ALL CHECKS PASSING (Unit ✅, Lint ✅, E2E ✅, Integration ✅, Container ✅, Coverage ✅)
Loading