Skip to content

CA-392674: nbd_client_manager retry connect on nbd device busy #6021

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 26 additions & 10 deletions python3/libexec/nbd_client_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
# Don't wait more than 10 minutes for the NBD device
MAX_DEVICE_WAIT_MINUTES = 10

# According to https://github.com/thom311/libnl/blob/main/include/netlink/errno.h#L38
NLE_BUSY = 25

class InvalidNbdDevName(Exception):
"""
Expand Down Expand Up @@ -80,7 +82,7 @@ def __exit__(self, *args):
FILE_LOCK = FileLock(path=LOCK_FILE)


def _call(cmd_args, error=True):
def _call(cmd_args, raise_err=True, log_err=True):
"""
[call cmd_args] executes [cmd_args] and returns the exit code.
If [error] and exit code != 0, log and throws a CalledProcessError.
Expand All @@ -94,14 +96,16 @@ def _call(cmd_args, error=True):

_, stderr = proc.communicate()

if error and proc.returncode != 0:
LOGGER.error(
"%s exited with code %d: %s", " ".join(cmd_args), proc.returncode, stderr
)
if proc.returncode != 0:
if log_err:
LOGGER.error(
"%s exited with code %d: %s", " ".join(cmd_args), proc.returncode, stderr
)

raise subprocess.CalledProcessError(
returncode=proc.returncode, cmd=cmd_args, output=stderr
)
if raise_err:
raise subprocess.CalledProcessError(
returncode=proc.returncode, cmd=cmd_args, output=stderr
)

return proc.returncode

Expand All @@ -116,7 +120,7 @@ def _is_nbd_device_connected(nbd_device):
if not os.path.exists(nbd_device):
raise NbdDeviceNotFound(nbd_device)
cmd = ["nbd-client", "-check", nbd_device]
returncode = _call(cmd, error=False)
returncode = _call(cmd, raise_err=False, log_err=False)
if returncode == 0:
return True
if returncode == 1:
Expand Down Expand Up @@ -191,6 +195,8 @@ def connect_nbd(path, exportname):
"""Connects to a free NBD device using nbd-client and returns its path"""
# We should not ask for too many nbds, as we might not have enough memory
_call(["modprobe", "nbd", "nbds_max=24"])
# Wait for systemd-udevd to process the udev rules
_call(["udevadm", "settle", "--timeout=30"])
retries = 0
while True:
try:
Expand All @@ -206,7 +212,17 @@ def connect_nbd(path, exportname):
"-name",
exportname,
]
_call(cmd)
ret = _call(cmd, raise_err=False, log_err=True)
if NLE_BUSY == ret:
# Although _find_unused_nbd_device tell us the nbd devcie is
# not connected by other nbd-client, it may be opened and locked
# by other process like systemd-udev, raise NbdDeviceNotFound to retry
LOGGER.warning("Device %s is busy, will retry", nbd_device)
raise NbdDeviceNotFound(nbd_device)

if 0 != ret:
raise subprocess.CalledProcessError(returncode=ret, cmd=cmd)

_wait_for_nbd_device(nbd_device=nbd_device, connected=True)
_persist_connect_info(nbd_device, path, exportname)
nbd = (
Expand Down
6 changes: 4 additions & 2 deletions python3/tests/test_nbd_client_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ def test_nbd_device_connected(self, mock_call, mock_exists):
result = nbd_client_manager._is_nbd_device_connected('/dev/nbd0')

self.assertTrue(result)
mock_call.assert_called_once_with(["nbd-client", "-check", "/dev/nbd0"], error=False)
mock_call.assert_called_once_with(["nbd-client", "-check", "/dev/nbd0"],
raise_err=False, log_err=False)

@patch('nbd_client_manager._call')
def test_nbd_device_not_connected(self, mock_call, mock_exists):
Expand All @@ -53,7 +54,8 @@ def test_nbd_device_not_connected(self, mock_call, mock_exists):
result = nbd_client_manager._is_nbd_device_connected('/dev/nbd1')

self.assertFalse(result)
mock_call.assert_called_once_with(["nbd-client", "-check", "/dev/nbd1"], error=False)
mock_call.assert_called_once_with(["nbd-client", "-check", "/dev/nbd1"],
raise_err=False, log_err=False)

def test_nbd_device_not_found(self, mock_exists):
mock_exists.return_value = False
Expand Down
Loading