nvme: Print controller GUID in shutdown_nvme_driver span #1619

Status: Open · wants to merge 3 commits into base: main
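This PR threads a controller instance GUID from the disk config down through the NVMe manager into the driver, so that the shutdown_nvme_driver tracing span records it alongside the PCI ID. A minimal sketch of the tracing field capture the span relies on (the values below are hypothetical; the real span is built in nvme_manager.rs further down):

```rust
use tracing::info_span;

fn main() {
    // Hypothetical values, for illustration only.
    let pci_id = "0000:00:00.0".to_string();
    let controller_instance_id = Some("00000000-0000-0000-0000-000000000000".to_string());

    // `%` records a span field via Display, `?` via Debug; both are stock
    // `tracing` shorthand. The span carries both identifiers, so a shutdown
    // trace can be correlated with the controller's GUID.
    let span = info_span!(
        "shutdown_nvme_driver",
        pci_id = %pci_id,
        controller_instance_id = ?controller_instance_id
    );
    let _entered = span.enter();
    // Driver shutdown would run inside the span here.
}
```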
@@ -993,6 +993,7 @@ async fn make_disk_type_from_physical_device(
// We can't validate yet that this namespace actually exists. That will
// be checked later.
return Ok(Resource::new(NvmeDiskConfig {
controller_instance_id: controller_instance_id.to_string(),
pci_id,
nsid: sub_device_path,
}));
40 changes: 29 additions & 11 deletions openhcl/underhill_core/src/nvme_manager.rs
@@ -175,7 +175,7 @@ impl NvmeManager {
enum Request {
Inspect(inspect::Deferred),
ForceLoadDriver(inspect::DeferredUpdate),
GetNamespace(Rpc<(String, u32), Result<nvme_driver::Namespace, NamespaceError>>),
GetNamespace(Rpc<(String, String, u32), Result<nvme_driver::Namespace, NamespaceError>>),
Save(Rpc<(), Result<NvmeManagerSavedState, anyhow::Error>>),
Shutdown {
span: tracing::Span,
@@ -191,13 +191,22 @@ pub struct NvmeManagerClient {
impl NvmeManagerClient {
pub async fn get_namespace(
&self,
controller_instance_id: String,
pci_id: String,
nsid: u32,
) -> anyhow::Result<nvme_driver::Namespace> {
Ok(self
.sender
.call(Request::GetNamespace, (pci_id.clone(), nsid))
.instrument(tracing::info_span!("nvme_get_namespace", pci_id, nsid))
.call(
Request::GetNamespace,
(controller_instance_id.clone(), pci_id.clone(), nsid),
)
.instrument(tracing::info_span!(
"nvme_get_namespace",
controller_instance_id,
pci_id,
nsid
))
.await
.context("nvme manager is shut down")??)
}
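For context, a sketch of a call site under the widened client API; this assumes an nvme_manager_client: NvmeManagerClient already in scope, and the GUID and PCI ID are placeholders rather than values from this PR:

```rust
// Placeholder arguments; real callers pass values resolved from the disk
// configuration (see NvmeDiskConfig below).
let namespace = nvme_manager_client
    .get_namespace(
        "00000000-0000-0000-0000-000000000000".to_string(), // controller_instance_id
        "0000:00:00.0".to_string(),                         // pci_id
        1,                                                  // nsid
    )
    .await?;
```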
@@ -235,7 +244,7 @@ impl NvmeManagerWorker {
match req {
Request::Inspect(deferred) => deferred.inspect(&self),
Request::ForceLoadDriver(update) => {
match self.get_driver(update.new_value().to_owned()).await {
match self.get_driver(None, update.new_value().to_owned()).await {
Ok(_) => {
let pci_id = update.new_value().to_string();
update.succeed(pci_id);
@@ -246,8 +255,8 @@
}
}
Request::GetNamespace(rpc) => {
rpc.handle(async |(pci_id, nsid)| {
self.get_namespace(pci_id.clone(), nsid)
rpc.handle(async |(controller_instance_id, pci_id, nsid)| {
self.get_namespace(controller_instance_id, pci_id.clone(), nsid)
.map_err(|source| NamespaceError { pci_id, source })
.await
})
@@ -283,9 +292,12 @@ impl NvmeManagerWorker {
if !nvme_keepalive || !self.save_restore_supported {
async {
join_all(self.devices.drain().map(|(pci_id, driver)| {
driver
.shutdown()
.instrument(tracing::info_span!("shutdown_nvme_driver", pci_id))
let controller_instance_id = driver.controller_instance_id();
driver.shutdown().instrument(tracing::info_span!(
"shutdown_nvme_driver",
pci_id,
controller_instance_id
))
}))
.await
}
@@ -296,6 +308,7 @@

async fn get_driver(
&mut self,
controller_instance_id: Option<String>,
pci_id: String,
) -> Result<&mut nvme_driver::NvmeDriver<VfioDevice>, InnerError> {
let driver = match self.devices.entry(pci_id.to_owned()) {
@@ -328,6 +341,7 @@
self.vp_count,
device,
self.is_isolated,
controller_instance_id,
)
.instrument(tracing::info_span!(
"nvme_driver_init",
@@ -344,10 +358,13 @@

async fn get_namespace(
&mut self,
controller_instance_id: String,
pci_id: String,
nsid: u32,
) -> Result<nvme_driver::Namespace, InnerError> {
let driver = self.get_driver(pci_id.to_owned()).await?;
let driver = self
.get_driver(Some(controller_instance_id), pci_id.to_owned())
.await?;
driver
.namespace(nsid)
.await
@@ -440,7 +457,7 @@ impl AsyncResolveResource<DiskHandleKind, NvmeDiskConfig> for NvmeDiskResolver {
) -> Result<Self::Output, Self::Error> {
let namespace = self
.manager
.get_namespace(rsrc.pci_id, rsrc.nsid)
.get_namespace(rsrc.controller_instance_id, rsrc.pci_id, rsrc.nsid)
.await
.context("could not open nvme namespace")?;

@@ -450,6 +467,7 @@

#[derive(MeshPayload, Default)]
pub struct NvmeDiskConfig {
pub controller_instance_id: String,
pub pci_id: String,
pub nsid: u32,
}
@@ -64,7 +64,14 @@ impl FuzzNvmeDriver {
.unwrap();

let device = FuzzEmulatedDevice::new(nvme, msi_set, mem.dma_client());
let nvme_driver = NvmeDriver::new(&driver_source, cpu_count, device, false).await?; // TODO: [use-arbitrary-input]
let nvme_driver = NvmeDriver::new(
&driver_source,
cpu_count,
device,
false,
Some(guid.to_string()),
)
.await?; // TODO: [use-arbitrary-input]
let namespace = nvme_driver.namespace(1).await?; // TODO: [use-arbitrary-input]

Ok(Self {
26 changes: 23 additions & 3 deletions vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs
@@ -56,6 +56,7 @@ pub struct NvmeDriver<T: DeviceBacking> {
#[inspect(flatten)]
task: Option<TaskControl<DriverWorkerTask<T>, WorkerState>>,
device_id: String,
controller_instance_id: Option<String>,
identify: Option<Arc<spec::IdentifyController>>,
#[inspect(skip)]
driver: VmTaskDriver,
@@ -179,11 +180,18 @@
cpu_count: u32,
device: T,
bounce_buffer: bool,
controller_instance_id: Option<String>,
) -> anyhow::Result<Self> {
let pci_id = device.id().to_owned();
let mut this = Self::new_disabled(driver_source, cpu_count, device, bounce_buffer)
.instrument(tracing::info_span!("nvme_new_disabled", pci_id))
.await?;
let mut this = Self::new_disabled(
driver_source,
cpu_count,
device,
bounce_buffer,
controller_instance_id,
)
.instrument(tracing::info_span!("nvme_new_disabled", pci_id))
.await?;
match this
.enable(cpu_count as u16)
.instrument(tracing::info_span!("nvme_enable", pci_id))
@@ -208,6 +216,7 @@ impl<T: DeviceBacking> NvmeDriver<T> {
cpu_count: u32,
mut device: T,
bounce_buffer: bool,
controller_instance_id: Option<String>,
) -> anyhow::Result<Self> {
let driver = driver_source.simple();
let bar0 = Bar0(
@@ -248,6 +257,7 @@

Ok(Self {
device_id: device.id().to_owned(),
controller_instance_id,
task: Some(TaskControl::new(DriverWorkerTask {
device,
driver: driver.clone(),
@@ -555,6 +565,7 @@ impl<T: DeviceBacking> NvmeDriver<T> {
// TODO: See the description above, save the vector once resolved.
namespaces: vec![],
worker_data: s,
controller_instance_id: self.controller_instance_id.clone(),
})
}
Err(e) => Err(e),
@@ -594,6 +605,7 @@

let mut this = Self {
device_id: device.id().to_owned(),
controller_instance_id: saved_state.controller_instance_id.clone(),
task: Some(TaskControl::new(DriverWorkerTask {
device,
driver: driver.clone(),
@@ -739,6 +751,11 @@ impl<T: DeviceBacking> NvmeDriver<T> {
pub fn update_servicing_flags(&mut self, nvme_keepalive: bool) {
self.nvme_keepalive = nvme_keepalive;
}

/// Get the controller instance ID, if set.
pub fn controller_instance_id(&self) -> Option<String> {
self.controller_instance_id.clone()
}
}

async fn handle_asynchronous_events(
@@ -1051,6 +1068,9 @@ pub mod save_restore {
/// NVMe driver worker task data.
#[mesh(4)]
pub worker_data: NvmeDriverWorkerSavedState,
/// Controller instance ID.
#[mesh(5)]
pub controller_instance_id: Option<String>,
Reviewer (Member): Why would we need to save this?

Author (Contributor): If we don't have this, where does it get set back to its value when we do a restore? We do not call get_driver on that path, and that is the only other place that sets it.
}

/// Save/restore state for NVMe driver worker task.
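The review exchange above is the crux of the new saved-state field: the restore path never calls get_driver, so the saved state is the only place the GUID can come from after servicing. A self-contained sketch of that round-trip, using stand-in types rather than the real driver and mesh saved-state types:

```rust
// Stand-in types; the real driver holds many more fields.
#[derive(Clone)]
struct SavedState {
    controller_instance_id: Option<String>,
}

struct Driver {
    controller_instance_id: Option<String>,
}

impl Driver {
    fn save(&self) -> SavedState {
        SavedState {
            controller_instance_id: self.controller_instance_id.clone(),
        }
    }

    fn restore(saved: &SavedState) -> Self {
        // Restore never consults the NVMe manager, so the GUID must
        // round-trip through the saved state itself.
        Driver {
            controller_instance_id: saved.controller_instance_id.clone(),
        }
    }
}

fn main() {
    let original = Driver {
        controller_instance_id: Some("guid".to_string()),
    };
    let restored = Driver::restore(&original.save());
    assert_eq!(restored.controller_instance_id.as_deref(), Some("guid"));
}
```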
54 changes: 42 additions & 12 deletions vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs
@@ -59,6 +59,7 @@ async fn test_nvme_ioqueue_max_mqes(driver: DefaultDriver) {
// Controller Driver Setup
let driver_source = VmTaskDriverSource::new(SingleDriverBackend::new(driver));
let mut msi_set = MsiInterruptSet::new();
let nvme_controller_guid = Guid::new_random();
let nvme = nvme::NvmeController::new(
&driver_source,
guest_mem,
@@ -67,7 +68,7 @@
NvmeControllerCaps {
msix_count: MSIX_COUNT,
max_io_queues: IO_QUEUE_COUNT,
subsystem_id: Guid::new_random(),
subsystem_id: nvme_controller_guid,
},
);

@@ -78,7 +79,14 @@
let cap: Cap = Cap::new().with_mqes_z(max_u16);
device.set_mock_response_u64(Some((0, cap.into())));

let driver = NvmeDriver::new(&driver_source, CPU_COUNT, device, false).await;
let driver = NvmeDriver::new(
&driver_source,
CPU_COUNT,
device,
false,
Some(nvme_controller_guid.to_string()),
)
.await;
assert!(driver.is_ok());
}

@@ -96,6 +104,7 @@ async fn test_nvme_ioqueue_invalid_mqes(driver: DefaultDriver) {

let driver_source = VmTaskDriverSource::new(SingleDriverBackend::new(driver));
let mut msi_set = MsiInterruptSet::new();
let nvme_controller_guid = Guid::new_random();
let nvme = nvme::NvmeController::new(
&driver_source,
guest_mem,
@@ -104,7 +113,7 @@
NvmeControllerCaps {
msix_count: MSIX_COUNT,
max_io_queues: IO_QUEUE_COUNT,
subsystem_id: Guid::new_random(),
subsystem_id: nvme_controller_guid,
},
);

@@ -113,7 +122,14 @@
// Setup mock response at offset 0
let cap: Cap = Cap::new().with_mqes_z(0);
device.set_mock_response_u64(Some((0, cap.into())));
let driver = NvmeDriver::new(&driver_source, CPU_COUNT, device, false).await;
let driver = NvmeDriver::new(
&driver_source,
CPU_COUNT,
device,
false,
Some(nvme_controller_guid.to_string()),
)
.await;

assert!(driver.is_err());
}
@@ -133,6 +149,7 @@ async fn test_nvme_driver(driver: DefaultDriver, allow_dma: bool) {
// Arrange: Create the NVMe controller and driver.
let driver_source = VmTaskDriverSource::new(SingleDriverBackend::new(driver));
let mut msi_set = MsiInterruptSet::new();
let nvme_controller_guid = Guid::new_random();
let nvme = nvme::NvmeController::new(
&driver_source,
guest_mem.clone(),
@@ -141,7 +158,7 @@
NvmeControllerCaps {
msix_count: MSIX_COUNT,
max_io_queues: IO_QUEUE_COUNT,
subsystem_id: Guid::new_random(),
subsystem_id: nvme_controller_guid,
},
);

@@ -150,9 +167,15 @@
.await
.unwrap();
let device = NvmeTestEmulatedDevice::new(nvme, msi_set, dma_client.clone());
let driver = NvmeDriver::new(&driver_source, CPU_COUNT, device, false)
.await
.unwrap();
let driver = NvmeDriver::new(
&driver_source,
CPU_COUNT,
device,
false,
Some(nvme_controller_guid.to_string()),
)
.await
.unwrap();
let namespace = driver.namespace(1).await.unwrap();

// Act: Write 1024 bytes of data to disk starting at LBA 1.
@@ -246,6 +269,7 @@ async fn test_nvme_save_restore_inner(driver: DefaultDriver) {

let driver_source = VmTaskDriverSource::new(SingleDriverBackend::new(driver.clone()));
let mut msi_x = MsiInterruptSet::new();
let nvme_controller_guid = Guid::new_random();
let nvme_ctrl = nvme::NvmeController::new(
&driver_source,
guest_mem.clone(),
@@ -254,7 +278,7 @@
NvmeControllerCaps {
msix_count: MSIX_COUNT,
max_io_queues: IO_QUEUE_COUNT,
subsystem_id: Guid::new_random(),
subsystem_id: nvme_controller_guid,
},
);

@@ -266,9 +290,15 @@
.unwrap();

let device = NvmeTestEmulatedDevice::new(nvme_ctrl, msi_x, dma_client.clone());
let mut nvme_driver = NvmeDriver::new(&driver_source, CPU_COUNT, device, false)
.await
.unwrap();
let mut nvme_driver = NvmeDriver::new(
&driver_source,
CPU_COUNT,
device,
false,
Some(nvme_controller_guid.to_string()),
)
.await
.unwrap();
let _ns1 = nvme_driver.namespace(1).await.unwrap();
let saved_state = nvme_driver.save().await.unwrap();
// As of today we do not save namespace data to avoid possible conflict
15 changes: 11 additions & 4 deletions vm/devices/storage/storage_tests/tests/scsidvd_nvme.rs
@@ -55,6 +55,7 @@ impl ScsiDvdNvmeTest {
let payload_mem = mem.payload_mem();

let mut msi_set = MsiInterruptSet::new();
let nvme_controller_guid = Guid::new_random();
let nvme = NvmeController::new(
&driver_source,
guest_mem.clone(),
@@ -63,7 +64,7 @@
NvmeControllerCaps {
msix_count: MSIX_COUNT,
max_io_queues: IO_QUEUE_COUNT,
subsystem_id: Guid::new_random(),
subsystem_id: nvme_controller_guid,
},
);

@@ -79,9 +80,15 @@
.unwrap();

let device = EmulatedDevice::new(nvme, msi_set, dma_client.clone());
let nvme_driver = NvmeDriver::new(&driver_source, CPU_COUNT, device, false)
.await
.unwrap();
let nvme_driver = NvmeDriver::new(
&driver_source,
CPU_COUNT,
device,
false,
Some(nvme_controller_guid.to_string()),
)
.await
.unwrap();
let namespace = nvme_driver.namespace(1).await.unwrap();
let buf_range = OwnedRequestBuffers::linear(0, 16384, true);
for i in 0..(sector_count / 8) {