diff --git a/openhcl/underhill_core/src/dispatch/vtl2_settings_worker.rs b/openhcl/underhill_core/src/dispatch/vtl2_settings_worker.rs
index df03e53bdd..af867f13eb 100644
--- a/openhcl/underhill_core/src/dispatch/vtl2_settings_worker.rs
+++ b/openhcl/underhill_core/src/dispatch/vtl2_settings_worker.rs
@@ -993,6 +993,7 @@ async fn make_disk_type_from_physical_device(
             // We can't validate yet that this namespace actually exists. That will
             // be checked later.
             return Ok(Resource::new(NvmeDiskConfig {
+                name: controller_instance_id.to_string(),
                 pci_id,
                 nsid: sub_device_path,
             }));
diff --git a/openhcl/underhill_core/src/emuplat/netvsp.rs b/openhcl/underhill_core/src/emuplat/netvsp.rs
index bf517208fb..5e6c027df5 100644
--- a/openhcl/underhill_core/src/emuplat/netvsp.rs
+++ b/openhcl/underhill_core/src/emuplat/netvsp.rs
@@ -120,7 +120,7 @@ async fn try_create_mana_device(
     max_sub_channels: u16,
     dma_client: Arc<dyn DmaClient>,
 ) -> anyhow::Result<ManaDevice<VfioDevice>> {
-    let device = VfioDevice::new(driver_source, pci_id, dma_client)
+    let device = VfioDevice::new(driver_source, &pci_id.to_string(), None, dma_client)
         .await
         .context("failed to open device")?;
 
diff --git a/openhcl/underhill_core/src/nvme_manager.rs b/openhcl/underhill_core/src/nvme_manager.rs
index d49a48c117..a2c366679e 100644
--- a/openhcl/underhill_core/src/nvme_manager.rs
+++ b/openhcl/underhill_core/src/nvme_manager.rs
@@ -28,6 +28,14 @@ use std::collections::HashMap;
 use std::collections::hash_map;
 use thiserror::Error;
 use tracing::Instrument;
+
+/// Parameters for getting an NVMe namespace.
+#[derive(Debug, Clone, MeshPayload)]
+pub struct GetNamespaceParams {
+    pub debug_host_id: String,
+    pub pci_id: String,
+    pub nsid: u32,
+}
 use user_driver::vfio::VfioDevice;
 use vm_resource::AsyncResolveResource;
 use vm_resource::ResourceId;
@@ -175,7 +183,7 @@ impl NvmeManager {
 enum Request {
     Inspect(inspect::Deferred),
     ForceLoadDriver(inspect::DeferredUpdate),
-    GetNamespace(Rpc<(String, u32), Result<nvme_driver::Namespace, NamespaceError>>),
+    GetNamespace(Rpc<GetNamespaceParams, Result<nvme_driver::Namespace, NamespaceError>>),
     Save(Rpc<(), Result<NvmeManagerSavedState, anyhow::Error>>),
     Shutdown {
         span: tracing::Span,
@@ -191,13 +199,24 @@ pub struct NvmeManagerClient {
 impl NvmeManagerClient {
     pub async fn get_namespace(
         &self,
+        debug_host_id: String,
         pci_id: String,
         nsid: u32,
     ) -> anyhow::Result<nvme_driver::Namespace> {
+        let params = GetNamespaceParams {
+            debug_host_id,
+            pci_id,
+            nsid,
+        };
         Ok(self
             .sender
-            .call(Request::GetNamespace, (pci_id.clone(), nsid))
-            .instrument(tracing::info_span!("nvme_get_namespace", pci_id, nsid))
+            .call(Request::GetNamespace, params.clone())
+            .instrument(tracing::info_span!(
+                "nvme_get_namespace",
+                debug_host_id = params.debug_host_id,
+                pci_id = params.pci_id,
+                nsid
+            ))
             .await
             .context("nvme manager is shut down")??)
     }
@@ -235,7 +254,10 @@ impl NvmeManagerWorker {
             match req {
                 Request::Inspect(deferred) => deferred.inspect(&self),
                 Request::ForceLoadDriver(update) => {
-                    match self.get_driver(update.new_value().to_owned()).await {
+                    match self
+                        .get_driver("force-load".to_string(), update.new_value().to_owned())
+                        .await
+                    {
                         Ok(_) => {
                             let pci_id = update.new_value().to_string();
                             update.succeed(pci_id);
@@ -246,9 +268,12 @@ impl NvmeManagerWorker {
                     }
                 }
                 Request::GetNamespace(rpc) => {
-                    rpc.handle(async |(pci_id, nsid)| {
-                        self.get_namespace(pci_id.clone(), nsid)
-                            .map_err(|source| NamespaceError { pci_id, source })
+                    rpc.handle(async |params| {
+                        self.get_namespace(params.debug_host_id, params.pci_id.clone(), params.nsid)
+                            .map_err(|source| NamespaceError {
+                                pci_id: params.pci_id,
+                                source,
+                            })
                             .await
                     })
                     .await
@@ -283,9 +308,12 @@ impl NvmeManagerWorker {
         if !nvme_keepalive || !self.save_restore_supported {
             async {
                 join_all(self.devices.drain().map(|(pci_id, driver)| {
-                    driver
-                        .shutdown()
-                        .instrument(tracing::info_span!("shutdown_nvme_driver", pci_id))
+                    let name = driver.name().to_string();
+                    driver.shutdown().instrument(tracing::info_span!(
+                        "shutdown_nvme_driver",
+                        pci_id,
+                        name
+                    ))
                 }))
                 .await
             }
@@ -296,6 +324,7 @@ impl NvmeManagerWorker {
 
     async fn get_driver(
         &mut self,
+        name: String,
         pci_id: String,
     ) -> Result<&mut nvme_driver::NvmeDriver<VfioDevice>, InnerError> {
         let driver = match self.devices.entry(pci_id.to_owned()) {
@@ -315,10 +344,11 @@ impl NvmeManagerWorker {
                     })
                     .map_err(InnerError::DmaClient)?;
 
-                let device = VfioDevice::new(&self.driver_source, entry.key(), dma_client)
-                    .instrument(tracing::info_span!("vfio_device_open", pci_id))
-                    .await
-                    .map_err(InnerError::Vfio)?;
+                let device =
+                    VfioDevice::new(&self.driver_source, entry.key(), Some(&name), dma_client)
+                        .instrument(tracing::info_span!("vfio_device_open", pci_id))
+                        .await
+                        .map_err(InnerError::Vfio)?;
 
                 // TODO: For now, any isolation means use bounce buffering. This
                 // needs to change when we have nvme devices that support DMA to
@@ -328,6 +358,7 @@ impl NvmeManagerWorker {
                     self.vp_count,
                     device,
                     self.is_isolated,
+                    name,
                 )
                 .instrument(tracing::info_span!(
                     "nvme_driver_init",
@@ -344,10 +375,11 @@ impl NvmeManagerWorker {
 
     async fn get_namespace(
         &mut self,
+        name: String,
         pci_id: String,
         nsid: u32,
     ) -> Result<nvme_driver::Namespace, InnerError> {
-        let driver = self.get_driver(pci_id.to_owned()).await?;
+        let driver = self.get_driver(name, pci_id.to_owned()).await?;
         driver
             .namespace(nsid)
             .await
@@ -393,10 +425,15 @@ impl NvmeManagerWorker {
             // This code can wait on each VFIO device until it is arrived.
             // A potential optimization would be to delay VFIO operation
             // until it is ready, but a redesign of VfioDevice is needed.
-            let vfio_device =
-                VfioDevice::restore(&self.driver_source, &disk.pci_id.clone(), true, dma_client)
-                    .instrument(tracing::info_span!("vfio_device_restore", pci_id))
-                    .await?;
+            let vfio_device = VfioDevice::restore(
+                &self.driver_source,
+                &disk.pci_id,
+                Some(&format!("restored-{}", pci_id)),
+                true,
+                dma_client,
+            )
+            .instrument(tracing::info_span!("vfio_device_restore", pci_id))
+            .await?;
 
             // TODO: For now, any isolation means use bounce buffering. This
             // needs to change when we have nvme devices that support DMA to
@@ -407,6 +444,7 @@ impl NvmeManagerWorker {
                 vfio_device,
                 &disk.driver_state,
                 self.is_isolated,
+                format!("restored-{}", disk.pci_id), // Use PCI ID as name for restored drivers
             )
             .instrument(tracing::info_span!("nvme_driver_restore"))
             .await?;
@@ -440,7 +478,7 @@ impl AsyncResolveResource<DiskHandleKind, NvmeDiskConfig> for NvmeDiskResolver {
     ) -> Result<Self::Output, Self::Error> {
         let namespace = self
            .manager
-            .get_namespace(rsrc.pci_id, rsrc.nsid)
+            .get_namespace(rsrc.name, rsrc.pci_id, rsrc.nsid)
            .await
            .context("could not open nvme namespace")?;
 
@@ -448,12 +486,23 @@ impl AsyncResolveResource<DiskHandleKind, NvmeDiskConfig> for NvmeDiskResolver {
     }
 }
 
-#[derive(MeshPayload, Default)]
+#[derive(MeshPayload)]
 pub struct NvmeDiskConfig {
+    pub name: String,
     pub pci_id: String,
     pub nsid: u32,
 }
 
+impl Default for NvmeDiskConfig {
+    fn default() -> Self {
+        Self {
+            name: "force-load".to_string(),
+            pci_id: String::new(),
+            nsid: 0,
+        }
+    }
+}
+
 impl ResourceId for NvmeDiskConfig {
     const ID: &'static str = "nvme";
 }
diff --git a/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs b/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs
index 300608d124..cf93fad8d9 100644
--- a/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs
+++ b/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs
@@ -56,6 +56,7 @@ pub struct NvmeDriver<T: DeviceBacking> {
     #[inspect(flatten)]
     task: Option<TaskControl<DriverWorkerTask<T>, WorkerState>>,
     device_id: String,
+    name: String,
     identify: Option<Arc<spec::IdentifyController>>,
     #[inspect(skip)]
     driver: VmTaskDriver,
@@ -76,6 +77,7 @@ pub struct NvmeDriver<T: DeviceBacking> {
 #[derive(Inspect)]
 struct DriverWorkerTask<T: DeviceBacking> {
     device: T,
+    name: String,
     #[inspect(skip)]
     driver: VmTaskDriver,
     registers: Arc<DeviceRegisters<T>>,
@@ -179,19 +181,28 @@ impl<T: DeviceBacking> NvmeDriver<T> {
         cpu_count: u32,
         device: T,
         bounce_buffer: bool,
+        name: String,
     ) -> anyhow::Result<Self> {
         let pci_id = device.id().to_owned();
-        let mut this = Self::new_disabled(driver_source, cpu_count, device, bounce_buffer)
-            .instrument(tracing::info_span!("nvme_new_disabled", pci_id))
-            .await?;
+        let mut this = Self::new_disabled(
+            driver_source,
+            cpu_count,
+            device,
+            bounce_buffer,
+            name.clone(),
+        )
+        .instrument(tracing::info_span!("nvme_new_disabled", name, pci_id))
+        .await?;
         match this
             .enable(cpu_count as u16)
-            .instrument(tracing::info_span!("nvme_enable", pci_id))
+            .instrument(tracing::info_span!("nvme_enable", name, pci_id))
             .await
         {
             Ok(()) => Ok(this),
             Err(err) => {
                 tracing::error!(
+                    name = %name,
+                    pci_id = %pci_id,
                     error = err.as_ref() as &dyn std::error::Error,
                     "device initialization failed, shutting down"
                 );
@@ -208,6 +219,7 @@ impl<T: DeviceBacking> NvmeDriver<T> {
         cpu_count: u32,
         mut device: T,
         bounce_buffer: bool,
+        name: String,
     ) -> anyhow::Result<Self> {
         let driver = driver_source.simple();
         let bar0 = Bar0(
@@ -222,6 +234,7 @@ impl<T: DeviceBacking> NvmeDriver<T> {
                 .reset(&driver)
                 .instrument(tracing::info_span!(
                     "nvme_already_enabled",
+                    name = name,
                     pci_id = device.id().to_owned()
                 ))
                 .await
@@ -248,8 +261,10 @@ impl<T: DeviceBacking> NvmeDriver<T> {
 
         Ok(Self {
             device_id: device.id().to_owned(),
+            name: name.clone(),
             task: Some(TaskControl::new(DriverWorkerTask {
                 device,
+                name: name.clone(),
                 driver: driver.clone(),
                 registers,
                 admin: None,
@@ -314,7 +329,11 @@ impl<T: DeviceBacking> NvmeDriver<T> {
         worker.registers.bar0.set_acq(admin.cq_addr());
 
         // Enable the controller.
-        let span = tracing::info_span!("nvme_ctrl_enable", pci_id = worker.device.id().to_owned());
+        let span = tracing::info_span!(
+            "nvme_ctrl_enable",
+            name = worker.name,
+            pci_id = worker.device.id().to_owned()
+        );
         let ctrl_enable_span = span.enter();
         worker.registers.bar0.set_cc(
             spec::Cc::new()
@@ -381,6 +400,8 @@ impl<T: DeviceBacking> NvmeDriver<T> {
 
         let requested_io_queue_count = if max_interrupt_count < requested_io_queue_count as u32 {
             tracing::warn!(
+                name = %worker.name,
+                pci_id = %worker.device.id(),
                 max_interrupt_count,
                 requested_io_queue_count,
                 "queue count constrained by msi count"
@@ -412,6 +433,8 @@ impl<T: DeviceBacking> NvmeDriver<T> {
         let allocated_io_queue_count = sq_count.min(cq_count);
         if allocated_io_queue_count < requested_io_queue_count {
             tracing::warn!(
+                name = %worker.name,
+                pci_id = %worker.device.id(),
                 sq_count,
                 cq_count,
                 requested_io_queue_count,
@@ -437,9 +460,13 @@ impl<T: DeviceBacking> NvmeDriver<T> {
         let async_event_task = self.driver.spawn("nvme_async_event", {
             let admin = admin.issuer().clone();
             let rescan_event = self.rescan_event.clone();
+            let name = worker.name.clone();
+            let device_id = worker.device.id().to_owned();
             async move {
                 if let Err(err) = handle_asynchronous_events(&admin, &rescan_event).await {
                     tracing::error!(
+                        name = %name,
+                        pci_id = %device_id,
                         error = err.as_ref() as &dyn std::error::Error,
                         "asynchronous event failure, not processing any more"
                     );
@@ -496,7 +523,12 @@ impl<T: DeviceBacking> NvmeDriver<T> {
                 _admin_responses = admin.shutdown().await;
             }
             if let Err(e) = worker.registers.bar0.reset(&driver).await {
-                tracing::info!(csts = e, "device reset failed");
+                tracing::info!(
+                    name = %worker.name,
+                    pci_id = %worker.device.id(),
+                    csts = e,
+                    "device reset failed"
+                );
             }
         }
     }
@@ -568,6 +600,7 @@ impl<T: DeviceBacking> NvmeDriver<T> {
         mut device: T,
         saved_state: &NvmeDriverSavedState,
         bounce_buffer: bool,
+        name: String,
     ) -> anyhow::Result<Self> {
         let driver = driver_source.simple();
         let bar0_mapping = device
@@ -594,8 +627,10 @@ impl<T: DeviceBacking> NvmeDriver<T> {
 
         let mut this = Self {
             device_id: device.id().to_owned(),
+            name: name.clone(),
             task: Some(TaskControl::new(DriverWorkerTask {
                 device,
+                name: name.clone(),
                 driver: driver.clone(),
                 registers: registers.clone(),
                 admin: None, // Updated below.
@@ -661,9 +696,13 @@ impl<T: DeviceBacking> NvmeDriver<T> {
         let async_event_task = this.driver.spawn("nvme_async_event", {
             let admin = admin.issuer().clone();
             let rescan_event = this.rescan_event.clone();
+            let name = this.name.clone();
+            let device_id = this.device_id.clone();
             async move {
                 if let Err(err) = handle_asynchronous_events(&admin, &rescan_event).await {
                     tracing::error!(
+                        name = %name,
+                        pci_id = %device_id,
                         error = err.as_ref() as &dyn std::error::Error,
                         "asynchronous event failure, not processing any more"
                     );
@@ -739,6 +778,11 @@ impl<T: DeviceBacking> NvmeDriver<T> {
     pub fn update_servicing_flags(&mut self, nvme_keepalive: bool) {
         self.nvme_keepalive = nvme_keepalive;
     }
+
+    /// Returns the debug name assigned to this driver.
+    pub fn name(&self) -> &str {
+        &self.name
+    }
 }
 
 async fn handle_asynchronous_events(
@@ -869,6 +913,8 @@ impl<T: DeviceBacking> DriverWorkerTask<T> {
                 .unwrap();
 
             tracing::error!(
+                name = %self.name,
+                pci_id = %self.device.id(),
                 cpu,
                 fallback_cpu,
                 error = err.as_ref() as &dyn std::error::Error,
@@ -976,6 +1022,8 @@ impl<T: DeviceBacking> DriverWorkerTask<T> {
                 .await
             {
                 tracing::error!(
+                    name = %self.name,
+                    pci_id = %self.device.id(),
                     error = &err as &dyn std::error::Error,
                     "failed to delete completion queue in teardown path"
                 );
diff --git a/vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs b/vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs
index e87f8246fd..95cd98d0a4 100644
--- a/vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs
+++ b/vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs
@@ -78,7 +78,14 @@ async fn test_nvme_ioqueue_max_mqes(driver: DefaultDriver) {
     let cap: Cap = Cap::new().with_mqes_z(max_u16);
     device.set_mock_response_u64(Some((0, cap.into())));
 
-    let driver = NvmeDriver::new(&driver_source, CPU_COUNT, device, false).await;
+    let driver = NvmeDriver::new(
+        &driver_source,
+        CPU_COUNT,
+        device,
+        false,
+        "test-nvme".to_string(),
+    )
+    .await;
     assert!(driver.is_ok());
 }
 
@@ -113,7 +120,14 @@ async fn test_nvme_ioqueue_invalid_mqes(driver: DefaultDriver) {
     // Setup mock response at offset 0
     let cap: Cap = Cap::new().with_mqes_z(0);
     device.set_mock_response_u64(Some((0, cap.into())));
-    let driver = NvmeDriver::new(&driver_source, CPU_COUNT, device, false).await;
+    let driver = NvmeDriver::new(
+        &driver_source,
+        CPU_COUNT,
+        device,
+        false,
+        "test-nvme".to_string(),
+    )
+    .await;
     assert!(driver.is_err());
 }
 
@@ -150,9 +164,15 @@ async fn test_nvme_driver(driver: DefaultDriver, allow_dma: bool) {
         .await
         .unwrap();
     let device = NvmeTestEmulatedDevice::new(nvme, msi_set, dma_client.clone());
-    let driver = NvmeDriver::new(&driver_source, CPU_COUNT, device, false)
-        .await
-        .unwrap();
+    let driver = NvmeDriver::new(
+        &driver_source,
+        CPU_COUNT,
+        device,
+        false,
+        "test-nvme".to_string(),
+    )
+    .await
+    .unwrap();
     let namespace = driver.namespace(1).await.unwrap();
 
     // Act: Write 1024 bytes of data to disk starting at LBA 1.
@@ -266,9 +286,15 @@ async fn test_nvme_save_restore_inner(driver: DefaultDriver) {
         .unwrap();
 
     let device = NvmeTestEmulatedDevice::new(nvme_ctrl, msi_x, dma_client.clone());
-    let mut nvme_driver = NvmeDriver::new(&driver_source, CPU_COUNT, device, false)
-        .await
-        .unwrap();
+    let mut nvme_driver = NvmeDriver::new(
+        &driver_source,
+        CPU_COUNT,
+        device,
+        false,
+        "test-nvme".to_string(),
+    )
+    .await
+    .unwrap();
     let _ns1 = nvme_driver.namespace(1).await.unwrap();
     let saved_state = nvme_driver.save().await.unwrap();
     // As of today we do not save namespace data to avoid possible conflict
@@ -304,7 +330,7 @@ async fn test_nvme_save_restore_inner(driver: DefaultDriver) {
     let _new_device = NvmeTestEmulatedDevice::new(new_nvme_ctrl, new_msi_x, dma_client.clone());
 
     // TODO: Memory restore is disabled for emulated DMA, uncomment once fixed.
-    // let _new_nvme_driver = NvmeDriver::restore(&driver_source, CPU_COUNT, new_device, &saved_state)
+    // let _new_nvme_driver = NvmeDriver::restore(&driver_source, CPU_COUNT, new_device, &saved_state, false, "test-nvme".to_string())
     //     .await
     //     .unwrap();
 }
diff --git a/vm/devices/user_driver/src/vfio.rs b/vm/devices/user_driver/src/vfio.rs
index 0bc6a7670b..70e88c675c 100644
--- a/vm/devices/user_driver/src/vfio.rs
+++ b/vm/devices/user_driver/src/vfio.rs
@@ -1,7 +1,7 @@
 // Copyright (c) Microsoft Corporation.
 // Licensed under the MIT License.
 
-//! Support for accessing a MANA device via VFIO on Linux.
+//! Support for accessing devices via VFIO on Linux.
 
 #![cfg(target_os = "linux")]
 #![cfg(feature = "vfio")]
@@ -42,6 +42,7 @@ use zerocopy::KnownLayout;
 #[derive(Inspect)]
 pub struct VfioDevice {
     pci_id: Arc<str>,
+    debug_bus_id: Option<Arc<str>>,
     #[inspect(skip)]
     _container: vfio_sys::Container,
     #[inspect(skip)]
@@ -73,9 +74,10 @@ impl VfioDevice {
     pub async fn new(
         driver_source: &VmTaskDriverSource,
         pci_id: &str,
+        debug_bus_id: Option<&str>,
         dma_client: Arc<dyn DmaClient>,
     ) -> anyhow::Result<Self> {
-        Self::restore(driver_source, pci_id, false, dma_client).await
+        Self::restore(driver_source, pci_id, debug_bus_id, false, dma_client).await
     }
 
     /// Creates a new VFIO-backed device for the PCI device with `pci_id`.
@@ -83,6 +85,7 @@ impl VfioDevice {
     pub async fn restore(
         driver_source: &VmTaskDriverSource,
         pci_id: &str,
+        debug_bus_id: Option<&str>,
         keepalive: bool,
         dma_client: Arc<dyn DmaClient>,
     ) -> anyhow::Result<Self> {
@@ -112,9 +115,9 @@ impl VfioDevice {
         container.set_iommu(IommuType::NoIommu)?;
         if keepalive {
             // Prevent physical hardware interaction when restoring.
-            group.set_keep_alive(pci_id)?;
+            group.set_keep_alive(&pci_id)?;
         }
-        let device = group.open_device(pci_id)?;
+        let device = group.open_device(&pci_id)?;
         let msix_info = device.irq_info(vfio_bindings::bindings::vfio::VFIO_PCI_MSIX_IRQ_INDEX)?;
         if msix_info.flags.noresize() {
             anyhow::bail!("unsupported: kernel does not support dynamic msix allocation");
@@ -123,6 +126,7 @@ impl VfioDevice {
         let config_space = device.region_info(VFIO_PCI_CONFIG_REGION_INDEX)?;
         let this = Self {
             pci_id: pci_id.into(),
+            debug_bus_id: debug_bus_id.map(|s| s.into()),
             _container: container,
             _group: group,
             device: Arc::new(device),
@@ -154,6 +158,11 @@ impl VfioDevice {
         Ok(())
     }
 
+    /// Returns the debug bus ID for diagnostic purposes.
+    pub fn debug_bus_id(&self) -> Option<&str> {
+        self.debug_bus_id.as_deref()
+    }
+
     pub fn read_config(&self, offset: u16) -> anyhow::Result<u32> {
         if offset as u64 > self.config_space.size - 4 {
             anyhow::bail!("invalid config offset");
@@ -258,7 +267,7 @@ impl DeviceBacking for VfioDevice {
         }
 
         let new_interrupt = {
-            let name = format!("vfio-interrupt-{pci_id}-{msix}", pci_id = self.pci_id);
+            let name = format!("vfio-interrupt-{pci_id}-{msix}", pci_id = &*self.pci_id);
             let driver = self
                 .driver_source
                 .builder()
@@ -347,7 +356,7 @@ impl InterruptTask {
                 // However, it is not a fatal error--it will just result in
                 // worse performance--so do not panic.
                 tracing::error!(
-                    pci_id = self.pci_id.as_ref(),
+                    pci_id = &*self.pci_id,
                     msix = self.msix,
                     irq = self.irq,
                     error = &err as &dyn std::error::Error,
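
Usage sketch (not part of the patch): the helper below shows how a caller threads the new diagnostic name through both layers, following the signatures in this diff. `open_named_nvme`, the parameter values, and the import paths (`vmcore::vm_task::VmTaskDriverSource`, `user_driver::DmaClient`) are illustrative assumptions, not code from this change.

```rust
use std::sync::Arc;

use anyhow::Context;
use nvme_driver::NvmeDriver;
use user_driver::DmaClient;
use user_driver::vfio::VfioDevice;
use vmcore::vm_task::VmTaskDriverSource;

// Hypothetical helper: opens an NVMe controller with a human-readable name
// attached, so spans like `vfio_device_open` and `nvme_new_disabled` carry it.
async fn open_named_nvme(
    driver_source: &VmTaskDriverSource,
    dma_client: Arc<dyn DmaClient>,
    controller_instance_id: &str,
    pci_id: &str,
    vp_count: u32,
) -> anyhow::Result<NvmeDriver<VfioDevice>> {
    // `debug_bus_id` is optional; callers with no stable identifier
    // (e.g. the MANA path in netvsp.rs) pass `None`.
    let device = VfioDevice::new(
        driver_source,
        pci_id,
        Some(controller_instance_id),
        dma_client,
    )
    .await
    .context("failed to open device")?;

    // The driver now requires a name and attaches it to every tracing span
    // and error event it emits.
    NvmeDriver::new(
        driver_source,
        vp_count,
        device,
        false, // bounce_buffer
        controller_instance_id.to_string(),
    )
    .await
}
```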
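On the manager side, the client call gains the debug ID as a new first argument. A minimal sketch of the updated call shape, assuming an `NvmeManagerClient` in scope and placeholder argument values:

```rust
// Placeholder IDs: in this diff the debug ID comes from the VTL2 settings
// (`controller_instance_id`), and "force-load" is used when none exists.
let namespace = nvme_manager_client
    .get_namespace(
        "00000000-0000-0000-0000-000000000000".to_string(), // debug_host_id
        "0000:00:1e.0".to_string(),                         // pci_id
        1,                                                  // nsid
    )
    .await
    .context("could not open nvme namespace")?;
```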