From be125d2bd4da9d8910ca74dd46d1ecf5d0da05fb Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 8 Jul 2025 18:09:43 +0000 Subject: [PATCH 1/9] Initial plan From 746da07f0a4c56cfabae9af7c6514a5033118c37 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 8 Jul 2025 18:27:50 +0000 Subject: [PATCH 2/9] Add controller_instance_id support to NvmeDriver for better tracing Co-authored-by: mattkur <13772048+mattkur@users.noreply.github.com> --- .../src/dispatch/vtl2_settings_worker.rs | 1 + openhcl/underhill_core/src/nvme_manager.rs | 44 ++++++++++++++----- .../disk_nvme/nvme_driver/src/driver.rs | 23 ++++++++-- .../disk_nvme/nvme_driver/src/tests.rs | 10 ++--- 4 files changed, 59 insertions(+), 19 deletions(-) diff --git a/openhcl/underhill_core/src/dispatch/vtl2_settings_worker.rs b/openhcl/underhill_core/src/dispatch/vtl2_settings_worker.rs index df03e53bdd..f4c8093ccd 100644 --- a/openhcl/underhill_core/src/dispatch/vtl2_settings_worker.rs +++ b/openhcl/underhill_core/src/dispatch/vtl2_settings_worker.rs @@ -993,6 +993,7 @@ async fn make_disk_type_from_physical_device( // We can't validate yet that this namespace actually exists. That will // be checked later. return Ok(Resource::new(NvmeDiskConfig { + controller_instance_id: Some(controller_instance_id.to_string()), pci_id, nsid: sub_device_path, })); diff --git a/openhcl/underhill_core/src/nvme_manager.rs b/openhcl/underhill_core/src/nvme_manager.rs index d49a48c117..157a88506a 100644 --- a/openhcl/underhill_core/src/nvme_manager.rs +++ b/openhcl/underhill_core/src/nvme_manager.rs @@ -175,7 +175,9 @@ impl NvmeManager { enum Request { Inspect(inspect::Deferred), ForceLoadDriver(inspect::DeferredUpdate), - GetNamespace(Rpc<(String, u32), Result>), + GetNamespace( + Rpc<(Option, String, u32), Result>, + ), Save(Rpc<(), Result>), Shutdown { span: tracing::Span, @@ -191,13 +193,22 @@ pub struct NvmeManagerClient { impl NvmeManagerClient { pub async fn get_namespace( &self, + controller_instance_id: Option, pci_id: String, nsid: u32, ) -> anyhow::Result { Ok(self .sender - .call(Request::GetNamespace, (pci_id.clone(), nsid)) - .instrument(tracing::info_span!("nvme_get_namespace", pci_id, nsid)) + .call( + Request::GetNamespace, + (controller_instance_id.clone(), pci_id.clone(), nsid), + ) + .instrument(tracing::info_span!( + "nvme_get_namespace", + controller_instance_id = controller_instance_id.as_ref().map(|s| s.as_str()), + pci_id, + nsid + )) .await .context("nvme manager is shut down")??) } @@ -235,7 +246,7 @@ impl NvmeManagerWorker { match req { Request::Inspect(deferred) => deferred.inspect(&self), Request::ForceLoadDriver(update) => { - match self.get_driver(update.new_value().to_owned()).await { + match self.get_driver(None, update.new_value().to_owned()).await { Ok(_) => { let pci_id = update.new_value().to_string(); update.succeed(pci_id); @@ -246,8 +257,8 @@ impl NvmeManagerWorker { } } Request::GetNamespace(rpc) => { - rpc.handle(async |(pci_id, nsid)| { - self.get_namespace(pci_id.clone(), nsid) + rpc.handle(async |(controller_instance_id, pci_id, nsid)| { + self.get_namespace(controller_instance_id, pci_id.clone(), nsid) .map_err(|source| NamespaceError { pci_id, source }) .await }) @@ -283,9 +294,13 @@ impl NvmeManagerWorker { if !nvme_keepalive || !self.save_restore_supported { async { join_all(self.devices.drain().map(|(pci_id, driver)| { - driver - .shutdown() - .instrument(tracing::info_span!("shutdown_nvme_driver", pci_id)) + let controller_instance_id = driver.controller_instance_id(); + driver.shutdown().instrument(tracing::info_span!( + "shutdown_nvme_driver", + pci_id, + controller_instance_id = + controller_instance_id.as_ref().map(|s| s.as_str()) + )) })) .await } @@ -296,6 +311,7 @@ impl NvmeManagerWorker { async fn get_driver( &mut self, + controller_instance_id: Option, pci_id: String, ) -> Result<&mut nvme_driver::NvmeDriver, InnerError> { let driver = match self.devices.entry(pci_id.to_owned()) { @@ -328,6 +344,7 @@ impl NvmeManagerWorker { self.vp_count, device, self.is_isolated, + controller_instance_id, ) .instrument(tracing::info_span!( "nvme_driver_init", @@ -344,10 +361,13 @@ impl NvmeManagerWorker { async fn get_namespace( &mut self, + controller_instance_id: Option, pci_id: String, nsid: u32, ) -> Result { - let driver = self.get_driver(pci_id.to_owned()).await?; + let driver = self + .get_driver(controller_instance_id, pci_id.to_owned()) + .await?; driver .namespace(nsid) .await @@ -407,6 +427,7 @@ impl NvmeManagerWorker { vfio_device, &disk.driver_state, self.is_isolated, + None, // controller_instance_id is not persisted in saved state ) .instrument(tracing::info_span!("nvme_driver_restore")) .await?; @@ -440,7 +461,7 @@ impl AsyncResolveResource for NvmeDiskResolver { ) -> Result { let namespace = self .manager - .get_namespace(rsrc.pci_id, rsrc.nsid) + .get_namespace(rsrc.controller_instance_id, rsrc.pci_id, rsrc.nsid) .await .context("could not open nvme namespace")?; @@ -450,6 +471,7 @@ impl AsyncResolveResource for NvmeDiskResolver { #[derive(MeshPayload, Default)] pub struct NvmeDiskConfig { + pub controller_instance_id: Option, pub pci_id: String, pub nsid: u32, } diff --git a/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs b/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs index 300608d124..88e61d8a29 100644 --- a/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs +++ b/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs @@ -56,6 +56,7 @@ pub struct NvmeDriver { #[inspect(flatten)] task: Option, WorkerState>>, device_id: String, + controller_instance_id: Option, identify: Option>, #[inspect(skip)] driver: VmTaskDriver, @@ -179,11 +180,18 @@ impl NvmeDriver { cpu_count: u32, device: T, bounce_buffer: bool, + controller_instance_id: Option, ) -> anyhow::Result { let pci_id = device.id().to_owned(); - let mut this = Self::new_disabled(driver_source, cpu_count, device, bounce_buffer) - .instrument(tracing::info_span!("nvme_new_disabled", pci_id)) - .await?; + let mut this = Self::new_disabled( + driver_source, + cpu_count, + device, + bounce_buffer, + controller_instance_id, + ) + .instrument(tracing::info_span!("nvme_new_disabled", pci_id)) + .await?; match this .enable(cpu_count as u16) .instrument(tracing::info_span!("nvme_enable", pci_id)) @@ -208,6 +216,7 @@ impl NvmeDriver { cpu_count: u32, mut device: T, bounce_buffer: bool, + controller_instance_id: Option, ) -> anyhow::Result { let driver = driver_source.simple(); let bar0 = Bar0( @@ -248,6 +257,7 @@ impl NvmeDriver { Ok(Self { device_id: device.id().to_owned(), + controller_instance_id, task: Some(TaskControl::new(DriverWorkerTask { device, driver: driver.clone(), @@ -568,6 +578,7 @@ impl NvmeDriver { mut device: T, saved_state: &NvmeDriverSavedState, bounce_buffer: bool, + controller_instance_id: Option, ) -> anyhow::Result { let driver = driver_source.simple(); let bar0_mapping = device @@ -594,6 +605,7 @@ impl NvmeDriver { let mut this = Self { device_id: device.id().to_owned(), + controller_instance_id, task: Some(TaskControl::new(DriverWorkerTask { device, driver: driver.clone(), @@ -739,6 +751,11 @@ impl NvmeDriver { pub fn update_servicing_flags(&mut self, nvme_keepalive: bool) { self.nvme_keepalive = nvme_keepalive; } + + /// Get the controller instance ID, if set. + pub fn controller_instance_id(&self) -> Option { + self.controller_instance_id.clone() + } } async fn handle_asynchronous_events( diff --git a/vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs b/vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs index e87f8246fd..7bd19f3e93 100644 --- a/vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs +++ b/vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs @@ -78,7 +78,7 @@ async fn test_nvme_ioqueue_max_mqes(driver: DefaultDriver) { let cap: Cap = Cap::new().with_mqes_z(max_u16); device.set_mock_response_u64(Some((0, cap.into()))); - let driver = NvmeDriver::new(&driver_source, CPU_COUNT, device, false).await; + let driver = NvmeDriver::new(&driver_source, CPU_COUNT, device, false, None).await; assert!(driver.is_ok()); } @@ -113,7 +113,7 @@ async fn test_nvme_ioqueue_invalid_mqes(driver: DefaultDriver) { // Setup mock response at offset 0 let cap: Cap = Cap::new().with_mqes_z(0); device.set_mock_response_u64(Some((0, cap.into()))); - let driver = NvmeDriver::new(&driver_source, CPU_COUNT, device, false).await; + let driver = NvmeDriver::new(&driver_source, CPU_COUNT, device, false, None).await; assert!(driver.is_err()); } @@ -150,7 +150,7 @@ async fn test_nvme_driver(driver: DefaultDriver, allow_dma: bool) { .await .unwrap(); let device = NvmeTestEmulatedDevice::new(nvme, msi_set, dma_client.clone()); - let driver = NvmeDriver::new(&driver_source, CPU_COUNT, device, false) + let driver = NvmeDriver::new(&driver_source, CPU_COUNT, device, false, None) .await .unwrap(); let namespace = driver.namespace(1).await.unwrap(); @@ -266,7 +266,7 @@ async fn test_nvme_save_restore_inner(driver: DefaultDriver) { .unwrap(); let device = NvmeTestEmulatedDevice::new(nvme_ctrl, msi_x, dma_client.clone()); - let mut nvme_driver = NvmeDriver::new(&driver_source, CPU_COUNT, device, false) + let mut nvme_driver = NvmeDriver::new(&driver_source, CPU_COUNT, device, false, None) .await .unwrap(); let _ns1 = nvme_driver.namespace(1).await.unwrap(); @@ -304,7 +304,7 @@ async fn test_nvme_save_restore_inner(driver: DefaultDriver) { let _new_device = NvmeTestEmulatedDevice::new(new_nvme_ctrl, new_msi_x, dma_client.clone()); // TODO: Memory restore is disabled for emulated DMA, uncomment once fixed. - // let _new_nvme_driver = NvmeDriver::restore(&driver_source, CPU_COUNT, new_device, &saved_state) + // let _new_nvme_driver = NvmeDriver::restore(&driver_source, CPU_COUNT, new_device, &saved_state, false, None) // .await // .unwrap(); } From e76bedf800b75839a9f25aa336b5ba4f90eec024 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 8 Jul 2025 19:02:25 +0000 Subject: [PATCH 3/9] Rename controller_instance_id to debug_id and make it mandatory Co-authored-by: mattkur <13772048+mattkur@users.noreply.github.com> --- .../src/dispatch/vtl2_settings_worker.rs | 2 +- openhcl/underhill_core/src/nvme_manager.rs | 50 +++++++++++-------- .../disk_nvme/nvme_driver/src/driver.rs | 31 +++++------- .../disk_nvme/nvme_driver/src/tests.rs | 44 ++++++++++++---- 4 files changed, 78 insertions(+), 49 deletions(-) diff --git a/openhcl/underhill_core/src/dispatch/vtl2_settings_worker.rs b/openhcl/underhill_core/src/dispatch/vtl2_settings_worker.rs index f4c8093ccd..cb2072ceb4 100644 --- a/openhcl/underhill_core/src/dispatch/vtl2_settings_worker.rs +++ b/openhcl/underhill_core/src/dispatch/vtl2_settings_worker.rs @@ -993,7 +993,7 @@ async fn make_disk_type_from_physical_device( // We can't validate yet that this namespace actually exists. That will // be checked later. return Ok(Resource::new(NvmeDiskConfig { - controller_instance_id: Some(controller_instance_id.to_string()), + debug_id: controller_instance_id.to_string(), pci_id, nsid: sub_device_path, })); diff --git a/openhcl/underhill_core/src/nvme_manager.rs b/openhcl/underhill_core/src/nvme_manager.rs index 157a88506a..dab9930a52 100644 --- a/openhcl/underhill_core/src/nvme_manager.rs +++ b/openhcl/underhill_core/src/nvme_manager.rs @@ -175,9 +175,7 @@ impl NvmeManager { enum Request { Inspect(inspect::Deferred), ForceLoadDriver(inspect::DeferredUpdate), - GetNamespace( - Rpc<(Option, String, u32), Result>, - ), + GetNamespace(Rpc<(String, String, u32), Result>), Save(Rpc<(), Result>), Shutdown { span: tracing::Span, @@ -193,7 +191,7 @@ pub struct NvmeManagerClient { impl NvmeManagerClient { pub async fn get_namespace( &self, - controller_instance_id: Option, + debug_id: String, pci_id: String, nsid: u32, ) -> anyhow::Result { @@ -201,11 +199,11 @@ impl NvmeManagerClient { .sender .call( Request::GetNamespace, - (controller_instance_id.clone(), pci_id.clone(), nsid), + (debug_id.clone(), pci_id.clone(), nsid), ) .instrument(tracing::info_span!( "nvme_get_namespace", - controller_instance_id = controller_instance_id.as_ref().map(|s| s.as_str()), + debug_id, pci_id, nsid )) @@ -246,7 +244,10 @@ impl NvmeManagerWorker { match req { Request::Inspect(deferred) => deferred.inspect(&self), Request::ForceLoadDriver(update) => { - match self.get_driver(None, update.new_value().to_owned()).await { + match self + .get_driver("force-load".to_string(), update.new_value().to_owned()) + .await + { Ok(_) => { let pci_id = update.new_value().to_string(); update.succeed(pci_id); @@ -257,8 +258,8 @@ impl NvmeManagerWorker { } } Request::GetNamespace(rpc) => { - rpc.handle(async |(controller_instance_id, pci_id, nsid)| { - self.get_namespace(controller_instance_id, pci_id.clone(), nsid) + rpc.handle(async |(debug_id, pci_id, nsid)| { + self.get_namespace(debug_id, pci_id.clone(), nsid) .map_err(|source| NamespaceError { pci_id, source }) .await }) @@ -294,12 +295,11 @@ impl NvmeManagerWorker { if !nvme_keepalive || !self.save_restore_supported { async { join_all(self.devices.drain().map(|(pci_id, driver)| { - let controller_instance_id = driver.controller_instance_id(); + let debug_id = driver.debug_id(); driver.shutdown().instrument(tracing::info_span!( "shutdown_nvme_driver", pci_id, - controller_instance_id = - controller_instance_id.as_ref().map(|s| s.as_str()) + debug_id )) })) .await @@ -311,7 +311,7 @@ impl NvmeManagerWorker { async fn get_driver( &mut self, - controller_instance_id: Option, + debug_id: String, pci_id: String, ) -> Result<&mut nvme_driver::NvmeDriver, InnerError> { let driver = match self.devices.entry(pci_id.to_owned()) { @@ -344,7 +344,7 @@ impl NvmeManagerWorker { self.vp_count, device, self.is_isolated, - controller_instance_id, + debug_id, ) .instrument(tracing::info_span!( "nvme_driver_init", @@ -361,13 +361,11 @@ impl NvmeManagerWorker { async fn get_namespace( &mut self, - controller_instance_id: Option, + debug_id: String, pci_id: String, nsid: u32, ) -> Result { - let driver = self - .get_driver(controller_instance_id, pci_id.to_owned()) - .await?; + let driver = self.get_driver(debug_id, pci_id.to_owned()).await?; driver .namespace(nsid) .await @@ -461,7 +459,7 @@ impl AsyncResolveResource for NvmeDiskResolver { ) -> Result { let namespace = self .manager - .get_namespace(rsrc.controller_instance_id, rsrc.pci_id, rsrc.nsid) + .get_namespace(rsrc.debug_id, rsrc.pci_id, rsrc.nsid) .await .context("could not open nvme namespace")?; @@ -469,13 +467,23 @@ impl AsyncResolveResource for NvmeDiskResolver { } } -#[derive(MeshPayload, Default)] +#[derive(MeshPayload)] pub struct NvmeDiskConfig { - pub controller_instance_id: Option, + pub debug_id: String, pub pci_id: String, pub nsid: u32, } +impl Default for NvmeDiskConfig { + fn default() -> Self { + Self { + debug_id: "force-load".to_string(), + pci_id: String::new(), + nsid: 0, + } + } +} + impl ResourceId for NvmeDiskConfig { const ID: &'static str = "nvme"; } diff --git a/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs b/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs index 88e61d8a29..54a48b3a94 100644 --- a/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs +++ b/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs @@ -56,7 +56,7 @@ pub struct NvmeDriver { #[inspect(flatten)] task: Option, WorkerState>>, device_id: String, - controller_instance_id: Option, + debug_id: String, identify: Option>, #[inspect(skip)] driver: VmTaskDriver, @@ -180,18 +180,13 @@ impl NvmeDriver { cpu_count: u32, device: T, bounce_buffer: bool, - controller_instance_id: Option, + debug_id: String, ) -> anyhow::Result { let pci_id = device.id().to_owned(); - let mut this = Self::new_disabled( - driver_source, - cpu_count, - device, - bounce_buffer, - controller_instance_id, - ) - .instrument(tracing::info_span!("nvme_new_disabled", pci_id)) - .await?; + let mut this = + Self::new_disabled(driver_source, cpu_count, device, bounce_buffer, debug_id) + .instrument(tracing::info_span!("nvme_new_disabled", pci_id)) + .await?; match this .enable(cpu_count as u16) .instrument(tracing::info_span!("nvme_enable", pci_id)) @@ -216,7 +211,7 @@ impl NvmeDriver { cpu_count: u32, mut device: T, bounce_buffer: bool, - controller_instance_id: Option, + debug_id: String, ) -> anyhow::Result { let driver = driver_source.simple(); let bar0 = Bar0( @@ -257,7 +252,7 @@ impl NvmeDriver { Ok(Self { device_id: device.id().to_owned(), - controller_instance_id, + debug_id, task: Some(TaskControl::new(DriverWorkerTask { device, driver: driver.clone(), @@ -578,7 +573,7 @@ impl NvmeDriver { mut device: T, saved_state: &NvmeDriverSavedState, bounce_buffer: bool, - controller_instance_id: Option, + debug_id: String, ) -> anyhow::Result { let driver = driver_source.simple(); let bar0_mapping = device @@ -605,7 +600,7 @@ impl NvmeDriver { let mut this = Self { device_id: device.id().to_owned(), - controller_instance_id, + debug_id, task: Some(TaskControl::new(DriverWorkerTask { device, driver: driver.clone(), @@ -752,9 +747,9 @@ impl NvmeDriver { self.nvme_keepalive = nvme_keepalive; } - /// Get the controller instance ID, if set. - pub fn controller_instance_id(&self) -> Option { - self.controller_instance_id.clone() + /// Get the debug ID. + pub fn debug_id(&self) -> &str { + &self.debug_id } } diff --git a/vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs b/vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs index 7bd19f3e93..95cd98d0a4 100644 --- a/vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs +++ b/vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs @@ -78,7 +78,14 @@ async fn test_nvme_ioqueue_max_mqes(driver: DefaultDriver) { let cap: Cap = Cap::new().with_mqes_z(max_u16); device.set_mock_response_u64(Some((0, cap.into()))); - let driver = NvmeDriver::new(&driver_source, CPU_COUNT, device, false, None).await; + let driver = NvmeDriver::new( + &driver_source, + CPU_COUNT, + device, + false, + "test-nvme".to_string(), + ) + .await; assert!(driver.is_ok()); } @@ -113,7 +120,14 @@ async fn test_nvme_ioqueue_invalid_mqes(driver: DefaultDriver) { // Setup mock response at offset 0 let cap: Cap = Cap::new().with_mqes_z(0); device.set_mock_response_u64(Some((0, cap.into()))); - let driver = NvmeDriver::new(&driver_source, CPU_COUNT, device, false, None).await; + let driver = NvmeDriver::new( + &driver_source, + CPU_COUNT, + device, + false, + "test-nvme".to_string(), + ) + .await; assert!(driver.is_err()); } @@ -150,9 +164,15 @@ async fn test_nvme_driver(driver: DefaultDriver, allow_dma: bool) { .await .unwrap(); let device = NvmeTestEmulatedDevice::new(nvme, msi_set, dma_client.clone()); - let driver = NvmeDriver::new(&driver_source, CPU_COUNT, device, false, None) - .await - .unwrap(); + let driver = NvmeDriver::new( + &driver_source, + CPU_COUNT, + device, + false, + "test-nvme".to_string(), + ) + .await + .unwrap(); let namespace = driver.namespace(1).await.unwrap(); // Act: Write 1024 bytes of data to disk starting at LBA 1. @@ -266,9 +286,15 @@ async fn test_nvme_save_restore_inner(driver: DefaultDriver) { .unwrap(); let device = NvmeTestEmulatedDevice::new(nvme_ctrl, msi_x, dma_client.clone()); - let mut nvme_driver = NvmeDriver::new(&driver_source, CPU_COUNT, device, false, None) - .await - .unwrap(); + let mut nvme_driver = NvmeDriver::new( + &driver_source, + CPU_COUNT, + device, + false, + "test-nvme".to_string(), + ) + .await + .unwrap(); let _ns1 = nvme_driver.namespace(1).await.unwrap(); let saved_state = nvme_driver.save().await.unwrap(); // As of today we do not save namespace data to avoid possible conflict @@ -304,7 +330,7 @@ async fn test_nvme_save_restore_inner(driver: DefaultDriver) { let _new_device = NvmeTestEmulatedDevice::new(new_nvme_ctrl, new_msi_x, dma_client.clone()); // TODO: Memory restore is disabled for emulated DMA, uncomment once fixed. - // let _new_nvme_driver = NvmeDriver::restore(&driver_source, CPU_COUNT, new_device, &saved_state, false, None) + // let _new_nvme_driver = NvmeDriver::restore(&driver_source, CPU_COUNT, new_device, &saved_state, false, "test-nvme".to_string()) // .await // .unwrap(); } From 02fbdbc1bef9a03eb7f450f5e9a15c1a8a0bfdad Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 8 Jul 2025 21:24:31 +0000 Subject: [PATCH 4/9] Address code review feedback: Add strong typing, rename debug_id to name, and fix restore issue Co-authored-by: mattkur <13772048+mattkur@users.noreply.github.com> --- .../src/dispatch/vtl2_settings_worker.rs | 2 +- openhcl/underhill_core/src/nvme_manager.rs | 99 +++++++++++++++---- .../disk_nvme/nvme_driver/src/driver.rs | 25 +++-- 3 files changed, 91 insertions(+), 35 deletions(-) diff --git a/openhcl/underhill_core/src/dispatch/vtl2_settings_worker.rs b/openhcl/underhill_core/src/dispatch/vtl2_settings_worker.rs index cb2072ceb4..af867f13eb 100644 --- a/openhcl/underhill_core/src/dispatch/vtl2_settings_worker.rs +++ b/openhcl/underhill_core/src/dispatch/vtl2_settings_worker.rs @@ -993,7 +993,7 @@ async fn make_disk_type_from_physical_device( // We can't validate yet that this namespace actually exists. That will // be checked later. return Ok(Resource::new(NvmeDiskConfig { - debug_id: controller_instance_id.to_string(), + name: controller_instance_id.to_string(), pci_id, nsid: sub_device_path, })); diff --git a/openhcl/underhill_core/src/nvme_manager.rs b/openhcl/underhill_core/src/nvme_manager.rs index dab9930a52..5c69694d72 100644 --- a/openhcl/underhill_core/src/nvme_manager.rs +++ b/openhcl/underhill_core/src/nvme_manager.rs @@ -28,6 +28,58 @@ use std::collections::HashMap; use std::collections::hash_map; use thiserror::Error; use tracing::Instrument; + +/// Strongly typed wrapper for NVMe device name (formerly debug_id/controller_instance_id) +#[derive(Debug, Clone, PartialEq, Eq, MeshPayload)] +pub struct NvmeDeviceName(pub String); + +impl From for NvmeDeviceName { + fn from(s: String) -> Self { + NvmeDeviceName(s) + } +} + +impl From<&str> for NvmeDeviceName { + fn from(s: &str) -> Self { + NvmeDeviceName(s.to_string()) + } +} + +impl AsRef for NvmeDeviceName { + fn as_ref(&self) -> &str { + &self.0 + } +} + +/// Strongly typed wrapper for PCI ID +#[derive(Debug, Clone, PartialEq, Eq, MeshPayload)] +pub struct PciId(pub String); + +impl From for PciId { + fn from(s: String) -> Self { + PciId(s) + } +} + +impl From<&str> for PciId { + fn from(s: &str) -> Self { + PciId(s.to_string()) + } +} + +impl AsRef for PciId { + fn as_ref(&self) -> &str { + &self.0 + } +} + +/// Parameters for getting an NVMe namespace +#[derive(Debug, Clone, MeshPayload)] +pub struct GetNamespaceParams { + pub name: NvmeDeviceName, + pub pci_id: PciId, + pub nsid: u32, +} use user_driver::vfio::VfioDevice; use vm_resource::AsyncResolveResource; use vm_resource::ResourceId; @@ -175,7 +227,7 @@ impl NvmeManager { enum Request { Inspect(inspect::Deferred), ForceLoadDriver(inspect::DeferredUpdate), - GetNamespace(Rpc<(String, String, u32), Result>), + GetNamespace(Rpc>), Save(Rpc<(), Result>), Shutdown { span: tracing::Span, @@ -191,20 +243,22 @@ pub struct NvmeManagerClient { impl NvmeManagerClient { pub async fn get_namespace( &self, - debug_id: String, - pci_id: String, + name: impl Into, + pci_id: impl Into, nsid: u32, ) -> anyhow::Result { + let params = GetNamespaceParams { + name: name.into(), + pci_id: pci_id.into(), + nsid, + }; Ok(self .sender - .call( - Request::GetNamespace, - (debug_id.clone(), pci_id.clone(), nsid), - ) + .call(Request::GetNamespace, params.clone()) .instrument(tracing::info_span!( "nvme_get_namespace", - debug_id, - pci_id, + name = params.name.as_ref(), + pci_id = params.pci_id.as_ref(), nsid )) .await @@ -258,9 +312,12 @@ impl NvmeManagerWorker { } } Request::GetNamespace(rpc) => { - rpc.handle(async |(debug_id, pci_id, nsid)| { - self.get_namespace(debug_id, pci_id.clone(), nsid) - .map_err(|source| NamespaceError { pci_id, source }) + rpc.handle(async |params| { + self.get_namespace(params.name.0, params.pci_id.0.clone(), params.nsid) + .map_err(|source| NamespaceError { + pci_id: params.pci_id.0, + source, + }) .await }) .await @@ -295,11 +352,11 @@ impl NvmeManagerWorker { if !nvme_keepalive || !self.save_restore_supported { async { join_all(self.devices.drain().map(|(pci_id, driver)| { - let debug_id = driver.debug_id(); + let name = driver.name(); driver.shutdown().instrument(tracing::info_span!( "shutdown_nvme_driver", pci_id, - debug_id + name )) })) .await @@ -311,7 +368,7 @@ impl NvmeManagerWorker { async fn get_driver( &mut self, - debug_id: String, + name: String, pci_id: String, ) -> Result<&mut nvme_driver::NvmeDriver, InnerError> { let driver = match self.devices.entry(pci_id.to_owned()) { @@ -344,7 +401,7 @@ impl NvmeManagerWorker { self.vp_count, device, self.is_isolated, - debug_id, + name, ) .instrument(tracing::info_span!( "nvme_driver_init", @@ -361,11 +418,11 @@ impl NvmeManagerWorker { async fn get_namespace( &mut self, - debug_id: String, + name: String, pci_id: String, nsid: u32, ) -> Result { - let driver = self.get_driver(debug_id, pci_id.to_owned()).await?; + let driver = self.get_driver(name, pci_id.to_owned()).await?; driver .namespace(nsid) .await @@ -425,7 +482,7 @@ impl NvmeManagerWorker { vfio_device, &disk.driver_state, self.is_isolated, - None, // controller_instance_id is not persisted in saved state + format!("restored-{}", disk.pci_id), // Use PCI ID as name for restored drivers ) .instrument(tracing::info_span!("nvme_driver_restore")) .await?; @@ -469,7 +526,7 @@ impl AsyncResolveResource for NvmeDiskResolver { #[derive(MeshPayload)] pub struct NvmeDiskConfig { - pub debug_id: String, + pub name: String, pub pci_id: String, pub nsid: u32, } @@ -477,7 +534,7 @@ pub struct NvmeDiskConfig { impl Default for NvmeDiskConfig { fn default() -> Self { Self { - debug_id: "force-load".to_string(), + name: "force-load".to_string(), pci_id: String::new(), nsid: 0, } diff --git a/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs b/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs index 54a48b3a94..72955cb76d 100644 --- a/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs +++ b/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs @@ -56,7 +56,7 @@ pub struct NvmeDriver { #[inspect(flatten)] task: Option, WorkerState>>, device_id: String, - debug_id: String, + name: String, identify: Option>, #[inspect(skip)] driver: VmTaskDriver, @@ -180,13 +180,12 @@ impl NvmeDriver { cpu_count: u32, device: T, bounce_buffer: bool, - debug_id: String, + name: String, ) -> anyhow::Result { let pci_id = device.id().to_owned(); - let mut this = - Self::new_disabled(driver_source, cpu_count, device, bounce_buffer, debug_id) - .instrument(tracing::info_span!("nvme_new_disabled", pci_id)) - .await?; + let mut this = Self::new_disabled(driver_source, cpu_count, device, bounce_buffer, name) + .instrument(tracing::info_span!("nvme_new_disabled", pci_id)) + .await?; match this .enable(cpu_count as u16) .instrument(tracing::info_span!("nvme_enable", pci_id)) @@ -211,7 +210,7 @@ impl NvmeDriver { cpu_count: u32, mut device: T, bounce_buffer: bool, - debug_id: String, + name: String, ) -> anyhow::Result { let driver = driver_source.simple(); let bar0 = Bar0( @@ -252,7 +251,7 @@ impl NvmeDriver { Ok(Self { device_id: device.id().to_owned(), - debug_id, + name, task: Some(TaskControl::new(DriverWorkerTask { device, driver: driver.clone(), @@ -573,7 +572,7 @@ impl NvmeDriver { mut device: T, saved_state: &NvmeDriverSavedState, bounce_buffer: bool, - debug_id: String, + name: String, ) -> anyhow::Result { let driver = driver_source.simple(); let bar0_mapping = device @@ -600,7 +599,7 @@ impl NvmeDriver { let mut this = Self { device_id: device.id().to_owned(), - debug_id, + name, task: Some(TaskControl::new(DriverWorkerTask { device, driver: driver.clone(), @@ -747,9 +746,9 @@ impl NvmeDriver { self.nvme_keepalive = nvme_keepalive; } - /// Get the debug ID. - pub fn debug_id(&self) -> &str { - &self.debug_id + /// Get the name. + pub fn name(&self) -> &str { + &self.name } } From a9818bd2aaf5b9e35a56139dac453ea41b9a9ad1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 8 Jul 2025 22:11:00 +0000 Subject: [PATCH 5/9] Address comment feedback: Remove unnecessary trait implementations, add debug controller ID to VfioDevice, enhance nvme_driver logging Co-authored-by: mattkur <13772048+mattkur@users.noreply.github.com> --- openhcl/underhill_core/src/emuplat/netvsp.rs | 2 +- openhcl/underhill_core/src/nvme_manager.rs | 79 ++++++------------- .../disk_nvme/nvme_driver/src/driver.rs | 53 +++++++++++-- vm/devices/user_driver/src/vfio.rs | 25 +++++- 4 files changed, 94 insertions(+), 65 deletions(-) diff --git a/openhcl/underhill_core/src/emuplat/netvsp.rs b/openhcl/underhill_core/src/emuplat/netvsp.rs index bf517208fb..be6a015ed9 100644 --- a/openhcl/underhill_core/src/emuplat/netvsp.rs +++ b/openhcl/underhill_core/src/emuplat/netvsp.rs @@ -120,7 +120,7 @@ async fn try_create_mana_device( max_sub_channels: u16, dma_client: Arc, ) -> anyhow::Result> { - let device = VfioDevice::new(driver_source, pci_id, dma_client) + let device = VfioDevice::new(driver_source, pci_id, None, dma_client) .await .context("failed to open device")?; diff --git a/openhcl/underhill_core/src/nvme_manager.rs b/openhcl/underhill_core/src/nvme_manager.rs index 5c69694d72..5ee780166c 100644 --- a/openhcl/underhill_core/src/nvme_manager.rs +++ b/openhcl/underhill_core/src/nvme_manager.rs @@ -28,51 +28,12 @@ use std::collections::HashMap; use std::collections::hash_map; use thiserror::Error; use tracing::Instrument; +use user_driver::vfio::PciId; /// Strongly typed wrapper for NVMe device name (formerly debug_id/controller_instance_id) #[derive(Debug, Clone, PartialEq, Eq, MeshPayload)] pub struct NvmeDeviceName(pub String); -impl From for NvmeDeviceName { - fn from(s: String) -> Self { - NvmeDeviceName(s) - } -} - -impl From<&str> for NvmeDeviceName { - fn from(s: &str) -> Self { - NvmeDeviceName(s.to_string()) - } -} - -impl AsRef for NvmeDeviceName { - fn as_ref(&self) -> &str { - &self.0 - } -} - -/// Strongly typed wrapper for PCI ID -#[derive(Debug, Clone, PartialEq, Eq, MeshPayload)] -pub struct PciId(pub String); - -impl From for PciId { - fn from(s: String) -> Self { - PciId(s) - } -} - -impl From<&str> for PciId { - fn from(s: &str) -> Self { - PciId(s.to_string()) - } -} - -impl AsRef for PciId { - fn as_ref(&self) -> &str { - &self.0 - } -} - /// Parameters for getting an NVMe namespace #[derive(Debug, Clone, MeshPayload)] pub struct GetNamespaceParams { @@ -243,13 +204,13 @@ pub struct NvmeManagerClient { impl NvmeManagerClient { pub async fn get_namespace( &self, - name: impl Into, - pci_id: impl Into, + name: String, + pci_id: String, nsid: u32, ) -> anyhow::Result { let params = GetNamespaceParams { - name: name.into(), - pci_id: pci_id.into(), + name: NvmeDeviceName(name), + pci_id: PciId(pci_id), nsid, }; Ok(self @@ -257,8 +218,8 @@ impl NvmeManagerClient { .call(Request::GetNamespace, params.clone()) .instrument(tracing::info_span!( "nvme_get_namespace", - name = params.name.as_ref(), - pci_id = params.pci_id.as_ref(), + name = params.name.0, + pci_id = params.pci_id.0, nsid )) .await @@ -388,10 +349,15 @@ impl NvmeManagerWorker { }) .map_err(InnerError::DmaClient)?; - let device = VfioDevice::new(&self.driver_source, entry.key(), dma_client) - .instrument(tracing::info_span!("vfio_device_open", pci_id)) - .await - .map_err(InnerError::Vfio)?; + let device = VfioDevice::new( + &self.driver_source, + entry.key(), + Some(name.clone()), + dma_client, + ) + .instrument(tracing::info_span!("vfio_device_open", pci_id)) + .await + .map_err(InnerError::Vfio)?; // TODO: For now, any isolation means use bounce buffering. This // needs to change when we have nvme devices that support DMA to @@ -468,10 +434,15 @@ impl NvmeManagerWorker { // This code can wait on each VFIO device until it is arrived. // A potential optimization would be to delay VFIO operation // until it is ready, but a redesign of VfioDevice is needed. - let vfio_device = - VfioDevice::restore(&self.driver_source, &disk.pci_id.clone(), true, dma_client) - .instrument(tracing::info_span!("vfio_device_restore", pci_id)) - .await?; + let vfio_device = VfioDevice::restore( + &self.driver_source, + &disk.pci_id.clone(), + Some(format!("restored-{}", pci_id)), + true, + dma_client, + ) + .instrument(tracing::info_span!("vfio_device_restore", pci_id)) + .await?; // TODO: For now, any isolation means use bounce buffering. This // needs to change when we have nvme devices that support DMA to diff --git a/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs b/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs index 72955cb76d..cf93fad8d9 100644 --- a/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs +++ b/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs @@ -77,6 +77,7 @@ pub struct NvmeDriver { #[derive(Inspect)] struct DriverWorkerTask { device: T, + name: String, #[inspect(skip)] driver: VmTaskDriver, registers: Arc>, @@ -183,17 +184,25 @@ impl NvmeDriver { name: String, ) -> anyhow::Result { let pci_id = device.id().to_owned(); - let mut this = Self::new_disabled(driver_source, cpu_count, device, bounce_buffer, name) - .instrument(tracing::info_span!("nvme_new_disabled", pci_id)) - .await?; + let mut this = Self::new_disabled( + driver_source, + cpu_count, + device, + bounce_buffer, + name.clone(), + ) + .instrument(tracing::info_span!("nvme_new_disabled", name, pci_id)) + .await?; match this .enable(cpu_count as u16) - .instrument(tracing::info_span!("nvme_enable", pci_id)) + .instrument(tracing::info_span!("nvme_enable", name, pci_id)) .await { Ok(()) => Ok(this), Err(err) => { tracing::error!( + name = %name, + pci_id = %pci_id, error = err.as_ref() as &dyn std::error::Error, "device initialization failed, shutting down" ); @@ -225,6 +234,7 @@ impl NvmeDriver { .reset(&driver) .instrument(tracing::info_span!( "nvme_already_enabled", + name = name, pci_id = device.id().to_owned() )) .await @@ -251,9 +261,10 @@ impl NvmeDriver { Ok(Self { device_id: device.id().to_owned(), - name, + name: name.clone(), task: Some(TaskControl::new(DriverWorkerTask { device, + name: name.clone(), driver: driver.clone(), registers, admin: None, @@ -318,7 +329,11 @@ impl NvmeDriver { worker.registers.bar0.set_acq(admin.cq_addr()); // Enable the controller. - let span = tracing::info_span!("nvme_ctrl_enable", pci_id = worker.device.id().to_owned()); + let span = tracing::info_span!( + "nvme_ctrl_enable", + name = worker.name, + pci_id = worker.device.id().to_owned() + ); let ctrl_enable_span = span.enter(); worker.registers.bar0.set_cc( spec::Cc::new() @@ -385,6 +400,8 @@ impl NvmeDriver { let requested_io_queue_count = if max_interrupt_count < requested_io_queue_count as u32 { tracing::warn!( + name = %worker.name, + pci_id = %worker.device.id(), max_interrupt_count, requested_io_queue_count, "queue count constrained by msi count" @@ -416,6 +433,8 @@ impl NvmeDriver { let allocated_io_queue_count = sq_count.min(cq_count); if allocated_io_queue_count < requested_io_queue_count { tracing::warn!( + name = %worker.name, + pci_id = %worker.device.id(), sq_count, cq_count, requested_io_queue_count, @@ -441,9 +460,13 @@ impl NvmeDriver { let async_event_task = self.driver.spawn("nvme_async_event", { let admin = admin.issuer().clone(); let rescan_event = self.rescan_event.clone(); + let name = worker.name.clone(); + let device_id = worker.device.id().to_owned(); async move { if let Err(err) = handle_asynchronous_events(&admin, &rescan_event).await { tracing::error!( + name = %name, + pci_id = %device_id, error = err.as_ref() as &dyn std::error::Error, "asynchronous event failure, not processing any more" ); @@ -500,7 +523,12 @@ impl NvmeDriver { _admin_responses = admin.shutdown().await; } if let Err(e) = worker.registers.bar0.reset(&driver).await { - tracing::info!(csts = e, "device reset failed"); + tracing::info!( + name = %worker.name, + pci_id = %worker.device.id(), + csts = e, + "device reset failed" + ); } } } @@ -599,9 +627,10 @@ impl NvmeDriver { let mut this = Self { device_id: device.id().to_owned(), - name, + name: name.clone(), task: Some(TaskControl::new(DriverWorkerTask { device, + name: name.clone(), driver: driver.clone(), registers: registers.clone(), admin: None, // Updated below. @@ -667,9 +696,13 @@ impl NvmeDriver { let async_event_task = this.driver.spawn("nvme_async_event", { let admin = admin.issuer().clone(); let rescan_event = this.rescan_event.clone(); + let name = this.name.clone(); + let device_id = this.device_id.clone(); async move { if let Err(err) = handle_asynchronous_events(&admin, &rescan_event).await { tracing::error!( + name = %name, + pci_id = %device_id, error = err.as_ref() as &dyn std::error::Error, "asynchronous event failure, not processing any more" ); @@ -880,6 +913,8 @@ impl DriverWorkerTask { .unwrap(); tracing::error!( + name = %self.name, + pci_id = %self.device.id(), cpu, fallback_cpu, error = err.as_ref() as &dyn std::error::Error, @@ -987,6 +1022,8 @@ impl DriverWorkerTask { .await { tracing::error!( + name = %self.name, + pci_id = %self.device.id(), error = &err as &dyn std::error::Error, "failed to delete completion queue in teardown path" ); diff --git a/vm/devices/user_driver/src/vfio.rs b/vm/devices/user_driver/src/vfio.rs index 0bc6a7670b..36a2cc0311 100644 --- a/vm/devices/user_driver/src/vfio.rs +++ b/vm/devices/user_driver/src/vfio.rs @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. -//! Support for accessing a MANA device via VFIO on Linux. +//! Support for accessing devices via VFIO on Linux. #![cfg(target_os = "linux")] #![cfg(feature = "vfio")] @@ -16,6 +16,7 @@ use futures::FutureExt; use futures_concurrency::future::Race; use inspect::Inspect; use inspect_counters::SharedCounter; +use mesh::MeshPayload; use pal_async::task::Spawn; use pal_async::task::Task; use pal_async::wait::PolledWait; @@ -38,10 +39,15 @@ use zerocopy::Immutable; use zerocopy::IntoBytes; use zerocopy::KnownLayout; +/// Strongly typed wrapper for PCI ID +#[derive(Debug, Clone, PartialEq, Eq, MeshPayload)] +pub struct PciId(pub String); + /// A device backend accessed via VFIO. #[derive(Inspect)] pub struct VfioDevice { pci_id: Arc, + debug_controller_id: Option, #[inspect(skip)] _container: vfio_sys::Container, #[inspect(skip)] @@ -73,9 +79,17 @@ impl VfioDevice { pub async fn new( driver_source: &VmTaskDriverSource, pci_id: &str, + debug_controller_id: Option, dma_client: Arc, ) -> anyhow::Result { - Self::restore(driver_source, pci_id, false, dma_client).await + Self::restore( + driver_source, + pci_id, + debug_controller_id, + false, + dma_client, + ) + .await } /// Creates a new VFIO-backed device for the PCI device with `pci_id`. @@ -83,6 +97,7 @@ impl VfioDevice { pub async fn restore( driver_source: &VmTaskDriverSource, pci_id: &str, + debug_controller_id: Option, keepalive: bool, dma_client: Arc, ) -> anyhow::Result { @@ -123,6 +138,7 @@ impl VfioDevice { let config_space = device.region_info(VFIO_PCI_CONFIG_REGION_INDEX)?; let this = Self { pci_id: pci_id.into(), + debug_controller_id, _container: container, _group: group, device: Arc::new(device), @@ -154,6 +170,11 @@ impl VfioDevice { Ok(()) } + /// Returns the debug controller ID for diagnostic purposes. + pub fn debug_controller_id(&self) -> Option<&str> { + self.debug_controller_id.as_deref() + } + pub fn read_config(&self, offset: u16) -> anyhow::Result { if offset as u64 > self.config_space.size - 4 { anyhow::bail!("invalid config offset"); From bdc7dd1d2385fc05d19e01d344c042ba7c573537 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 9 Jul 2025 00:27:38 +0000 Subject: [PATCH 6/9] Make VfioDevice use strongly typed PciId and Arc<str> for debug_controller_id Co-authored-by: mattkur <13772048+mattkur@users.noreply.github.com> --- libvfio.rlib | Bin 0 -> 6144 bytes openhcl/underhill_core/src/emuplat/netvsp.rs | 3 +- openhcl/underhill_core/src/nvme_manager.rs | 4 +-- vm/devices/user_driver/src/vfio.rs | 36 ++++++++++++------- 4 files changed, 28 insertions(+), 15 deletions(-) create mode 100644 libvfio.rlib diff --git a/libvfio.rlib b/libvfio.rlib new file mode 100644 index 0000000000000000000000000000000000000000..8e85167024bac5481c502d85ac8e863a2566ca1e GIT binary patch literal 6144 zcmcgQ3v?6Ll{1p85i%JGh;pMp(g;g)DzV zH#Vj#K4#yA!YBAEaMu| zP0#Mxcl6%7_rCjk_q})D8|lmVkh^}ayoCDmLQg;w@Whs)*zxf?IW|^GCMy}|c`#re zH&Q2W@%kEz*UWoQZrwK<4Jch9GeUE{?a!-BMhsE4_9DPECrJQtc02Zb-|KVmVjHQUS7BQw-QPC~(ba3R>XR|R+nHg83nJ)UPk!=j ze+^)NK*2E>(zPt~-X8b*vBkD0NtHlvjn+*Tuv z823q9zicOzHXUVr@!`||yt5QTj24%LvGJVI>NfElZ8Sx3OckHWnx?R4O>1LlT~m;E z3v1ksAt9Qb(-=wriSL>Aj88vEf4XrOQotBJENe9KCZo;j@_+(NbWOHI!~BXD9uiK3 zYSe89rSBp}+RC$TD`#?^+$ei{D#4=kG`?1^7gBbEkZJa#p+>r+QbS*ug%7>$OP|c5JF6}$FVE= z%)jm&Xny~Cbmfu_k0ExW&1N*(Og4^YZLHwtfPG3m-&F4kHMRz#^*>mwTDEWT_OzLY zFC5b=@5JZeV%)UZLNgwdU}DTJkWfD;tb9X5qdQ7X$(eEI_7i(9z415eD!OIU;x>7~ zWwCIud@S$b-N;2bS`v>C7Tn?Jt+&l9Pu`(^qre877ZrQVM?>*#;|Ms-)UFK_AxNX*8AA;H2Md6(c3SPPmR@9~6~246sk ze(_uU<3o8*zV`Efd|}DmQ-556BskVY8$E)>Yol4K*<=NYl(jy8gD?7I{#&=Yx4!gp z3;Aim3eD;d5jOl^7&3@e%PcNIu{i1_??59)fodEQmvqiz)JPTy!oKVNmw`_>X7i zy}n)gdTsQ^&PRW`2}q)Yk`_saWS3-{WUr)Cv~*teN?azPk{J$Vzp*0PcNW8vUXCeD zj{TqY$*BBq6(*xPIXroH6QwZT72lnVW7xcs`bNJ{5^4?vgir}eekF}TA<*LYkM}M9 zlCc#pX>JliYdj%e3-C6D+$He~Ez}gnsO^gn|1x~Jp!E4c^DlSZG6nCO(S@z}s}j7> zfLm~FHlD4`oLQ8inwgoOrODA}W>FdGxwF-pN_CcDmRhaO!3)ef+^{fHD#8n=t1_m4 zFC#xgn_`X@p-ISj#KHGnG0z=Pl_)^XaS(}jX3b-FG`;+w=JDhFe@Wp zZ)8%z^(Cp2rSji05ZgbIhgL>Ksj)N`Y&0G+Hthp9_|rAZ(b5IknQ02#k}C)4 z%IT~cI+J#~6fe;wsyQEKoexyX%vd! zE5$i(7J^xTMYu%^aV#mzGv%e;>&=zrfW=!8uA?53Pf3+c5#jmT+?;|^J2%(H5%`nJ z^uko^h4ntk`rgs@O~1bAXs-UI4Z}{RV5&COy2ffQY7VRmz#UWssk*tXXl?6YhgA}KeA>b+U`Wl3B z>VtveKx0@ahL{%zLy#sy;dZRJslF*33iB?2l6|{Bj>q4z4_5S7QBmOU`Fl^aVhp72 zk>b$36-^8jLpPQgI}?;dTQ`QBn=+9)&O1&l0O&zCK=s%=I=N-g#f9u7?I-CvprJLI zT#{cFW4?_KPsY<>C_64(@({)mtj(=B7J-v)IcM;49`^#NES+!4EH>~X=;9oOs$h_ zb9Krd5mh~(S~X9}q_Z;BA~}=0xjJ}QZnBh< zB)!my^d#V`okWL{Xi2DzOZ`GgggS|AeGO^(ko%_PbZ~TU-+aTfpNy<7DqFYx)8?!i zS9ir{pJQ0(9F!YT7GrdMGWE2Y-kvE13yw$}|3Gc+kH5*@fen2e#!a$L&EXM^HytnE z>|i&=da62!_BNuaZ~fE@nBosg@@6M#np&kh6xE*Ct?|++aklQ9L2>zjqr694KG@b1 z4iZnxi1zA8Fi3O+#Vu`-AQe#$P9v}Mk|VStKr7B&C$Bq6lk%|ktX^|UuRX-;y!00P zfo}VdgL_Y0cA&d#lh{6ZJ-wxlc)Fgj)ki|mc1D7|kzgIsQb@FXodRlGJ0ojkk@c!b zs4vpq*EQ$rb@J*k`5y<#E40`(uv;^_TQeAM&+wXKdd=Z3?J-{ZFTDxun;gjcWfE*CB`X+UvY@<&V1A!I*uoqZ|Tq=TOYYxOxLdF3E^-bsE%8=@yGb)zF$r$)4A!GUVEeQ$U9d)+ykVr-9t8;q6h zk8%Ssm}A*s^?U1O1bDR`0$3YSe|dzwQm6RCFl<%Dd0KIvQv9cr3@FKSGzsWe!_v#c zfTr9Cy0bw#hepw}MOYheX1>1TMUm+S`|<@?3lj&5$S_5iFtOtu63 ztd6t>BkJQ9$!oM?WSG2Mp}5=+R${og3C@!$vZfEV3W{)e2$r&IQzbVXgV2@3N)1*V z?$;jMtsSn^osDX|d2X5z&{Jk)s{X!LzlT-s!H&Kw#heQrRE^LgG%OF-_*|=+gau1i#y@B&cyl-T;7jW- z{@sEZWH`N1DKW^p=vam1qp1$P$|ULTkrWgftkw^$FT8JixAc7`%k=)xDwi4aGKI&>Y$Q(4|Urv>{fB zJ(+1fS4UoSLe5r589z4l44MF9%7ZXzl}7aNN$Me&DPV)cuf;DOa3P;R5q$a+04 zmx*S~MvKkkwRz2MuVAxUyo`%>nN226V0q@-p3dL?ha|Ea|HIGy9XDQF;MQ8u_u2a^2}a<5 z(A%p{mQb{Ml0LK#CKiOFI{E9MVj|N7IDXMiz-IwGrn7SbdF+jaVFxBXj3?WtzD54d zB)O#S$?}K|(cp>mXC*0*a>c}wlu!2PO_78r^358H-$WXMC-ZVZ6HR6UoF0=;`i0~W LUs5ePZ>an?u)Am2 literal 0 HcmV?d00001 diff --git a/openhcl/underhill_core/src/emuplat/netvsp.rs b/openhcl/underhill_core/src/emuplat/netvsp.rs index be6a015ed9..d277848acb 100644 --- a/openhcl/underhill_core/src/emuplat/netvsp.rs +++ b/openhcl/underhill_core/src/emuplat/netvsp.rs @@ -39,6 +39,7 @@ use tracing::Instrument; use uevent::UeventListener; use user_driver::DmaClient; use user_driver::vfio::PciDeviceResetMethod; +use user_driver::vfio::PciId; use user_driver::vfio::VfioDevice; use user_driver::vfio::vfio_set_device_reset_method; use vmcore::vm_task::VmTaskDriverSource; @@ -120,7 +121,7 @@ async fn try_create_mana_device( max_sub_channels: u16, dma_client: Arc, ) -> anyhow::Result> { - let device = VfioDevice::new(driver_source, pci_id, None, dma_client) + let device = VfioDevice::new(driver_source, PciId(pci_id.to_string()), None, dma_client) .await .context("failed to open device")?; diff --git a/openhcl/underhill_core/src/nvme_manager.rs b/openhcl/underhill_core/src/nvme_manager.rs index 5ee780166c..a5b23276a1 100644 --- a/openhcl/underhill_core/src/nvme_manager.rs +++ b/openhcl/underhill_core/src/nvme_manager.rs @@ -351,7 +351,7 @@ impl NvmeManagerWorker { let device = VfioDevice::new( &self.driver_source, - entry.key(), + PciId(entry.key().clone()), Some(name.clone()), dma_client, ) @@ -436,7 +436,7 @@ impl NvmeManagerWorker { // until it is ready, but a redesign of VfioDevice is needed. let vfio_device = VfioDevice::restore( &self.driver_source, - &disk.pci_id.clone(), + disk.pci_id.clone(), Some(format!("restored-{}", pci_id)), true, dma_client, diff --git a/vm/devices/user_driver/src/vfio.rs b/vm/devices/user_driver/src/vfio.rs index 36a2cc0311..1d95d37c78 100644 --- a/vm/devices/user_driver/src/vfio.rs +++ b/vm/devices/user_driver/src/vfio.rs @@ -43,11 +43,18 @@ use zerocopy::KnownLayout; #[derive(Debug, Clone, PartialEq, Eq, MeshPayload)] pub struct PciId(pub String); +impl std::fmt::Display for PciId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} + /// A device backend accessed via VFIO. #[derive(Inspect)] pub struct VfioDevice { - pci_id: Arc, - debug_controller_id: Option, + #[inspect(skip)] + pci_id: PciId, + debug_controller_id: Option>, #[inspect(skip)] _container: vfio_sys::Container, #[inspect(skip)] @@ -78,7 +85,7 @@ impl VfioDevice { /// Creates a new VFIO-backed device for the PCI device with `pci_id`. pub async fn new( driver_source: &VmTaskDriverSource, - pci_id: &str, + pci_id: PciId, debug_controller_id: Option, dma_client: Arc, ) -> anyhow::Result { @@ -96,12 +103,12 @@ impl VfioDevice { /// or creates a device from the saved state if provided. pub async fn restore( driver_source: &VmTaskDriverSource, - pci_id: &str, + pci_id: PciId, debug_controller_id: Option, keepalive: bool, dma_client: Arc, ) -> anyhow::Result { - let path = Path::new("/sys/bus/pci/devices").join(pci_id); + let path = Path::new("/sys/bus/pci/devices").join(&pci_id.0); // The vfio device attaches asynchronously after the PCI device is added, // so make sure that it has completed by checking for the vfio-dev subpath. @@ -127,9 +134,9 @@ impl VfioDevice { container.set_iommu(IommuType::NoIommu)?; if keepalive { // Prevent physical hardware interaction when restoring. - group.set_keep_alive(pci_id)?; + group.set_keep_alive(&pci_id.0)?; } - let device = group.open_device(pci_id)?; + let device = group.open_device(&pci_id.0)?; let msix_info = device.irq_info(vfio_bindings::bindings::vfio::VFIO_PCI_MSIX_IRQ_INDEX)?; if msix_info.flags.noresize() { anyhow::bail!("unsupported: kernel does not support dynamic msix allocation"); @@ -137,8 +144,8 @@ impl VfioDevice { let config_space = device.region_info(VFIO_PCI_CONFIG_REGION_INDEX)?; let this = Self { - pci_id: pci_id.into(), - debug_controller_id, + pci_id, + debug_controller_id: debug_controller_id.map(|s| s.into()), _container: container, _group: group, device: Arc::new(device), @@ -175,6 +182,11 @@ impl VfioDevice { self.debug_controller_id.as_deref() } + /// Returns the PCI ID for inspection purposes. + pub fn pci_id(&self) -> &str { + &self.pci_id.0 + } + pub fn read_config(&self, offset: u16) -> anyhow::Result { if offset as u64 > self.config_space.size - 4 { anyhow::bail!("invalid config offset"); @@ -245,7 +257,7 @@ impl DeviceBacking for VfioDevice { type Registers = MappedRegionWithFallback; fn id(&self) -> &str { - &self.pci_id + &self.pci_id.0 } fn map_bar(&mut self, n: u8) -> anyhow::Result { @@ -298,7 +310,7 @@ impl DeviceBacking for VfioDevice { // The interrupt's CPU affinity will be set by the task when it // starts. This can block the thread briefly, so it's better to do // it on the target CPU. - let irq = vfio_sys::find_msix_irq(&self.pci_id, msix) + let irq = vfio_sys::find_msix_irq(&self.pci_id.0, msix) .context("failed to find irq for msix")?; let target_cpu = Arc::new(AtomicU32::new(cpu)); @@ -310,7 +322,7 @@ impl DeviceBacking for VfioDevice { InterruptTask { driver: driver.clone(), target_cpu: target_cpu.clone(), - pci_id: self.pci_id.clone(), + pci_id: self.pci_id.0.clone().into(), msix, irq, event, From 38bc76efd46ab656c37ed2ab4f7ff8ae9e8a637d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 14 Jul 2025 12:49:11 +0000 Subject: [PATCH 7/9] Simplify GetNamespaceParams to use String types and rename name to debug_host_id Co-authored-by: mattkur <13772048+mattkur@users.noreply.github.com> --- openhcl/underhill_core/src/nvme_manager.rs | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/openhcl/underhill_core/src/nvme_manager.rs b/openhcl/underhill_core/src/nvme_manager.rs index a5b23276a1..cdd15808d4 100644 --- a/openhcl/underhill_core/src/nvme_manager.rs +++ b/openhcl/underhill_core/src/nvme_manager.rs @@ -30,15 +30,11 @@ use thiserror::Error; use tracing::Instrument; use user_driver::vfio::PciId; -/// Strongly typed wrapper for NVMe device name (formerly debug_id/controller_instance_id) -#[derive(Debug, Clone, PartialEq, Eq, MeshPayload)] -pub struct NvmeDeviceName(pub String); - /// Parameters for getting an NVMe namespace #[derive(Debug, Clone, MeshPayload)] pub struct GetNamespaceParams { - pub name: NvmeDeviceName, - pub pci_id: PciId, + pub debug_host_id: String, + pub pci_id: String, pub nsid: u32, } use user_driver::vfio::VfioDevice; @@ -204,13 +200,13 @@ pub struct NvmeManagerClient { impl NvmeManagerClient { pub async fn get_namespace( &self, - name: String, + debug_host_id: String, pci_id: String, nsid: u32, ) -> anyhow::Result { let params = GetNamespaceParams { - name: NvmeDeviceName(name), - pci_id: PciId(pci_id), + debug_host_id, + pci_id, nsid, }; Ok(self @@ -218,8 +214,8 @@ impl NvmeManagerClient { .call(Request::GetNamespace, params.clone()) .instrument(tracing::info_span!( "nvme_get_namespace", - name = params.name.0, - pci_id = params.pci_id.0, + debug_host_id = params.debug_host_id, + pci_id = params.pci_id, nsid )) .await @@ -274,9 +270,9 @@ impl NvmeManagerWorker { } Request::GetNamespace(rpc) => { rpc.handle(async |params| { - self.get_namespace(params.name.0, params.pci_id.0.clone(), params.nsid) + self.get_namespace(params.debug_host_id, params.pci_id.clone(), params.nsid) .map_err(|source| NamespaceError { - pci_id: params.pci_id.0, + pci_id: params.pci_id, source, }) .await From e83946ad2f1ed961cc7fede4d4e635038517b622 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 14 Jul 2025 13:07:24 +0000 Subject: [PATCH 8/9] Address feedback: Make VfioDevice pci_id inspectable Arc, rename debug_controller_id to debug_bus_id Co-authored-by: mattkur <13772048+mattkur@users.noreply.github.com> --- openhcl/underhill_core/src/emuplat/netvsp.rs | 3 +- openhcl/underhill_core/src/nvme_manager.rs | 5 +-- vm/devices/user_driver/src/vfio.rs | 46 ++++++++------------ 3 files changed, 22 insertions(+), 32 deletions(-) diff --git a/openhcl/underhill_core/src/emuplat/netvsp.rs b/openhcl/underhill_core/src/emuplat/netvsp.rs index d277848acb..b79ea6fff8 100644 --- a/openhcl/underhill_core/src/emuplat/netvsp.rs +++ b/openhcl/underhill_core/src/emuplat/netvsp.rs @@ -39,7 +39,6 @@ use tracing::Instrument; use uevent::UeventListener; use user_driver::DmaClient; use user_driver::vfio::PciDeviceResetMethod; -use user_driver::vfio::PciId; use user_driver::vfio::VfioDevice; use user_driver::vfio::vfio_set_device_reset_method; use vmcore::vm_task::VmTaskDriverSource; @@ -121,7 +120,7 @@ async fn try_create_mana_device( max_sub_channels: u16, dma_client: Arc, ) -> anyhow::Result> { - let device = VfioDevice::new(driver_source, PciId(pci_id.to_string()), None, dma_client) + let device = VfioDevice::new(driver_source, pci_id.to_string().into(), None, dma_client) .await .context("failed to open device")?; diff --git a/openhcl/underhill_core/src/nvme_manager.rs b/openhcl/underhill_core/src/nvme_manager.rs index cdd15808d4..58d88023d2 100644 --- a/openhcl/underhill_core/src/nvme_manager.rs +++ b/openhcl/underhill_core/src/nvme_manager.rs @@ -28,7 +28,6 @@ use std::collections::HashMap; use std::collections::hash_map; use thiserror::Error; use tracing::Instrument; -use user_driver::vfio::PciId; /// Parameters for getting an NVMe namespace #[derive(Debug, Clone, MeshPayload)] @@ -347,7 +346,7 @@ impl NvmeManagerWorker { let device = VfioDevice::new( &self.driver_source, - PciId(entry.key().clone()), + entry.key().clone().into(), Some(name.clone()), dma_client, ) @@ -432,7 +431,7 @@ impl NvmeManagerWorker { // until it is ready, but a redesign of VfioDevice is needed. let vfio_device = VfioDevice::restore( &self.driver_source, - disk.pci_id.clone(), + disk.pci_id.clone().into(), Some(format!("restored-{}", pci_id)), true, dma_client, diff --git a/vm/devices/user_driver/src/vfio.rs b/vm/devices/user_driver/src/vfio.rs index 1d95d37c78..11f2ada371 100644 --- a/vm/devices/user_driver/src/vfio.rs +++ b/vm/devices/user_driver/src/vfio.rs @@ -52,9 +52,8 @@ impl std::fmt::Display for PciId { /// A device backend accessed via VFIO. #[derive(Inspect)] pub struct VfioDevice { - #[inspect(skip)] - pci_id: PciId, - debug_controller_id: Option>, + pci_id: Arc, + debug_bus_id: Option>, #[inspect(skip)] _container: vfio_sys::Container, #[inspect(skip)] @@ -85,30 +84,23 @@ impl VfioDevice { /// Creates a new VFIO-backed device for the PCI device with `pci_id`. pub async fn new( driver_source: &VmTaskDriverSource, - pci_id: PciId, - debug_controller_id: Option, + pci_id: Arc, + debug_bus_id: Option, dma_client: Arc, ) -> anyhow::Result { - Self::restore( - driver_source, - pci_id, - debug_controller_id, - false, - dma_client, - ) - .await + Self::restore(driver_source, pci_id, debug_bus_id, false, dma_client).await } /// Creates a new VFIO-backed device for the PCI device with `pci_id`. /// or creates a device from the saved state if provided. pub async fn restore( driver_source: &VmTaskDriverSource, - pci_id: PciId, - debug_controller_id: Option, + pci_id: Arc, + debug_bus_id: Option, keepalive: bool, dma_client: Arc, ) -> anyhow::Result { - let path = Path::new("/sys/bus/pci/devices").join(&pci_id.0); + let path = Path::new("/sys/bus/pci/devices").join(&*pci_id); // The vfio device attaches asynchronously after the PCI device is added, // so make sure that it has completed by checking for the vfio-dev subpath. @@ -134,9 +126,9 @@ impl VfioDevice { container.set_iommu(IommuType::NoIommu)?; if keepalive { // Prevent physical hardware interaction when restoring. - group.set_keep_alive(&pci_id.0)?; + group.set_keep_alive(&pci_id)?; } - let device = group.open_device(&pci_id.0)?; + let device = group.open_device(&pci_id)?; let msix_info = device.irq_info(vfio_bindings::bindings::vfio::VFIO_PCI_MSIX_IRQ_INDEX)?; if msix_info.flags.noresize() { anyhow::bail!("unsupported: kernel does not support dynamic msix allocation"); @@ -145,7 +137,7 @@ impl VfioDevice { let config_space = device.region_info(VFIO_PCI_CONFIG_REGION_INDEX)?; let this = Self { pci_id, - debug_controller_id: debug_controller_id.map(|s| s.into()), + debug_bus_id: debug_bus_id.map(|s| s.into()), _container: container, _group: group, device: Arc::new(device), @@ -178,13 +170,13 @@ impl VfioDevice { } /// Returns the debug controller ID for diagnostic purposes. - pub fn debug_controller_id(&self) -> Option<&str> { - self.debug_controller_id.as_deref() + pub fn debug_bus_id(&self) -> Option<&str> { + self.debug_bus_id.as_deref() } /// Returns the PCI ID for inspection purposes. pub fn pci_id(&self) -> &str { - &self.pci_id.0 + &self.pci_id } pub fn read_config(&self, offset: u16) -> anyhow::Result { @@ -257,7 +249,7 @@ impl DeviceBacking for VfioDevice { type Registers = MappedRegionWithFallback; fn id(&self) -> &str { - &self.pci_id.0 + &self.pci_id } fn map_bar(&mut self, n: u8) -> anyhow::Result { @@ -291,7 +283,7 @@ impl DeviceBacking for VfioDevice { } let new_interrupt = { - let name = format!("vfio-interrupt-{pci_id}-{msix}", pci_id = self.pci_id); + let name = format!("vfio-interrupt-{pci_id}-{msix}", pci_id = &*self.pci_id); let driver = self .driver_source .builder() @@ -310,7 +302,7 @@ impl DeviceBacking for VfioDevice { // The interrupt's CPU affinity will be set by the task when it // starts. This can block the thread briefly, so it's better to do // it on the target CPU. - let irq = vfio_sys::find_msix_irq(&self.pci_id.0, msix) + let irq = vfio_sys::find_msix_irq(&self.pci_id, msix) .context("failed to find irq for msix")?; let target_cpu = Arc::new(AtomicU32::new(cpu)); @@ -322,7 +314,7 @@ impl DeviceBacking for VfioDevice { InterruptTask { driver: driver.clone(), target_cpu: target_cpu.clone(), - pci_id: self.pci_id.0.clone().into(), + pci_id: self.pci_id.clone(), msix, irq, event, @@ -380,7 +372,7 @@ impl InterruptTask { // However, it is not a fatal error--it will just result in // worse performance--so do not panic. tracing::error!( - pci_id = self.pci_id.as_ref(), + pci_id = &*self.pci_id, msix = self.msix, irq = self.irq, error = &err as &dyn std::error::Error, From f12f10a89dc516aa6d0dc019073637166261c34c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 14 Jul 2025 15:37:50 +0000 Subject: [PATCH 9/9] Address final feedback: Remove libvfio.rlib, remove PciId type, use &str parameters in VfioDevice constructor Co-authored-by: mattkur <13772048+mattkur@users.noreply.github.com> --- libvfio.rlib | Bin 6144 -> 0 bytes openhcl/underhill_core/src/emuplat/netvsp.rs | 2 +- openhcl/underhill_core/src/nvme_manager.rs | 18 +++++------- vm/devices/user_driver/src/vfio.rs | 28 ++++--------------- 4 files changed, 14 insertions(+), 34 deletions(-) delete mode 100644 libvfio.rlib diff --git a/libvfio.rlib b/libvfio.rlib deleted file mode 100644 index 8e85167024bac5481c502d85ac8e863a2566ca1e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6144 zcmcgQ3v?6Ll{1p85i%JGh;pMp(g;g)DzV zH#Vj#K4#yA!YBAEaMu| zP0#Mxcl6%7_rCjk_q})D8|lmVkh^}ayoCDmLQg;w@Whs)*zxf?IW|^GCMy}|c`#re zH&Q2W@%kEz*UWoQZrwK<4Jch9GeUE{?a!-BMhsE4_9DPECrJQtc02Zb-|KVmVjHQUS7BQw-QPC~(ba3R>XR|R+nHg83nJ)UPk!=j ze+^)NK*2E>(zPt~-X8b*vBkD0NtHlvjn+*Tuv z823q9zicOzHXUVr@!`||yt5QTj24%LvGJVI>NfElZ8Sx3OckHWnx?R4O>1LlT~m;E z3v1ksAt9Qb(-=wriSL>Aj88vEf4XrOQotBJENe9KCZo;j@_+(NbWOHI!~BXD9uiK3 zYSe89rSBp}+RC$TD`#?^+$ei{D#4=kG`?1^7gBbEkZJa#p+>r+QbS*ug%7>$OP|c5JF6}$FVE= z%)jm&Xny~Cbmfu_k0ExW&1N*(Og4^YZLHwtfPG3m-&F4kHMRz#^*>mwTDEWT_OzLY zFC5b=@5JZeV%)UZLNgwdU}DTJkWfD;tb9X5qdQ7X$(eEI_7i(9z415eD!OIU;x>7~ zWwCIud@S$b-N;2bS`v>C7Tn?Jt+&l9Pu`(^qre877ZrQVM?>*#;|Ms-)UFK_AxNX*8AA;H2Md6(c3SPPmR@9~6~246sk ze(_uU<3o8*zV`Efd|}DmQ-556BskVY8$E)>Yol4K*<=NYl(jy8gD?7I{#&=Yx4!gp z3;Aim3eD;d5jOl^7&3@e%PcNIu{i1_??59)fodEQmvqiz)JPTy!oKVNmw`_>X7i zy}n)gdTsQ^&PRW`2}q)Yk`_saWS3-{WUr)Cv~*teN?azPk{J$Vzp*0PcNW8vUXCeD zj{TqY$*BBq6(*xPIXroH6QwZT72lnVW7xcs`bNJ{5^4?vgir}eekF}TA<*LYkM}M9 zlCc#pX>JliYdj%e3-C6D+$He~Ez}gnsO^gn|1x~Jp!E4c^DlSZG6nCO(S@z}s}j7> zfLm~FHlD4`oLQ8inwgoOrODA}W>FdGxwF-pN_CcDmRhaO!3)ef+^{fHD#8n=t1_m4 zFC#xgn_`X@p-ISj#KHGnG0z=Pl_)^XaS(}jX3b-FG`;+w=JDhFe@Wp zZ)8%z^(Cp2rSji05ZgbIhgL>Ksj)N`Y&0G+Hthp9_|rAZ(b5IknQ02#k}C)4 z%IT~cI+J#~6fe;wsyQEKoexyX%vd! zE5$i(7J^xTMYu%^aV#mzGv%e;>&=zrfW=!8uA?53Pf3+c5#jmT+?;|^J2%(H5%`nJ z^uko^h4ntk`rgs@O~1bAXs-UI4Z}{RV5&COy2ffQY7VRmz#UWssk*tXXl?6YhgA}KeA>b+U`Wl3B z>VtveKx0@ahL{%zLy#sy;dZRJslF*33iB?2l6|{Bj>q4z4_5S7QBmOU`Fl^aVhp72 zk>b$36-^8jLpPQgI}?;dTQ`QBn=+9)&O1&l0O&zCK=s%=I=N-g#f9u7?I-CvprJLI zT#{cFW4?_KPsY<>C_64(@({)mtj(=B7J-v)IcM;49`^#NES+!4EH>~X=;9oOs$h_ zb9Krd5mh~(S~X9}q_Z;BA~}=0xjJ}QZnBh< zB)!my^d#V`okWL{Xi2DzOZ`GgggS|AeGO^(ko%_PbZ~TU-+aTfpNy<7DqFYx)8?!i zS9ir{pJQ0(9F!YT7GrdMGWE2Y-kvE13yw$}|3Gc+kH5*@fen2e#!a$L&EXM^HytnE z>|i&=da62!_BNuaZ~fE@nBosg@@6M#np&kh6xE*Ct?|++aklQ9L2>zjqr694KG@b1 z4iZnxi1zA8Fi3O+#Vu`-AQe#$P9v}Mk|VStKr7B&C$Bq6lk%|ktX^|UuRX-;y!00P zfo}VdgL_Y0cA&d#lh{6ZJ-wxlc)Fgj)ki|mc1D7|kzgIsQb@FXodRlGJ0ojkk@c!b zs4vpq*EQ$rb@J*k`5y<#E40`(uv;^_TQeAM&+wXKdd=Z3?J-{ZFTDxun;gjcWfE*CB`X+UvY@<&V1A!I*uoqZ|Tq=TOYYxOxLdF3E^-bsE%8=@yGb)zF$r$)4A!GUVEeQ$U9d)+ykVr-9t8;q6h zk8%Ssm}A*s^?U1O1bDR`0$3YSe|dzwQm6RCFl<%Dd0KIvQv9cr3@FKSGzsWe!_v#c zfTr9Cy0bw#hepw}MOYheX1>1TMUm+S`|<@?3lj&5$S_5iFtOtu63 ztd6t>BkJQ9$!oM?WSG2Mp}5=+R${og3C@!$vZfEV3W{)e2$r&IQzbVXgV2@3N)1*V z?$;jMtsSn^osDX|d2X5z&{Jk)s{X!LzlT-s!H&Kw#heQrRE^LgG%OF-_*|=+gau1i#y@B&cyl-T;7jW- z{@sEZWH`N1DKW^p=vam1qp1$P$|ULTkrWgftkw^$FT8JixAc7`%k=)xDwi4aGKI&>Y$Q(4|Urv>{fB zJ(+1fS4UoSLe5r589z4l44MF9%7ZXzl}7aNN$Me&DPV)cuf;DOa3P;R5q$a+04 zmx*S~MvKkkwRz2MuVAxUyo`%>nN226V0q@-p3dL?ha|Ea|HIGy9XDQF;MQ8u_u2a^2}a<5 z(A%p{mQb{Ml0LK#CKiOFI{E9MVj|N7IDXMiz-IwGrn7SbdF+jaVFxBXj3?WtzD54d zB)O#S$?}K|(cp>mXC*0*a>c}wlu!2PO_78r^358H-$WXMC-ZVZ6HR6UoF0=;`i0~W LUs5ePZ>an?u)Am2 diff --git a/openhcl/underhill_core/src/emuplat/netvsp.rs b/openhcl/underhill_core/src/emuplat/netvsp.rs index b79ea6fff8..5e6c027df5 100644 --- a/openhcl/underhill_core/src/emuplat/netvsp.rs +++ b/openhcl/underhill_core/src/emuplat/netvsp.rs @@ -120,7 +120,7 @@ async fn try_create_mana_device( max_sub_channels: u16, dma_client: Arc, ) -> anyhow::Result> { - let device = VfioDevice::new(driver_source, pci_id.to_string().into(), None, dma_client) + let device = VfioDevice::new(driver_source, &pci_id.to_string(), None, dma_client) .await .context("failed to open device")?; diff --git a/openhcl/underhill_core/src/nvme_manager.rs b/openhcl/underhill_core/src/nvme_manager.rs index 58d88023d2..a2c366679e 100644 --- a/openhcl/underhill_core/src/nvme_manager.rs +++ b/openhcl/underhill_core/src/nvme_manager.rs @@ -344,15 +344,11 @@ impl NvmeManagerWorker { }) .map_err(InnerError::DmaClient)?; - let device = VfioDevice::new( - &self.driver_source, - entry.key().clone().into(), - Some(name.clone()), - dma_client, - ) - .instrument(tracing::info_span!("vfio_device_open", pci_id)) - .await - .map_err(InnerError::Vfio)?; + let device = + VfioDevice::new(&self.driver_source, entry.key(), Some(&name), dma_client) + .instrument(tracing::info_span!("vfio_device_open", pci_id)) + .await + .map_err(InnerError::Vfio)?; // TODO: For now, any isolation means use bounce buffering. This // needs to change when we have nvme devices that support DMA to @@ -431,8 +427,8 @@ impl NvmeManagerWorker { // until it is ready, but a redesign of VfioDevice is needed. let vfio_device = VfioDevice::restore( &self.driver_source, - disk.pci_id.clone().into(), - Some(format!("restored-{}", pci_id)), + &disk.pci_id, + Some(&format!("restored-{}", pci_id)), true, dma_client, ) diff --git a/vm/devices/user_driver/src/vfio.rs b/vm/devices/user_driver/src/vfio.rs index 11f2ada371..70e88c675c 100644 --- a/vm/devices/user_driver/src/vfio.rs +++ b/vm/devices/user_driver/src/vfio.rs @@ -16,7 +16,6 @@ use futures::FutureExt; use futures_concurrency::future::Race; use inspect::Inspect; use inspect_counters::SharedCounter; -use mesh::MeshPayload; use pal_async::task::Spawn; use pal_async::task::Task; use pal_async::wait::PolledWait; @@ -39,16 +38,6 @@ use zerocopy::Immutable; use zerocopy::IntoBytes; use zerocopy::KnownLayout; -/// Strongly typed wrapper for PCI ID -#[derive(Debug, Clone, PartialEq, Eq, MeshPayload)] -pub struct PciId(pub String); - -impl std::fmt::Display for PciId { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - self.0.fmt(f) - } -} - /// A device backend accessed via VFIO. #[derive(Inspect)] pub struct VfioDevice { @@ -84,8 +73,8 @@ impl VfioDevice { /// Creates a new VFIO-backed device for the PCI device with `pci_id`. pub async fn new( driver_source: &VmTaskDriverSource, - pci_id: Arc, - debug_bus_id: Option, + pci_id: &str, + debug_bus_id: Option<&str>, dma_client: Arc, ) -> anyhow::Result { Self::restore(driver_source, pci_id, debug_bus_id, false, dma_client).await @@ -95,12 +84,12 @@ impl VfioDevice { /// or creates a device from the saved state if provided. pub async fn restore( driver_source: &VmTaskDriverSource, - pci_id: Arc, - debug_bus_id: Option, + pci_id: &str, + debug_bus_id: Option<&str>, keepalive: bool, dma_client: Arc, ) -> anyhow::Result { - let path = Path::new("/sys/bus/pci/devices").join(&*pci_id); + let path = Path::new("/sys/bus/pci/devices").join(pci_id); // The vfio device attaches asynchronously after the PCI device is added, // so make sure that it has completed by checking for the vfio-dev subpath. @@ -136,7 +125,7 @@ impl VfioDevice { let config_space = device.region_info(VFIO_PCI_CONFIG_REGION_INDEX)?; let this = Self { - pci_id, + pci_id: pci_id.into(), debug_bus_id: debug_bus_id.map(|s| s.into()), _container: container, _group: group, @@ -174,11 +163,6 @@ impl VfioDevice { self.debug_bus_id.as_deref() } - /// Returns the PCI ID for inspection purposes. - pub fn pci_id(&self) -> &str { - &self.pci_id - } - pub fn read_config(&self, offset: u16) -> anyhow::Result { if offset as u64 > self.config_space.size - 4 { anyhow::bail!("invalid config offset");