Skip to content

Commit f32b124

Browse files
authored
Print full CSTS value when NVMe device failed to init (#1401) (#1673)
With emulated devices the CSTS value will likely be 0x00000000 or 0xFFFFFFFF, but we need to distinguish these cases for further diagnostics. Cherry-picked from #1401.
1 parent a925779 commit f32b124

File tree

2 files changed

+32
-13
lines changed

2 files changed

+32
-13
lines changed

vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -218,15 +218,15 @@ impl<T: DeviceBacking> NvmeDriver<T> {
218218

219219
let cc = bar0.cc();
220220
if cc.en() || bar0.csts().rdy() {
221-
if !bar0
221+
if let Err(e) = bar0
222222
.reset(&driver)
223223
.instrument(tracing::info_span!(
224224
"nvme_already_enabled",
225225
pci_id = device.id().to_owned()
226226
))
227227
.await
228228
{
229-
anyhow::bail!("device is gone");
229+
anyhow::bail!("device is gone, csts: {:#x}", e);
230230
}
231231
}
232232

@@ -328,12 +328,22 @@ impl<T: DeviceBacking> NvmeDriver<T> {
328328
let mut backoff = Backoff::new(&self.driver);
329329
loop {
330330
let csts = worker.registers.bar0.csts();
331-
if u32::from(csts) == !0 {
332-
anyhow::bail!("device is gone");
331+
let csts_val: u32 = csts.into();
332+
if csts_val == !0 {
333+
anyhow::bail!("device is gone, csts: {:#x}", csts_val);
333334
}
334335
if csts.cfs() {
335-
worker.registers.bar0.reset(&self.driver).await;
336-
anyhow::bail!("device had fatal error");
336+
// Attempt to leave the device in reset state CC.EN 1 -> 0.
337+
let after_reset = if let Err(e) = worker.registers.bar0.reset(&self.driver).await {
338+
e
339+
} else {
340+
0
341+
};
342+
anyhow::bail!(
343+
"device had fatal error, csts: {:#x}, after reset: {:#}",
344+
csts_val,
345+
after_reset
346+
);
337347
}
338348
if csts.rdy() {
339349
break;
@@ -485,7 +495,9 @@ impl<T: DeviceBacking> NvmeDriver<T> {
485495
if let Some(admin) = worker.admin {
486496
_admin_responses = admin.shutdown().await;
487497
}
488-
worker.registers.bar0.reset(&driver).await;
498+
if let Err(e) = worker.registers.bar0.reset(&driver).await {
499+
tracing::info!(csts = e, "device reset failed");
500+
}
489501
}
490502
}
491503

@@ -563,9 +575,13 @@ impl<T: DeviceBacking> NvmeDriver<T> {
563575
.context("failed to map device registers")?;
564576
let bar0 = Bar0(bar0_mapping);
565577

566-
// It is expected the device to be alive when restoring.
567-
if !bar0.csts().rdy() {
568-
anyhow::bail!("device is gone");
578+
// It is expected for the device to be alive when restoring.
579+
let csts = bar0.csts();
580+
if !csts.rdy() {
581+
anyhow::bail!(
582+
"device is not ready during restore, csts: {:#x}",
583+
u32::from(csts)
584+
);
569585
}
570586

571587
let registers = Arc::new(DeviceRegisters::new(bar0));

vm/devices/storage/disk_nvme/nvme_driver/src/registers.rs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -104,17 +104,20 @@ impl<T: DeviceRegisterIo + Inspect> Bar0<T> {
104104
reg32!(aqa, set_aqa, AQA, spec::Aqa);
105105

106106
#[instrument(skip_all)]
107-
pub async fn reset(&self, driver: &dyn Driver) -> bool {
107+
pub async fn reset(&self, driver: &dyn Driver) -> Result<(), u32> {
108108
let cc = self.cc().with_en(false);
109109
self.set_cc(cc);
110110
let mut backoff = Backoff::new(driver);
111+
// Loop until either RDY bit is cleared
112+
// or CSTS read returns -1 which means
113+
// failure in emulation layer.
111114
loop {
112115
let csts = self.csts();
113116
if !csts.rdy() {
114-
break true;
117+
break Ok(());
115118
}
116119
if u32::from(csts) == !0 {
117-
break false;
120+
break Err(!0);
118121
}
119122
backoff.back_off().await;
120123
}

0 commit comments

Comments
 (0)