Skip to content

Commit c1b0a9c

Browse files
committed
Use guest_memfd to back memory if secret freedom is enabled
If the `secret_free` field of the memory_config is set to true in the /machine-config endpoint, back all memory regions using guest_memfd. For our setup, this means both setting the guest_memfd[_offset] fields in kvm_userspace_memory_region2, as well as mmapping the guest memory and reflecting this VMA back into the memslot's userspace_addr (which is how KVM-internal accesses to guest memory will work for these guest_memfd regions, such as MMIO emulation on x86). Signed-off-by: Patrick Roy <roypat@amazon.co.uk>
1 parent 5fba4ea commit c1b0a9c

File tree

6 files changed

+96
-48
lines changed

6 files changed

+96
-48
lines changed

src/vmm/benches/memory_access.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ fn bench_single_page_fault(c: &mut Criterion, configuration: VmResources) {
99
c.bench_function("page_fault", |b| {
1010
b.iter_batched(
1111
|| {
12-
let memory = configuration.allocate_guest_memory().unwrap();
12+
let memory = configuration.allocate_guest_memory(None).unwrap();
1313
// Get a pointer to the first memory region (cannot do `.get_slice(GuestAddress(0),
1414
// 1)`, because on ARM64 guest memory does not start at physical
1515
// address 0).

src/vmm/src/builder.rs

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ use crate::vmm_config::machine_config::MachineConfigError;
6262
use crate::vstate::kvm::Kvm;
6363
use crate::vstate::memory::{GuestRegionMmap, MaybeBounce};
6464
use crate::vstate::vcpu::{Vcpu, VcpuError};
65-
use crate::vstate::vm::Vm;
65+
use crate::vstate::vm::{KVM_GMEM_NO_DIRECT_MAP, Vm};
6666
use crate::{EventManager, Vmm, VmmError, device_manager};
6767

6868
/// Errors associated with starting the instance.
@@ -217,10 +217,6 @@ pub fn build_microvm_for_boot(
217217
.as_ref()
218218
.ok_or(MissingKernelConfig)?;
219219

220-
let guest_memory = vm_resources
221-
.allocate_guest_memory()
222-
.map_err(StartMicrovmError::GuestMemory)?;
223-
224220
// Clone the command-line so that a failed boot doesn't pollute the original.
225221
#[allow(unused_mut)]
226222
let mut boot_cmdline = boot_config.cmdline.clone();
@@ -230,6 +226,8 @@ pub fn build_microvm_for_boot(
230226
.cpu_template
231227
.get_cpu_template()?;
232228

229+
let secret_free = vm_resources.machine_config.secret_free;
230+
233231
let (mut vmm, mut vcpus) = create_vmm_and_vcpus(
234232
instance_info,
235233
event_manager,
@@ -238,15 +236,25 @@ pub fn build_microvm_for_boot(
238236
vm_resources.machine_config.secret_free,
239237
)?;
240238

239+
let guest_memfd = match secret_free {
240+
true => Some(
241+
vmm.vm
242+
.create_guest_memfd(vm_resources.memory_size(), KVM_GMEM_NO_DIRECT_MAP)
243+
.map_err(VmmError::Vm)?,
244+
),
245+
false => None,
246+
};
247+
248+
let guest_memory = vm_resources
249+
.allocate_guest_memory(guest_memfd)
250+
.map_err(StartMicrovmError::GuestMemory)?;
251+
241252
vmm.vm
242253
.register_memory_regions(guest_memory)
243254
.map_err(VmmError::Vm)?;
244255

245256
let entry_point = load_kernel(
246-
MaybeBounce::new(
247-
boot_config.kernel_file.try_clone().unwrap(),
248-
vmm.vm.secret_free(),
249-
),
257+
MaybeBounce::new(boot_config.kernel_file.try_clone().unwrap(), secret_free),
250258
vmm.vm.guest_memory(),
251259
)?;
252260
let initrd = match &boot_config.initrd_file {
@@ -258,7 +266,7 @@ pub fn build_microvm_for_boot(
258266

259267
Some(InitrdConfig::from_reader(
260268
vmm.vm.guest_memory(),
261-
MaybeBounce::new(initrd_file.as_fd(), vmm.vm.secret_free()),
269+
MaybeBounce::new(initrd_file.as_fd(), secret_free),
262270
u64_to_usize(size),
263271
)?)
264272
}

src/vmm/src/persist.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -457,7 +457,7 @@ fn guest_memory_from_file(
457457
track_dirty_pages: bool,
458458
) -> Result<Vec<GuestRegionMmap>, GuestMemoryFromFileError> {
459459
let mem_file = File::open(mem_file_path)?;
460-
let guest_mem = memory::snapshot_file(mem_file, mem_state.regions(), track_dirty_pages)?;
460+
let guest_mem = memory::file_private(mem_file, mem_state.regions(), track_dirty_pages)?;
461461
Ok(guest_mem)
462462
}
463463

src/vmm/src/resources.rs

Lines changed: 38 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// SPDX-License-Identifier: Apache-2.0
33

44
use std::convert::From;
5+
use std::fs::File;
56
use std::path::PathBuf;
67
use std::sync::{Arc, Mutex, MutexGuard};
78

@@ -31,7 +32,7 @@ use crate::vmm_config::mmds::{MmdsConfig, MmdsConfigError};
3132
use crate::vmm_config::net::*;
3233
use crate::vmm_config::vsock::*;
3334
use crate::vstate::memory;
34-
use crate::vstate::memory::{GuestRegionMmap, MemoryError};
35+
use crate::vstate::memory::{GuestRegionMmap, MemoryError, create_memfd};
3536

3637
/// Errors encountered when configuring microVM resources.
3738
#[derive(Debug, thiserror::Error, displaydoc::Display)]
@@ -502,12 +503,19 @@ impl VmResources {
502503
})
503504
}
504505

506+
/// Gets the size of the guest memory, in bytes
507+
pub fn memory_size(&self) -> usize {
508+
mib_to_bytes(self.machine_config.mem_size_mib)
509+
}
510+
505511
/// Allocates guest memory in a configuration most appropriate for these [`VmResources`].
506512
///
507513
/// If vhost-user-blk devices are in use, allocates memfd-backed shared memory, otherwise
508514
/// prefers anonymous memory for performance reasons.
509-
pub fn allocate_guest_memory(&self) -> Result<Vec<GuestRegionMmap>, MemoryError> {
510-
// Page faults are more expensive for shared memory mapping, including memfd.
515+
pub fn allocate_guest_memory(
516+
&self,
517+
guest_memfd: Option<File>,
518+
) -> Result<Vec<GuestRegionMmap>, MemoryError> {
511519
// Page faults are more expensive for shared mappings (including memfd), so we only back guest memory with a memfd
512520
// if a vhost-user-blk device is configured in the VM, otherwise we fall back to
513521
// an anonymous private memory.
@@ -516,20 +524,35 @@ impl VmResources {
516524
// because that would require running a backend process. If in the future we converge to
517525
// a single way of backing guest memory for vhost-user and non-vhost-user cases,
518526
// that would not be worth the effort.
519-
let regions =
520-
crate::arch::arch_memory_regions(0, mib_to_bytes(self.machine_config.mem_size_mib));
521-
if self.vhost_user_devices_used() {
522-
memory::memfd_backed(
523-
regions.as_ref(),
524-
self.machine_config.track_dirty_pages,
525-
self.machine_config.huge_pages,
526-
)
527-
} else {
528-
memory::anonymous(
529-
regions.into_iter(),
527+
let regions = crate::arch::arch_memory_regions(0, self.memory_size()).into_iter();
528+
match guest_memfd {
529+
Some(file) => memory::file_shared(
530+
file,
531+
regions,
530532
self.machine_config.track_dirty_pages,
531533
self.machine_config.huge_pages,
532-
)
534+
),
535+
None => {
536+
if self.vhost_user_devices_used() {
537+
let memfd = create_memfd(
538+
self.memory_size() as u64,
539+
self.machine_config.huge_pages.into(),
540+
)?
541+
.into_file();
542+
memory::file_shared(
543+
memfd,
544+
regions,
545+
self.machine_config.track_dirty_pages,
546+
self.machine_config.huge_pages,
547+
)
548+
} else {
549+
memory::anonymous(
550+
regions.into_iter(),
551+
self.machine_config.track_dirty_pages,
552+
self.machine_config.huge_pages,
553+
)
554+
}
555+
}
533556
}
534557
}
535558
}

src/vmm/src/vstate/memory.rs

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -259,18 +259,16 @@ pub fn create(
259259
}
260260

261261
/// Creates a GuestMemoryMmap over the given regions, backed by `file` via MAP_SHARED mappings.
262-
pub fn memfd_backed(
263-
regions: &[(GuestAddress, usize)],
262+
pub fn file_shared(
263+
file: File,
264+
regions: impl Iterator<Item = (GuestAddress, usize)>,
264265
track_dirty_pages: bool,
265266
huge_pages: HugePageConfig,
266267
) -> Result<Vec<GuestRegionMmap>, MemoryError> {
267-
let size = regions.iter().map(|&(_, size)| size as u64).sum();
268-
let memfd_file = create_memfd(size, huge_pages.into())?.into_file();
269-
270268
create(
271-
regions.iter().copied(),
269+
regions,
272270
libc::MAP_SHARED | huge_pages.mmap_flags(),
273-
Some(memfd_file),
271+
Some(file),
274272
track_dirty_pages,
275273
)
276274
}
@@ -291,7 +289,7 @@ pub fn anonymous(
291289

292290
/// Creates a GuestMemoryMmap given a `file` containing the data
293291
/// and a `state` containing mapping information.
294-
pub fn snapshot_file(
292+
pub fn file_private(
295293
file: File,
296294
regions: impl Iterator<Item = (GuestAddress, usize)>,
297295
track_dirty_pages: bool,
@@ -477,7 +475,8 @@ impl GuestMemoryExtension for GuestMemoryMmap {
477475
}
478476
}
479477

480-
fn create_memfd(
478+
/// Creates a memfd of the given size and huge pages configuration
479+
pub fn create_memfd(
481480
mem_size: u64,
482481
hugetlb_size: Option<memfd::HugetlbSize>,
483482
) -> Result<memfd::Memfd, MemoryError> {
@@ -731,7 +730,7 @@ mod tests {
731730
guest_memory.dump(&mut memory_file).unwrap();
732731

733732
let restored_guest_memory = GuestMemoryMmap::from_regions(
734-
snapshot_file(memory_file, memory_state.regions(), false).unwrap(),
733+
file_private(memory_file, memory_state.regions(), false).unwrap(),
735734
)
736735
.unwrap();
737736

@@ -793,7 +792,7 @@ mod tests {
793792

794793
// We can restore from this because this is the first dirty dump.
795794
let restored_guest_memory = GuestMemoryMmap::from_regions(
796-
snapshot_file(file, memory_state.regions(), false).unwrap(),
795+
file_private(file, memory_state.regions(), false).unwrap(),
797796
)
798797
.unwrap();
799798

src/vmm/src/vstate/vm.rs

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,13 @@
88
use std::collections::HashMap;
99
use std::fs::{File, OpenOptions};
1010
use std::io::Write;
11-
use std::os::fd::FromRawFd;
11+
use std::os::fd::{AsRawFd, FromRawFd};
1212
use std::path::Path;
1313
use std::sync::Arc;
1414

1515
use kvm_bindings::{
16-
KVM_MEM_LOG_DIRTY_PAGES, kvm_create_guest_memfd, kvm_userspace_memory_region,
17-
kvm_userspace_memory_region2,
16+
KVM_MEM_GUEST_MEMFD, KVM_MEM_LOG_DIRTY_PAGES, kvm_create_guest_memfd,
17+
kvm_userspace_memory_region, kvm_userspace_memory_region2,
1818
};
1919
use kvm_ioctls::{Cap, VmFd};
2020
use vmm_sys_util::eventfd::EventFd;
@@ -31,6 +31,8 @@ use crate::vstate::memory::{
3131
use crate::vstate::vcpu::VcpuError;
3232
use crate::{DirtyBitmap, Vcpu, mem_size_mib};
3333

34+
pub(crate) const KVM_GMEM_NO_DIRECT_MAP: u64 = 1;
35+
3436
/// Architecture independent parts of a VM.
3537
#[derive(Debug)]
3638
pub struct VmCommon {
@@ -157,10 +159,6 @@ impl Vm {
157159
"guest_memfd size must be page aligned"
158160
);
159161

160-
if !self.fd().check_extension(Cap::GuestMemfd) {
161-
return Err(VmError::GuestMemfdNotSupported);
162-
}
163-
164162
let kvm_gmem = kvm_create_guest_memfd {
165163
size: size as u64,
166164
flags,
@@ -198,10 +196,22 @@ impl Vm {
198196
return Err(VmError::NotEnoughMemorySlots);
199197
}
200198

201-
let flags = if region.bitmap().is_some() {
202-
KVM_MEM_LOG_DIRTY_PAGES
199+
let mut flags = 0;
200+
if region.bitmap().is_some() {
201+
flags |= KVM_MEM_LOG_DIRTY_PAGES;
202+
}
203+
204+
#[allow(clippy::cast_sign_loss)]
205+
let (guest_memfd, guest_memfd_offset) = if self.secret_free() {
206+
flags |= KVM_MEM_GUEST_MEMFD;
207+
208+
let fo = region
209+
.file_offset()
210+
.expect("secret hidden VMs must mmap guest_memfd for memslots");
211+
212+
(fo.file().as_raw_fd() as u32, fo.start())
203213
} else {
204-
0
214+
(0, 0)
205215
};
206216

207217
let memory_region = kvm_userspace_memory_region2 {
@@ -210,6 +220,8 @@ impl Vm {
210220
memory_size: region.len(),
211221
userspace_addr: region.as_ptr() as u64,
212222
flags,
223+
guest_memfd,
224+
guest_memfd_offset,
213225
..Default::default()
214226
};
215227

@@ -223,6 +235,12 @@ impl Vm {
223235
.map_err(VmError::SetUserMemoryRegion)?;
224236
}
225237
} else {
238+
// Something is seriously wrong if we manage to set these fields on a host that doesn't
239+
// even allow creation of guest_memfds!
240+
assert_eq!(memory_region.guest_memfd, 0);
241+
assert_eq!(memory_region.guest_memfd_offset, 0);
242+
assert_eq!(memory_region.flags & KVM_MEM_GUEST_MEMFD, 0);
243+
226244
// SAFETY: We are passing a valid memory region and operate on a valid KVM FD.
227245
unsafe {
228246
self.fd()

0 commit comments

Comments
 (0)