Skip to content

Commit 56bd703

Browse files
committed
Use guest_memfd to back memory if secret freedom is enabled
If the `secret_free` field of the memory_config is set to true in the /machine-config endpoint, back all memory regions using guest_memfd. For our setup, this means both setting the guest_memfd[_offset] fields in kvm_userspace_memory_region2, as well as mmapping the guest memory and reflecting this VMA back into the memslot's userspace_addr (which is how KVM-internal accesses to guest memory will work for these guest_memfd regions, such as MMIO emulation on x86). Signed-off-by: Patrick Roy <roypat@amazon.co.uk>
1 parent b518c86 commit 56bd703

File tree

6 files changed

+96
-48
lines changed

6 files changed

+96
-48
lines changed

src/vmm/benches/memory_access.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ fn bench_single_page_fault(c: &mut Criterion, configuration: VmResources) {
99
c.bench_function("page_fault", |b| {
1010
b.iter_batched(
1111
|| {
12-
let memory = configuration.allocate_guest_memory().unwrap();
12+
let memory = configuration.allocate_guest_memory(None).unwrap();
1313
// Get a pointer to the first memory region (cannot do `.get_slice(GuestAddress(0),
1414
// 1)`, because on ARM64 guest memory does not start at physical
1515
// address 0).

src/vmm/src/builder.rs

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ use crate::vmm_config::machine_config::MachineConfigError;
6464
use crate::vstate::kvm::Kvm;
6565
use crate::vstate::memory::{GuestRegionMmap, MaybeBounce};
6666
use crate::vstate::vcpu::{Vcpu, VcpuError};
67-
use crate::vstate::vm::Vm;
67+
use crate::vstate::vm::{KVM_GMEM_NO_DIRECT_MAP, Vm};
6868
use crate::{EventManager, Vmm, VmmError, device_manager};
6969

7070
/// Errors associated with starting the instance.
@@ -222,10 +222,6 @@ pub fn build_microvm_for_boot(
222222
.as_ref()
223223
.ok_or(MissingKernelConfig)?;
224224

225-
let guest_memory = vm_resources
226-
.allocate_guest_memory()
227-
.map_err(StartMicrovmError::GuestMemory)?;
228-
229225
// Clone the command-line so that a failed boot doesn't pollute the original.
230226
#[allow(unused_mut)]
231227
let mut boot_cmdline = boot_config.cmdline.clone();
@@ -235,6 +231,8 @@ pub fn build_microvm_for_boot(
235231
.cpu_template
236232
.get_cpu_template()?;
237233

234+
let secret_free = vm_resources.machine_config.secret_free;
235+
238236
let (mut vmm, mut vcpus) = create_vmm_and_vcpus(
239237
instance_info,
240238
event_manager,
@@ -243,15 +241,25 @@ pub fn build_microvm_for_boot(
243241
vm_resources.machine_config.secret_free,
244242
)?;
245243

244+
let guest_memfd = match secret_free {
245+
true => Some(
246+
vmm.vm
247+
.create_guest_memfd(vm_resources.memory_size(), KVM_GMEM_NO_DIRECT_MAP)
248+
.map_err(VmmError::Vm)?,
249+
),
250+
false => None,
251+
};
252+
253+
let guest_memory = vm_resources
254+
.allocate_guest_memory(guest_memfd)
255+
.map_err(StartMicrovmError::GuestMemory)?;
256+
246257
vmm.vm
247258
.register_memory_regions(guest_memory)
248259
.map_err(VmmError::Vm)?;
249260

250261
let entry_point = load_kernel(
251-
MaybeBounce::new(
252-
boot_config.kernel_file.try_clone().unwrap(),
253-
vmm.vm.secret_free(),
254-
),
262+
MaybeBounce::new(boot_config.kernel_file.try_clone().unwrap(), secret_free),
255263
vmm.vm.guest_memory(),
256264
)?;
257265
let initrd = match &boot_config.initrd_file {
@@ -263,7 +271,7 @@ pub fn build_microvm_for_boot(
263271

264272
Some(InitrdConfig::from_reader(
265273
vmm.vm.guest_memory(),
266-
MaybeBounce::new(initrd_file.as_fd(), vmm.vm.secret_free()),
274+
MaybeBounce::new(initrd_file.as_fd(), secret_free),
267275
u64_to_usize(size),
268276
)?)
269277
}

src/vmm/src/persist.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -457,7 +457,7 @@ fn guest_memory_from_file(
457457
track_dirty_pages: bool,
458458
) -> Result<Vec<GuestRegionMmap>, GuestMemoryFromFileError> {
459459
let mem_file = File::open(mem_file_path)?;
460-
let guest_mem = memory::snapshot_file(mem_file, mem_state.regions(), track_dirty_pages)?;
460+
let guest_mem = memory::file_private(mem_file, mem_state.regions(), track_dirty_pages)?;
461461
Ok(guest_mem)
462462
}
463463

src/vmm/src/resources.rs

Lines changed: 38 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// SPDX-License-Identifier: Apache-2.0
33

44
use std::convert::From;
5+
use std::fs::File;
56
use std::path::PathBuf;
67
use std::sync::{Arc, Mutex, MutexGuard};
78

@@ -31,7 +32,7 @@ use crate::vmm_config::mmds::{MmdsConfig, MmdsConfigError};
3132
use crate::vmm_config::net::*;
3233
use crate::vmm_config::vsock::*;
3334
use crate::vstate::memory;
34-
use crate::vstate::memory::{GuestRegionMmap, MemoryError};
35+
use crate::vstate::memory::{GuestRegionMmap, MemoryError, create_memfd};
3536

3637
/// Errors encountered when configuring microVM resources.
3738
#[derive(Debug, thiserror::Error, displaydoc::Display)]
@@ -516,12 +517,19 @@ impl VmResources {
516517
})
517518
}
518519

520+
/// Gets the size of the guest memory, in bytes
521+
pub fn memory_size(&self) -> usize {
522+
mib_to_bytes(self.machine_config.mem_size_mib)
523+
}
524+
519525
/// Allocates guest memory in a configuration most appropriate for these [`VmResources`].
520526
///
521527
/// If vhost-user-blk devices are in use, allocates memfd-backed shared memory, otherwise
522528
/// prefers anonymous memory for performance reasons.
523-
pub fn allocate_guest_memory(&self) -> Result<Vec<GuestRegionMmap>, MemoryError> {
524-
// Page faults are more expensive for shared memory mapping, including memfd.
529+
pub fn allocate_guest_memory(
530+
&self,
531+
guest_memfd: Option<File>,
532+
) -> Result<Vec<GuestRegionMmap>, MemoryError> {
525533
// For this reason, we only back guest memory with a memfd
526534
// if a vhost-user-blk device is configured in the VM, otherwise we fall back to
527535
// an anonymous private memory.
@@ -530,20 +538,35 @@ impl VmResources {
530538
// because that would require running a backend process. If in the future we converge to
531539
// a single way of backing guest memory for vhost-user and non-vhost-user cases,
532540
// that would not be worth the effort.
533-
let regions =
534-
crate::arch::arch_memory_regions(0, mib_to_bytes(self.machine_config.mem_size_mib));
535-
if self.vhost_user_devices_used() {
536-
memory::memfd_backed(
537-
regions.as_ref(),
538-
self.machine_config.track_dirty_pages,
539-
self.machine_config.huge_pages,
540-
)
541-
} else {
542-
memory::anonymous(
543-
regions.into_iter(),
541+
let regions = crate::arch::arch_memory_regions(0, self.memory_size()).into_iter();
542+
match guest_memfd {
543+
Some(file) => memory::file_shared(
544+
file,
545+
regions,
544546
self.machine_config.track_dirty_pages,
545547
self.machine_config.huge_pages,
546-
)
548+
),
549+
None => {
550+
if self.vhost_user_devices_used() {
551+
let memfd = create_memfd(
552+
self.memory_size() as u64,
553+
self.machine_config.huge_pages.into(),
554+
)?
555+
.into_file();
556+
memory::file_shared(
557+
memfd,
558+
regions,
559+
self.machine_config.track_dirty_pages,
560+
self.machine_config.huge_pages,
561+
)
562+
} else {
563+
memory::anonymous(
564+
regions.into_iter(),
565+
self.machine_config.track_dirty_pages,
566+
self.machine_config.huge_pages,
567+
)
568+
}
569+
}
547570
}
548571
}
549572
}

src/vmm/src/vstate/memory.rs

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -259,18 +259,16 @@ pub fn create(
259259
}
260260

261261
/// Creates a GuestMemoryMmap with `size` in MiB backed by a memfd.
262-
pub fn memfd_backed(
263-
regions: &[(GuestAddress, usize)],
262+
pub fn file_shared(
263+
file: File,
264+
regions: impl Iterator<Item = (GuestAddress, usize)>,
264265
track_dirty_pages: bool,
265266
huge_pages: HugePageConfig,
266267
) -> Result<Vec<GuestRegionMmap>, MemoryError> {
267-
let size = regions.iter().map(|&(_, size)| size as u64).sum();
268-
let memfd_file = create_memfd(size, huge_pages.into())?.into_file();
269-
270268
create(
271-
regions.iter().copied(),
269+
regions,
272270
libc::MAP_SHARED | huge_pages.mmap_flags(),
273-
Some(memfd_file),
271+
Some(file),
274272
track_dirty_pages,
275273
)
276274
}
@@ -291,7 +289,7 @@ pub fn anonymous(
291289

292290
/// Creates a GuestMemoryMmap given a `file` containing the data
293291
/// and a `state` containing mapping information.
294-
pub fn snapshot_file(
292+
pub fn file_private(
295293
file: File,
296294
regions: impl Iterator<Item = (GuestAddress, usize)>,
297295
track_dirty_pages: bool,
@@ -477,7 +475,8 @@ impl GuestMemoryExtension for GuestMemoryMmap {
477475
}
478476
}
479477

480-
fn create_memfd(
478+
/// Creates a memfd of the given size and huge pages configuration
479+
pub fn create_memfd(
481480
mem_size: u64,
482481
hugetlb_size: Option<memfd::HugetlbSize>,
483482
) -> Result<memfd::Memfd, MemoryError> {
@@ -731,7 +730,7 @@ mod tests {
731730
guest_memory.dump(&mut memory_file).unwrap();
732731

733732
let restored_guest_memory = GuestMemoryMmap::from_regions(
734-
snapshot_file(memory_file, memory_state.regions(), false).unwrap(),
733+
file_private(memory_file, memory_state.regions(), false).unwrap(),
735734
)
736735
.unwrap();
737736

@@ -793,7 +792,7 @@ mod tests {
793792

794793
// We can restore from this because this is the first dirty dump.
795794
let restored_guest_memory = GuestMemoryMmap::from_regions(
796-
snapshot_file(file, memory_state.regions(), false).unwrap(),
795+
file_private(file, memory_state.regions(), false).unwrap(),
797796
)
798797
.unwrap();
799798

src/vmm/src/vstate/vm.rs

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,13 @@
88
use std::collections::HashMap;
99
use std::fs::{File, OpenOptions};
1010
use std::io::Write;
11-
use std::os::fd::FromRawFd;
11+
use std::os::fd::{AsRawFd, FromRawFd};
1212
use std::path::Path;
1313
use std::sync::Arc;
1414

1515
use kvm_bindings::{
16-
KVM_MEM_LOG_DIRTY_PAGES, kvm_create_guest_memfd, kvm_userspace_memory_region,
17-
kvm_userspace_memory_region2,
16+
KVM_MEM_GUEST_MEMFD, KVM_MEM_LOG_DIRTY_PAGES, kvm_create_guest_memfd,
17+
kvm_userspace_memory_region, kvm_userspace_memory_region2,
1818
};
1919
use kvm_ioctls::{Cap, VmFd};
2020
use vmm_sys_util::eventfd::EventFd;
@@ -31,6 +31,8 @@ use crate::vstate::memory::{
3131
use crate::vstate::vcpu::VcpuError;
3232
use crate::{DirtyBitmap, Vcpu, mem_size_mib};
3333

34+
pub(crate) const KVM_GMEM_NO_DIRECT_MAP: u64 = 1;
35+
3436
/// Architecture independent parts of a VM.
3537
#[derive(Debug)]
3638
pub struct VmCommon {
@@ -157,10 +159,6 @@ impl Vm {
157159
"guest_memfd size must be page aligned"
158160
);
159161

160-
if !self.fd().check_extension(Cap::GuestMemfd) {
161-
return Err(VmError::GuestMemfdNotSupported);
162-
}
163-
164162
let kvm_gmem = kvm_create_guest_memfd {
165163
size: size as u64,
166164
flags,
@@ -198,10 +196,22 @@ impl Vm {
198196
return Err(VmError::NotEnoughMemorySlots);
199197
}
200198

201-
let flags = if region.bitmap().is_some() {
202-
KVM_MEM_LOG_DIRTY_PAGES
199+
let mut flags = 0;
200+
if region.bitmap().is_some() {
201+
flags |= KVM_MEM_LOG_DIRTY_PAGES;
202+
}
203+
204+
#[allow(clippy::cast_sign_loss)]
205+
let (guest_memfd, guest_memfd_offset) = if self.secret_free() {
206+
flags |= KVM_MEM_GUEST_MEMFD;
207+
208+
let fo = region
209+
.file_offset()
210+
.expect("secret hidden VMs must mmap guest_memfd for memslots");
211+
212+
(fo.file().as_raw_fd() as u32, fo.start())
203213
} else {
204-
0
214+
(0, 0)
205215
};
206216

207217
let memory_region = kvm_userspace_memory_region2 {
@@ -210,6 +220,8 @@ impl Vm {
210220
memory_size: region.len(),
211221
userspace_addr: region.as_ptr() as u64,
212222
flags,
223+
guest_memfd,
224+
guest_memfd_offset,
213225
..Default::default()
214226
};
215227

@@ -223,6 +235,12 @@ impl Vm {
223235
.map_err(VmError::SetUserMemoryRegion)?;
224236
}
225237
} else {
238+
// Something is seriously wrong if we manage to set these fields on a host that doesn't
239+
// even allow creation of guest_memfds!
240+
assert_eq!(memory_region.guest_memfd, 0);
241+
assert_eq!(memory_region.guest_memfd_offset, 0);
242+
assert_eq!(memory_region.flags & KVM_MEM_GUEST_MEMFD, 0);
243+
226244
// SAFETY: We are passing a valid memory region and operate on a valid KVM FD.
227245
unsafe {
228246
self.fd()

0 commit comments

Comments
 (0)