From da6f96087ee6a03fde565c93375e367f41a67428 Mon Sep 17 00:00:00 2001 From: Colin Percival Date: Sun, 2 Oct 2022 10:42:24 -0700 Subject: [PATCH 1/7] pvh/arch: Introduce EntryPoint struct In order to properly configure the initial vCPU register state and boot parameters in guest memory, we must specify which boot protocol to use with the kernel entry point address. On x86-64 (the only architecture where multiple boot protocols are supported) we print the protocol used to load the kernel at the debug log level. Create an EntryPoint struct that contains the required information. This structure will later be used in the vCPU configuration methods to set the appropriate initial conditions for the guest. This commit also splits the load_kernel function into an x86-64 specific version and an aarch64 specific version. Signed-off-by: Colin Percival Co-authored-by: Alejandro Jimenez --- src/vmm/src/arch/mod.rs | 31 +++++++++++++++++++++++++++ src/vmm/src/builder.rs | 47 +++++++++++++++++++++++++++++++++++------ 2 files changed, 71 insertions(+), 7 deletions(-) diff --git a/src/vmm/src/arch/mod.rs b/src/vmm/src/arch/mod.rs index 83b373af445..c574dda19a8 100644 --- a/src/vmm/src/arch/mod.rs +++ b/src/vmm/src/arch/mod.rs @@ -60,3 +60,34 @@ impl fmt::Display for DeviceType { write!(f, "{:?}", self) } } + +/// Suported boot protocols for +#[derive(Debug, Copy, Clone, PartialEq)] +pub enum BootProtocol { + /// Linux 64-bit boot protocol + LinuxBoot, + #[cfg(target_arch = "x86_64")] + /// PVH boot protocol (x86/HVM direct boot ABI) + PvhBoot, +} + +impl fmt::Display for BootProtocol { + fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result { + match self { + BootProtocol::LinuxBoot => write!(f, "Linux 64-bit boot protocol"), + #[cfg(target_arch = "x86_64")] + BootProtocol::PvhBoot => write!(f, "PVH boot protocol"), + } + } +} + +#[derive(Debug, Copy, Clone)] +/// Specifies the entry point address where the guest must start +/// executing code, as well as which boot protocol is to be used +/// to configure the guest initial state. 
+pub struct EntryPoint { + /// Address in guest memory where the guest must start execution + pub entry_addr: utils::vm_memory::GuestAddress, + /// Specifies which boot protocol to use + pub protocol: BootProtocol, +} diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 5e63d95e004..ad181697750 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -14,10 +14,15 @@ use libc::EFD_NONBLOCK; use linux_loader::cmdline::Cmdline as LoaderKernelCmdline; #[cfg(target_arch = "x86_64")] use linux_loader::loader::elf::Elf as Loader; +#[cfg(target_arch = "x86_64")] +use linux_loader::loader::elf::PvhBootCapability; #[cfg(target_arch = "aarch64")] use linux_loader::loader::pe::PE as Loader; use linux_loader::loader::KernelLoader; +#[cfg(target_arch = "aarch64")] use log::error; +#[cfg(target_arch = "x86_64")] +use log::{debug, error}; use seccompiler::BpfThreadMap; use snapshot::Persist; use userfaultfd::Uffd; @@ -28,7 +33,7 @@ use utils::vm_memory::{GuestAddress, GuestMemory, GuestMemoryMmap, ReadVolatile} use vm_superio::Rtc; use vm_superio::Serial; -use crate::arch::InitrdConfig; +use crate::arch::{BootProtocol, EntryPoint, InitrdConfig}; #[cfg(target_arch = "aarch64")] use crate::construct_kvm_mpidrs; use crate::cpu_config::templates::{ @@ -256,7 +261,7 @@ pub fn build_microvm_for_boot( let track_dirty_pages = vm_resources.track_dirty_pages(); let guest_memory = create_guest_memory(vm_resources.vm_config.mem_size_mib, track_dirty_pages)?; - let entry_addr = load_kernel(boot_config, &guest_memory)?; + let entry_point = load_kernel(boot_config, &guest_memory)?; let initrd = load_initrd_from_config(boot_config, &guest_memory)?; // Clone the command-line so that a failed boot doesn't pollute the original. #[allow(unused_mut)] @@ -310,7 +315,7 @@ pub fn build_microvm_for_boot( &vmm, vcpus.as_mut(), &vm_resources.vm_config, - entry_addr, + entry_point.entry_addr, &initrd, boot_cmdline, )?; @@ -544,16 +549,16 @@ pub fn create_guest_memory( .map_err(StartMicrovmError::GuestMemoryMmap) } +#[cfg(target_arch = "x86_64")] fn load_kernel( boot_config: &BootConfig, guest_memory: &GuestMemoryMmap, -) -> Result<GuestAddress, StartMicrovmError> { +) -> Result<EntryPoint, StartMicrovmError> { let mut kernel_file = boot_config .kernel_file .try_clone() .map_err(|err| StartMicrovmError::Internal(VmmError::KernelFile(err)))?; - #[cfg(target_arch = "x86_64")] let entry_addr = Loader::load::<std::fs::File, GuestMemoryMmap>( guest_memory, None, &mut kernel_file, Some(GuestAddress(crate::arch::get_kernel_start())), ) .map_err(StartMicrovmError::KernelLoader)?; - #[cfg(target_arch = "aarch64")] + let mut entry_point_addr: GuestAddress = entry_addr.kernel_load; + let mut boot_prot: BootProtocol = BootProtocol::LinuxBoot; + if let PvhBootCapability::PvhEntryPresent(pvh_entry_addr) = entry_addr.pvh_boot_cap { + // Use the PVH kernel entry point to boot the guest + entry_point_addr = pvh_entry_addr; + boot_prot = BootProtocol::PvhBoot; + } + + debug!("Kernel loaded using {boot_prot}"); + + Ok(EntryPoint { + entry_addr: entry_point_addr, + protocol: boot_prot, + }) +} + +#[cfg(target_arch = "aarch64")] +fn load_kernel( + boot_config: &BootConfig, + guest_memory: &GuestMemoryMmap, +) -> Result<EntryPoint, StartMicrovmError> { + let mut kernel_file = boot_config + .kernel_file + .try_clone() + .map_err(|err| StartMicrovmError::Internal(VmmError::KernelFile(err)))?; + let entry_addr = Loader::load::<std::fs::File, GuestMemoryMmap>( guest_memory, Some(GuestAddress(crate::arch::get_kernel_start())), &mut kernel_file, None, ) .map_err(StartMicrovmError::KernelLoader)?; - Ok(entry_addr.kernel_load) + Ok(EntryPoint { + entry_addr: entry_addr.kernel_load, + protocol:
BootProtocol::LinuxBoot, + }) } fn load_initrd_from_config( From fac3e99e60c12214c7eb316168c55f34d875cce3 Mon Sep 17 00:00:00 2001 From: Colin Percival Date: Sun, 2 Oct 2022 10:42:38 -0700 Subject: [PATCH 2/7] pvh/arch-x86_64: Initialize vCPU regs for PVH Set the initial values of the KVM vCPU registers as specified in the PVH boot ABI: https://xenbits.xen.org/docs/unstable/misc/pvh.html Add stub bits for aarch64; PVH mode does not exist there. Signed-off-by: Colin Percival Co-authored-by: Alejandro Jimenez --- src/vmm/src/arch/x86_64/gdt.rs | 35 +++++- src/vmm/src/arch/x86_64/layout.rs | 3 + src/vmm/src/arch/x86_64/regs.rs | 174 +++++++++++++++++++++-------- src/vmm/src/builder.rs | 6 +- src/vmm/src/vstate/vcpu/aarch64.rs | 23 ++-- src/vmm/src/vstate/vcpu/mod.rs | 10 +- src/vmm/src/vstate/vcpu/x86_64.rs | 51 +++++++-- 7 files changed, 228 insertions(+), 74 deletions(-) diff --git a/src/vmm/src/arch/x86_64/gdt.rs b/src/vmm/src/arch/x86_64/gdt.rs index c7fcbf31bf0..b34ca8c3d64 100644 --- a/src/vmm/src/arch/x86_64/gdt.rs +++ b/src/vmm/src/arch/x86_64/gdt.rs @@ -1,3 +1,5 @@ +// Copyright © 2020, Oracle and/or its affiliates. +// // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // @@ -24,8 +26,37 @@ fn get_base(entry: u64) -> u64 { | (((entry) & 0x0000_0000_FFFF_0000) >> 16) } +// Extract the segment limit from the GDT segment descriptor. +// +// In a segment descriptor, the limit field is 20 bits, so it can directly describe +// a range from 0 to 0xFFFFF (1 MB). When G flag is set (4-KByte page granularity) it +// scales the value in the limit field by a factor of 2^12 (4 Kbytes), making the effective +// limit range from 0xFFF (4 KBytes) to 0xFFFF_FFFF (4 GBytes). +// +// However, the limit field in the VMCS definition is a 32 bit field, and the limit value is not +// automatically scaled using the G flag. This means that for a desired range of 4GB for a +// given segment, its limit must be specified as 0xFFFF_FFFF. Therefore the method of obtaining +// the limit from the GDT entry is not sufficient, since it only provides 20 bits when 32 bits +// are necessary. Fortunately, we can check if the G flag is set when extracting the limit since +// the full GDT entry is passed as an argument, and perform the scaling of the limit value to +// return the full 32 bit value. +// +// The scaling mentioned above is required when using PVH boot, since the guest boots in protected +// (32-bit) mode and must be able to access the entire 32-bit address space. It does not cause +// issues for the case of direct boot to 64-bit (long) mode, since in 64-bit mode the processor does +// not perform runtime limit checking on code or data segments. +// +// (For more information concerning the formats of segment descriptors, VMCS fields, et cetera, +// please consult the Intel Software Developer Manual.) 
fn get_limit(entry: u64) -> u32 { - ((((entry) & 0x000F_0000_0000_0000) >> 32) | ((entry) & 0x0000_0000_0000_FFFF)) as u32 + let limit: u32 = + ((((entry) & 0x000F_0000_0000_0000) >> 32) | ((entry) & 0x0000_0000_0000_FFFF)) as u32; + + // Perform manual limit scaling if G flag is set + match get_g(entry) { + 0 => limit, + _ => (limit << 12) | 0xFFF, // G flag is either 0 or 1 + } } fn get_g(entry: u64) -> u8 { @@ -109,7 +140,7 @@ mod tests { assert_eq!(0xB, seg.type_); // base and limit assert_eq!(0x10_0000, seg.base); - assert_eq!(0xfffff, seg.limit); + assert_eq!(0xffff_ffff, seg.limit); assert_eq!(0x0, seg.unusable); } } diff --git a/src/vmm/src/arch/x86_64/layout.rs b/src/vmm/src/arch/x86_64/layout.rs index 38776c1e7ed..936458d54d4 100644 --- a/src/vmm/src/arch/x86_64/layout.rs +++ b/src/vmm/src/arch/x86_64/layout.rs @@ -27,5 +27,8 @@ pub const IRQ_MAX: u32 = 23; /// Address for the TSS setup. pub const KVM_TSS_ADDRESS: u64 = 0xfffb_d000; +/// Address of the hvm_start_info struct used in PVH boot +pub const PVH_INFO_START: u64 = 0x6000; + /// The 'zero page', a.k.a linux kernel bootparams. pub const ZERO_PAGE_START: u64 = 0x7000; diff --git a/src/vmm/src/arch/x86_64/regs.rs b/src/vmm/src/arch/x86_64/regs.rs index 41d37389ef7..2919cd55619 100644 --- a/src/vmm/src/arch/x86_64/regs.rs +++ b/src/vmm/src/arch/x86_64/regs.rs @@ -1,3 +1,4 @@ +// Copyright © 2020, Oracle and/or its affiliates. // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // @@ -11,6 +12,7 @@ use kvm_bindings::{kvm_fpu, kvm_regs, kvm_sregs}; use kvm_ioctls::VcpuFd; use utils::vm_memory::{Address, Bytes, GuestAddress, GuestMemory, GuestMemoryMmap}; +use super::super::{BootProtocol, EntryPoint}; use super::gdt::{gdt_entry, kvm_segment_from_gdt}; // Initial pagetables. @@ -89,20 +91,30 @@ pub struct SetupRegistersError(utils::errno::Error); /// # Errors /// /// When [`kvm_ioctls::ioctls::vcpu::VcpuFd::set_regs`] errors. -pub fn setup_regs(vcpu: &VcpuFd, boot_ip: u64) -> Result<(), SetupRegistersError> { - let regs: kvm_regs = kvm_regs { - rflags: 0x0000_0000_0000_0002u64, - rip: boot_ip, - // Frame pointer. It gets a snapshot of the stack pointer (rsp) so that when adjustments are - // made to rsp (i.e. reserving space for local variables or pushing values on to the stack), - // local variables and function parameters are still accessible from a constant offset from - // rbp. - rsp: super::layout::BOOT_STACK_POINTER, - // Starting stack pointer. - rbp: super::layout::BOOT_STACK_POINTER, - // Must point to zero page address per Linux ABI. This is x86_64 specific. - rsi: super::layout::ZERO_PAGE_START, - ..Default::default() +pub fn setup_regs(vcpu: &VcpuFd, entry_point: EntryPoint) -> Result<(), SetupRegistersError> { + let regs: kvm_regs = match entry_point.protocol { + BootProtocol::PvhBoot => kvm_regs { + // Configure regs as required by PVH boot protocol. + rflags: 0x0000_0000_0000_0002u64, + rbx: super::layout::PVH_INFO_START, + rip: entry_point.entry_addr.raw_value(), + ..Default::default() + }, + BootProtocol::LinuxBoot => kvm_regs { + // Configure regs as required by Linux 64-bit boot protocol. + rflags: 0x0000_0000_0000_0002u64, + rip: entry_point.entry_addr.raw_value(), + // Frame pointer. It gets a snapshot of the stack pointer (rsp) so that when adjustments + // are made to rsp (i.e. 
reserving space for local variables or pushing + // values on to the stack), local variables and function parameters are + // still accessible from a constant offset from rbp. + rsp: super::layout::BOOT_STACK_POINTER, + // Starting stack pointer. + rbp: super::layout::BOOT_STACK_POINTER, + // Must point to zero page address per Linux ABI. This is x86_64 specific. + rsi: super::layout::ZERO_PAGE_START, + ..Default::default() + }, }; vcpu.set_regs(®s).map_err(SetupRegistersError) @@ -131,6 +143,7 @@ pub enum SetupSpecialRegistersError { /// /// * `mem` - The memory that will be passed to the guest. /// * `vcpu` - Structure for the VCPU that holds the VCPU's fd. +/// * `boot_prot` - The boot protocol being used. /// /// # Errors /// @@ -139,14 +152,21 @@ pub enum SetupSpecialRegistersError { /// - [`configure_segments_and_sregs`] errors. /// - [`setup_page_tables`] errors /// - [`kvm_ioctls::ioctls::vcpu::VcpuFd::set_sregs`] errors. -pub fn setup_sregs(mem: &GuestMemoryMmap, vcpu: &VcpuFd) -> Result<(), SetupSpecialRegistersError> { +pub fn setup_sregs( + mem: &GuestMemoryMmap, + vcpu: &VcpuFd, + boot_prot: BootProtocol, +) -> Result<(), SetupSpecialRegistersError> { let mut sregs: kvm_sregs = vcpu .get_sregs() .map_err(SetupSpecialRegistersError::GetSpecialRegisters)?; - configure_segments_and_sregs(mem, &mut sregs) + configure_segments_and_sregs(mem, &mut sregs, boot_prot) .map_err(SetupSpecialRegistersError::ConfigureSegmentsAndSpecialRegisters)?; - setup_page_tables(mem, &mut sregs).map_err(SetupSpecialRegistersError::SetupPageTables)?; // TODO(dgreid) - Can this be done once per system instead? + if let BootProtocol::LinuxBoot = boot_prot { + setup_page_tables(mem, &mut sregs).map_err(SetupSpecialRegistersError::SetupPageTables)?; + // TODO(dgreid) - Can this be done once per system instead? 
+ } vcpu.set_sregs(&sregs) .map_err(SetupSpecialRegistersError::SetSpecialRegisters) @@ -161,6 +181,7 @@ const EFER_LMA: u64 = 0x400; const EFER_LME: u64 = 0x100; const X86_CR0_PE: u64 = 0x1; +const X86_CR0_ET: u64 = 0x10; const X86_CR0_PG: u64 = 0x8000_0000; const X86_CR4_PAE: u64 = 0x20; @@ -187,13 +208,28 @@ fn write_idt_value(val: u64, guest_mem: &GuestMemoryMmap) -> Result<(), RegsErro fn configure_segments_and_sregs( mem: &GuestMemoryMmap, sregs: &mut kvm_sregs, + boot_prot: BootProtocol, ) -> Result<(), RegsError> { - let gdt_table: [u64; BOOT_GDT_MAX] = [ - gdt_entry(0, 0, 0), // NULL - gdt_entry(0xa09b, 0, 0xfffff), // CODE - gdt_entry(0xc093, 0, 0xfffff), // DATA - gdt_entry(0x808b, 0, 0xfffff), // TSS - ]; + let gdt_table: [u64; BOOT_GDT_MAX] = match boot_prot { + BootProtocol::PvhBoot => { + // Configure GDT entries as specified by PVH boot protocol + [ + gdt_entry(0, 0, 0), // NULL + gdt_entry(0xc09b, 0, 0xffff_ffff), // CODE + gdt_entry(0xc093, 0, 0xffff_ffff), // DATA + gdt_entry(0x008b, 0, 0x67), // TSS + ] + } + BootProtocol::LinuxBoot => { + // Configure GDT entries as specified by Linux 64bit boot protocol + [ + gdt_entry(0, 0, 0), // NULL + gdt_entry(0xa09b, 0, 0xfffff), // CODE + gdt_entry(0xc093, 0, 0xfffff), // DATA + gdt_entry(0x808b, 0, 0xfffff), // TSS + ] + } + }; let code_seg = kvm_segment_from_gdt(gdt_table[1], 1); let data_seg = kvm_segment_from_gdt(gdt_table[2], 2); @@ -216,9 +252,17 @@ fn configure_segments_and_sregs( sregs.ss = data_seg; sregs.tr = tss_seg; - // 64-bit protected mode - sregs.cr0 |= X86_CR0_PE; - sregs.efer |= EFER_LME | EFER_LMA; + match boot_prot { + BootProtocol::PvhBoot => { + sregs.cr0 = X86_CR0_PE | X86_CR0_ET; + sregs.cr4 = 0; + } + BootProtocol::LinuxBoot => { + // 64-bit protected mode + sregs.cr0 |= X86_CR0_PE; + sregs.efer |= EFER_LME | EFER_LMA; + } + } Ok(()) } @@ -279,24 +323,45 @@ mod tests { gm.read_obj(read_addr).unwrap() } - fn validate_segments_and_sregs(gm: &GuestMemoryMmap, sregs: &kvm_sregs) { + fn validate_segments_and_sregs( + gm: &GuestMemoryMmap, + sregs: &kvm_sregs, + boot_prot: BootProtocol, + ) { + if let BootProtocol::LinuxBoot = boot_prot { + assert_eq!(0xaf_9b00_0000_ffff, read_u64(gm, BOOT_GDT_OFFSET + 8)); + assert_eq!(0xcf_9300_0000_ffff, read_u64(gm, BOOT_GDT_OFFSET + 16)); + assert_eq!(0x8f_8b00_0000_ffff, read_u64(gm, BOOT_GDT_OFFSET + 24)); + + assert_eq!(0xffff_ffff, sregs.tr.limit); + + assert!(sregs.cr0 & X86_CR0_PE != 0); + assert!(sregs.efer & EFER_LME != 0 && sregs.efer & EFER_LMA != 0); + } else { + // Validate values that are specific to PVH boot protocol + assert_eq!(0xcf_9b00_0000_ffff, read_u64(gm, BOOT_GDT_OFFSET + 8)); + assert_eq!(0xcf_9300_0000_ffff, read_u64(gm, BOOT_GDT_OFFSET + 16)); + assert_eq!(0x00_8b00_0000_0067, read_u64(gm, BOOT_GDT_OFFSET + 24)); + + assert_eq!(0x67, sregs.tr.limit); + assert_eq!(0, sregs.tr.g); + + assert!(sregs.cr0 & X86_CR0_PE != 0 && sregs.cr0 & X86_CR0_ET != 0); + assert_eq!(0, sregs.cr4); + } + + // Common settings for both PVH and Linux boot protocol assert_eq!(0x0, read_u64(gm, BOOT_GDT_OFFSET)); - assert_eq!(0xaf_9b00_0000_ffff, read_u64(gm, BOOT_GDT_OFFSET + 8)); - assert_eq!(0xcf_9300_0000_ffff, read_u64(gm, BOOT_GDT_OFFSET + 16)); - assert_eq!(0x8f_8b00_0000_ffff, read_u64(gm, BOOT_GDT_OFFSET + 24)); assert_eq!(0x0, read_u64(gm, BOOT_IDT_OFFSET)); assert_eq!(0, sregs.cs.base); - assert_eq!(0xfffff, sregs.ds.limit); + assert_eq!(0xffff_ffff, sregs.ds.limit); assert_eq!(0x10, sregs.es.selector); assert_eq!(1, sregs.fs.present); assert_eq!(1, 
sregs.gs.g); assert_eq!(0, sregs.ss.avl); assert_eq!(0, sregs.tr.base); - assert_eq!(0xfffff, sregs.tr.limit); assert_eq!(0, sregs.tr.avl); - assert!(sregs.cr0 & X86_CR0_PE != 0); - assert!(sregs.efer & EFER_LME != 0 && sregs.efer & EFER_LMA != 0); } fn validate_page_tables(gm: &GuestMemoryMmap, sregs: &kvm_sregs) { @@ -348,7 +413,12 @@ mod tests { ..Default::default() }; - setup_regs(&vcpu, expected_regs.rip).unwrap(); + let entry_point: EntryPoint = EntryPoint { + entry_addr: GuestAddress(expected_regs.rip), + protocol: BootProtocol::LinuxBoot, + }; + + setup_regs(&vcpu, entry_point).unwrap(); let actual_regs: kvm_regs = vcpu.get_regs().unwrap(); assert_eq!(actual_regs, expected_regs); @@ -361,16 +431,22 @@ mod tests { let vcpu = vm.create_vcpu(0).unwrap(); let gm = create_guest_mem(None); - assert!(vcpu.set_sregs(&Default::default()).is_ok()); - setup_sregs(&gm, &vcpu).unwrap(); - - let mut sregs: kvm_sregs = vcpu.get_sregs().unwrap(); - // for AMD KVM_GET_SREGS returns g = 0 for each kvm_segment. - // We set it to 1, otherwise the test will fail. - sregs.gs.g = 1; - - validate_segments_and_sregs(&gm, &sregs); - validate_page_tables(&gm, &sregs); + [BootProtocol::LinuxBoot, BootProtocol::PvhBoot] + .iter() + .for_each(|boot_prot| { + assert!(vcpu.set_sregs(&Default::default()).is_ok()); + setup_sregs(&gm, &vcpu, *boot_prot).unwrap(); + + let mut sregs: kvm_sregs = vcpu.get_sregs().unwrap(); + // for AMD KVM_GET_SREGS returns g = 0 for each kvm_segment. + // We set it to 1, otherwise the test will fail. + sregs.gs.g = 1; + + validate_segments_and_sregs(&gm, &sregs, *boot_prot); + if let BootProtocol::LinuxBoot = *boot_prot { + validate_page_tables(&gm, &sregs); + } + }); } #[test] @@ -415,9 +491,13 @@ mod tests { fn test_configure_segments_and_sregs() { let mut sregs: kvm_sregs = Default::default(); let gm = create_guest_mem(None); - configure_segments_and_sregs(&gm, &mut sregs).unwrap(); + configure_segments_and_sregs(&gm, &mut sregs, BootProtocol::LinuxBoot).unwrap(); + + validate_segments_and_sregs(&gm, &sregs, BootProtocol::LinuxBoot); + + configure_segments_and_sregs(&gm, &mut sregs, BootProtocol::PvhBoot).unwrap(); - validate_segments_and_sregs(&gm, &sregs); + validate_segments_and_sregs(&gm, &sregs, BootProtocol::PvhBoot); } #[test] diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index ad181697750..378c4f2ca70 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -315,7 +315,7 @@ pub fn build_microvm_for_boot( &vmm, vcpus.as_mut(), &vm_resources.vm_config, - entry_point.entry_addr, + entry_point, &initrd, boot_cmdline, )?; @@ -776,7 +776,7 @@ pub fn configure_system_for_boot( vmm: &Vmm, vcpus: &mut [Vcpu], vm_config: &VmConfig, - entry_addr: GuestAddress, + entry_point: EntryPoint, initrd: &Option, boot_cmdline: LoaderKernelCmdline, ) -> Result<(), StartMicrovmError> { @@ -820,7 +820,7 @@ pub fn configure_system_for_boot( // Configure vCPUs with normalizing and setting the generated CPU configuration. 
for vcpu in vcpus.iter_mut() { vcpu.kvm_vcpu - .configure(vmm.guest_memory(), entry_addr, &vcpu_config) + .configure(vmm.guest_memory(), entry_point, &vcpu_config) .map_err(VmmError::VcpuConfigure) .map_err(Internal)?; } diff --git a/src/vmm/src/vstate/vcpu/aarch64.rs b/src/vmm/src/vstate/vcpu/aarch64.rs index c4c3257e2c2..5fbeb60acbe 100644 --- a/src/vmm/src/vstate/vcpu/aarch64.rs +++ b/src/vmm/src/vstate/vcpu/aarch64.rs @@ -7,7 +7,7 @@ use kvm_ioctls::*; use logger::{error, IncMetric, METRICS}; -use utils::vm_memory::{Address, GuestAddress, GuestMemoryMmap}; +use utils::vm_memory::{Address, GuestMemoryMmap}; use versionize::{VersionMap, Versionize, VersionizeError, VersionizeResult}; use versionize_derive::Versionize; @@ -18,6 +18,7 @@ use crate::arch::aarch64::vcpu::{ get_all_registers, get_all_registers_ids, get_mpidr, get_mpstate, get_registers, set_mpstate, set_registers, setup_boot_regs, VcpuError as ArchError, }; +use crate::arch::EntryPoint; use crate::cpu_config::templates::CpuConfiguration; use crate::vcpu::{VcpuConfig, VcpuError}; use crate::vstate::vcpu::VcpuEmulation; @@ -86,12 +87,13 @@ impl KvmVcpu { /// # Arguments /// /// * `guest_mem` - The guest memory used by this microvm. - /// * `kernel_load_addr` - Offset from `guest_mem` at which the kernel is loaded. + /// * `kernel_entry_point` - Specifies the boot protocol and offset from `guest_mem` at which + /// the kernel starts. /// * `vcpu_config` - The vCPU configuration. pub fn configure( &mut self, guest_mem: &GuestMemoryMmap, - kernel_load_addr: GuestAddress, + kernel_entry_point: EntryPoint, vcpu_config: &VcpuConfig, ) -> Result<(), KvmVcpuError> { for reg in vcpu_config.cpu_config.regs.iter() { @@ -103,7 +105,7 @@ impl KvmVcpu { setup_boot_regs( &self.fd, self.index, - kernel_load_addr.raw_value(), + kernel_entry_point.entry_addr.raw_value(), guest_mem, ) .map_err(KvmVcpuError::ConfigureRegisters)?; @@ -228,10 +230,11 @@ mod tests { use std::os::unix::io::AsRawFd; use kvm_bindings::KVM_REG_SIZE_U64; - use utils::vm_memory::GuestMemoryMmap; + use utils::vm_memory::{GuestAddress, GuestMemoryMmap}; use super::*; use crate::arch::aarch64::regs::Aarch64RegisterRef; + use crate::arch::BootProtocol; use crate::cpu_config::aarch64::CpuConfiguration; use crate::vcpu::VcpuConfig; use crate::vstate::vm::tests::setup_vm; @@ -278,7 +281,10 @@ mod tests { assert!(vcpu .configure( &vm_mem, - GuestAddress(crate::arch::get_kernel_start()), + EntryPoint { + entry_addr: GuestAddress(crate::arch::get_kernel_start()), + protocol: BootProtocol::LinuxBoot, + }, &vcpu_config, ) .is_ok()); @@ -287,7 +293,10 @@ mod tests { let err = vcpu.configure( &vm_mem, - GuestAddress(crate::arch::get_kernel_start()), + EntryPoint { + entry_addr: GuestAddress(crate::arch::get_kernel_start()), + protocol: BootProtocol::LinuxBoot, + }, &vcpu_config, ); assert!(err.is_err()); diff --git a/src/vmm/src/vstate/vcpu/mod.rs b/src/vmm/src/vstate/vcpu/mod.rs index e0b3ae94c81..87d7caad3db 100644 --- a/src/vmm/src/vstate/vcpu/mod.rs +++ b/src/vmm/src/vstate/vcpu/mod.rs @@ -721,6 +721,7 @@ pub mod tests { use utils::vm_memory::{GuestAddress, GuestMemoryMmap}; use super::*; + use crate::arch::{BootProtocol, EntryPoint}; use crate::builder::StartMicrovmError; use crate::devices::bus::DummyDevice; use crate::devices::BusDevice; @@ -945,7 +946,10 @@ pub mod tests { let vcpu_exit_evt = vcpu.exit_evt.try_clone().unwrap(); // Needs a kernel since we'll actually run this vcpu. 
- let entry_addr = load_good_kernel(&vm_mem); + let entry_point = EntryPoint { + entry_addr: load_good_kernel(&vm_mem), + protocol: BootProtocol::LinuxBoot, + }; #[cfg(target_arch = "x86_64")] { @@ -953,7 +957,7 @@ pub mod tests { vcpu.kvm_vcpu .configure( &vm_mem, - entry_addr, + entry_point, &VcpuConfig { vcpu_count: 1, smt: false, @@ -970,7 +974,7 @@ pub mod tests { vcpu.kvm_vcpu .configure( &vm_mem, - entry_addr, + entry_point, &VcpuConfig { vcpu_count: 1, smt: false, diff --git a/src/vmm/src/vstate/vcpu/x86_64.rs b/src/vmm/src/vstate/vcpu/x86_64.rs index cb9e3ba351c..041ec27de23 100644 --- a/src/vmm/src/vstate/vcpu/x86_64.rs +++ b/src/vmm/src/vstate/vcpu/x86_64.rs @@ -14,13 +14,14 @@ use kvm_bindings::{ use kvm_ioctls::{VcpuExit, VcpuFd}; use log::{error, warn}; use logger::{IncMetric, METRICS}; -use utils::vm_memory::{Address, GuestAddress, GuestMemoryMmap}; +use utils::vm_memory::GuestMemoryMmap; use versionize::{VersionMap, Versionize, VersionizeError, VersionizeResult}; use versionize_derive::Versionize; use crate::arch::x86_64::interrupts; use crate::arch::x86_64::msr::{create_boot_msr_entries, MsrError}; use crate::arch::x86_64::regs::{SetupFpuError, SetupRegistersError, SetupSpecialRegistersError}; +use crate::arch::EntryPoint; use crate::cpu_config::x86_64::{cpuid, CpuConfiguration}; use crate::vstate::vcpu::{VcpuConfig, VcpuEmulation}; use crate::vstate::vm::Vm; @@ -206,13 +207,14 @@ impl KvmVcpu { /// # Arguments /// /// * `guest_mem` - The guest memory used by this microvm. - /// * `kernel_start_addr` - Offset from `guest_mem` at which the kernel starts. + /// * `kernel_entry_point` - Specifies the boot protocol and offset from `guest_mem` at which + /// the kernel starts. /// * `vcpu_config` - The vCPU configuration. /// * `cpuid` - The capabilities exposed by this vCPU. 
pub fn configure( &mut self, guest_mem: &GuestMemoryMmap, - kernel_start_addr: GuestAddress, + kernel_entry_point: EntryPoint, vcpu_config: &VcpuConfig, ) -> Result<(), KvmVcpuConfigureError> { let mut cpuid = vcpu_config.cpu_config.cpuid.clone(); @@ -272,11 +274,10 @@ impl KvmVcpu { .collect::>(); crate::arch::x86_64::msr::set_msrs(&self.fd, &kvm_msrs)?; - crate::arch::x86_64::regs::setup_regs(&self.fd, kernel_start_addr.raw_value())?; + crate::arch::x86_64::regs::setup_regs(&self.fd, kernel_entry_point)?; crate::arch::x86_64::regs::setup_fpu(&self.fd)?; - crate::arch::x86_64::regs::setup_sregs(guest_mem, &self.fd)?; + crate::arch::x86_64::regs::setup_sregs(guest_mem, &self.fd, kernel_entry_point.protocol)?; crate::arch::x86_64::interrupts::set_lint(&self.fd)?; - Ok(()) } @@ -655,9 +656,11 @@ mod tests { use std::os::unix::io::AsRawFd; use kvm_ioctls::Cap; + use utils::vm_memory::GuestAddress; use super::*; use crate::arch::x86_64::cpu_model::CpuModel; + use crate::arch::BootProtocol; use crate::cpu_config::templates::{ CpuConfiguration, CpuTemplateType, CustomCpuTemplate, GetCpuTemplate, GuestConfigError, StaticCpuTemplate, @@ -728,7 +731,14 @@ mod tests { let vcpu_config = create_vcpu_config(&vm, &vcpu, &CustomCpuTemplate::default()).unwrap(); assert_eq!( - vcpu.configure(&vm_mem, GuestAddress(0), &vcpu_config,), + vcpu.configure( + &vm_mem, + EntryPoint { + entry_addr: GuestAddress(0), + protocol: BootProtocol::LinuxBoot, + }, + &vcpu_config, + ), Ok(()) ); @@ -740,7 +750,10 @@ mod tests { Ok(config) => vcpu .configure( &vm_mem, - GuestAddress(crate::arch::get_kernel_start()), + EntryPoint { + entry_addr: GuestAddress(crate::arch::get_kernel_start()), + protocol: BootProtocol::LinuxBoot, + }, &config, ) .is_ok(), @@ -843,8 +856,15 @@ mod tests { msrs: HashMap::new(), }, }; - vcpu.configure(&vm_mem, GuestAddress(0), &vcpu_config) - .unwrap(); + vcpu.configure( + &vm_mem, + EntryPoint { + entry_addr: GuestAddress(0), + protocol: BootProtocol::LinuxBoot, + }, + &vcpu_config, + ) + .unwrap(); // Invalid entries filled with 0 should not exist. let cpuid = vcpu.get_cpuid().unwrap(); @@ -905,8 +925,15 @@ mod tests { msrs: HashMap::new(), }, }; - vcpu.configure(&vm_mem, GuestAddress(0), &vcpu_config) - .unwrap(); + vcpu.configure( + &vm_mem, + EntryPoint { + entry_addr: GuestAddress(0), + protocol: BootProtocol::LinuxBoot, + }, + &vcpu_config, + ) + .unwrap(); assert!(vcpu.dump_cpu_config().is_ok()); } From 4caaaa10aec61f7cf552ebea669a88049a54af30 Mon Sep 17 00:00:00 2001 From: Colin Percival Date: Sun, 2 Oct 2022 10:42:41 -0700 Subject: [PATCH 3/7] pvh/arch-x86_64: Write start_info to guest memory Fill the hvm_start_info and related structures as specified in the PVH boot protocol. Write the data structures to guest memory at the GPA that will be stored in %rbx when the guest starts. Signed-off-by: Colin Percival Co-authored-by: Alejandro Jimenez --- src/vmm/src/arch/x86_64/layout.rs | 8 ++ src/vmm/src/arch/x86_64/mod.rs | 232 ++++++++++++++++++++++++++++-- src/vmm/src/builder.rs | 1 + 3 files changed, 233 insertions(+), 8 deletions(-) diff --git a/src/vmm/src/arch/x86_64/layout.rs b/src/vmm/src/arch/x86_64/layout.rs index 936458d54d4..84be9148967 100644 --- a/src/vmm/src/arch/x86_64/layout.rs +++ b/src/vmm/src/arch/x86_64/layout.rs @@ -30,5 +30,13 @@ pub const KVM_TSS_ADDRESS: u64 = 0xfffb_d000; /// Address of the hvm_start_info struct used in PVH boot pub const PVH_INFO_START: u64 = 0x6000; +/// Starting address of array of modules of hvm_modlist_entry type. 
+/// Used to enable initrd support using the PVH boot ABI. +pub const MODLIST_START: u64 = 0x6040; + +/// Address of memory map table used in PVH boot. Can overlap +/// with the zero page address since they are mutually exclusive. +pub const MEMMAP_START: u64 = 0x7000; + /// The 'zero page', a.k.a linux kernel bootparams. pub const ZERO_PAGE_START: u64 = 0x7000; diff --git a/src/vmm/src/arch/x86_64/mod.rs b/src/vmm/src/arch/x86_64/mod.rs index 6d6db07a051..f4e402a9a5e 100644 --- a/src/vmm/src/arch/x86_64/mod.rs +++ b/src/vmm/src/arch/x86_64/mod.rs @@ -1,3 +1,5 @@ +// Copyright © 2020, Oracle and/or its affiliates. +// // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // @@ -19,14 +21,19 @@ pub mod msr; pub mod regs; use linux_loader::configurator::linux::LinuxBootConfigurator; +use linux_loader::configurator::pvh::PvhBootConfigurator; use linux_loader::configurator::{BootConfigurator, BootParams}; use linux_loader::loader::bootparam::boot_params; +use linux_loader::loader::elf::start_info::{ + hvm_memmap_table_entry, hvm_modlist_entry, hvm_start_info, +}; use utils::vm_memory::{Address, GuestAddress, GuestMemory, GuestMemoryMmap, GuestMemoryRegion}; -use crate::arch::InitrdConfig; +use crate::arch::{BootProtocol, InitrdConfig}; // Value taken from https://elixir.bootlin.com/linux/v5.10.68/source/arch/x86/include/uapi/asm/e820.h#L31 const E820_RAM: u32 = 1; +const MEMMAP_TYPE_RAM: u32 = 1; /// Errors thrown while configuring x86_64 system. #[derive(Debug, PartialEq, Eq, derive_more::From)] @@ -39,6 +46,12 @@ pub enum ConfigurationError { ZeroPageSetup, /// Failed to compute initrd address. InitrdAddress, + /// Error writing module entry to guest memory. + ModlistSetup, + /// Error writing memory map table to guest memory. + MemmapTableSetup, + /// Error writing hvm_start_info to guest memory. + StartInfoSetup, } // Where BIOS/VGA magic would live on a real PC. @@ -102,12 +115,139 @@ pub fn initrd_load_addr( /// * `cmdline_size` - Size of the kernel command line in bytes including the null terminator. /// * `initrd` - Information about where the ramdisk image was loaded in the `guest_mem`. /// * `num_cpus` - Number of virtual CPUs the guest will have. +/// * `boot_prot` - Boot protocol that will be used to boot the guest. pub fn configure_system( guest_mem: &GuestMemoryMmap, cmdline_addr: GuestAddress, cmdline_size: usize, initrd: &Option, num_cpus: u8, + boot_prot: BootProtocol, +) -> Result<(), ConfigurationError> { + // Note that this puts the mptable at the last 1k of Linux's 640k base RAM + mptable::setup_mptable(guest_mem, num_cpus).map_err(ConfigurationError::MpTableSetup)?; + + match boot_prot { + BootProtocol::PvhBoot => { + configure_pvh(guest_mem, cmdline_addr, initrd)?; + } + BootProtocol::LinuxBoot => { + configure_64bit_boot(guest_mem, cmdline_addr, cmdline_size, initrd)?; + } + } + + Ok(()) +} + +fn configure_pvh( + guest_mem: &GuestMemoryMmap, + cmdline_addr: GuestAddress, + initrd: &Option, +) -> Result<(), ConfigurationError> { + const XEN_HVM_START_MAGIC_VALUE: u32 = 0x336e_c578; + let first_addr_past_32bits = GuestAddress(FIRST_ADDR_PAST_32BITS); + let end_32bit_gap_start = GuestAddress(MMIO_MEM_START); + let himem_start = GuestAddress(layout::HIMEM_START); + + // Vector to hold modules (currently either empty or holding initrd). 
+ let mut modules: Vec<hvm_modlist_entry> = Vec::new(); + if let Some(initrd_config) = initrd { + // The initrd has been written to guest memory already, here we just need to + // create the module structure that describes it. + modules.push(hvm_modlist_entry { + paddr: initrd_config.address.raw_value(), + size: initrd_config.size as u64, + ..Default::default() + }); + } + + // Vector to hold the memory maps which needs to be written to guest memory + // at MEMMAP_START after all of the mappings are recorded. + let mut memmap: Vec<hvm_memmap_table_entry> = Vec::new(); + + // Create the memory map entries. + add_memmap_entry(&mut memmap, 0, EBDA_START, MEMMAP_TYPE_RAM)?; + let last_addr = guest_mem.last_addr(); + if last_addr < end_32bit_gap_start { + add_memmap_entry( + &mut memmap, + himem_start.raw_value(), + last_addr.unchecked_offset_from(himem_start) + 1, + MEMMAP_TYPE_RAM, + )?; + } else { + add_memmap_entry( + &mut memmap, + himem_start.raw_value(), + end_32bit_gap_start.unchecked_offset_from(himem_start), + MEMMAP_TYPE_RAM, + )?; + + if last_addr > first_addr_past_32bits { + add_memmap_entry( + &mut memmap, + first_addr_past_32bits.raw_value(), + last_addr.unchecked_offset_from(first_addr_past_32bits) + 1, + MEMMAP_TYPE_RAM, + )?; + } + } + + // Construct the hvm_start_info structure and serialize it into + // boot_params. This will be stored at PVH_INFO_START address, and %rbx + // will be initialized to contain PVH_INFO_START prior to starting the + // guest, as required by the PVH ABI. + let mut start_info = hvm_start_info { + magic: XEN_HVM_START_MAGIC_VALUE, + version: 1, + cmdline_paddr: cmdline_addr.raw_value(), + memmap_paddr: layout::MEMMAP_START, + memmap_entries: memmap.len() as u32, + nr_modules: modules.len() as u32, + ..Default::default() + }; + if !modules.is_empty() { + start_info.modlist_paddr = layout::MODLIST_START; + } + let mut boot_params = + BootParams::new::<hvm_start_info>(&start_info, GuestAddress(layout::PVH_INFO_START)); + + // Copy the vector with the memmap table to the MEMMAP_START address + // which is already saved in the memmap_paddr field of hvm_start_info struct. + boot_params.set_sections::<hvm_memmap_table_entry>(&memmap, GuestAddress(layout::MEMMAP_START)); + + // Copy the vector with the modules list to the MODLIST_START address. + // Note that we only set the modlist_paddr address if there is a nonzero + // number of modules, but serializing an empty list is harmless. + boot_params.set_modules::<hvm_modlist_entry>(&modules, GuestAddress(layout::MODLIST_START)); + + // Write the hvm_start_info struct to guest memory.
+ PvhBootConfigurator::write_bootparams(&boot_params, guest_mem) + .map_err(|_| ConfigurationError::StartInfoSetup) +} + +fn add_memmap_entry( + memmap: &mut Vec, + addr: u64, + size: u64, + mem_type: u32, +) -> Result<(), ConfigurationError> { + // Add the table entry to the vector + memmap.push(hvm_memmap_table_entry { + addr, + size, + type_: mem_type, + reserved: 0, + }); + + Ok(()) +} + +fn configure_64bit_boot( + guest_mem: &GuestMemoryMmap, + cmdline_addr: GuestAddress, + cmdline_size: usize, + initrd: &Option, ) -> Result<(), ConfigurationError> { const KERNEL_BOOT_FLAG_MAGIC: u16 = 0xaa55; const KERNEL_HDR_MAGIC: u32 = 0x5372_6448; @@ -118,9 +258,6 @@ pub fn configure_system( let himem_start = GuestAddress(layout::HIMEM_START); - // Note that this puts the mptable at the last 1k of Linux's 640k base RAM - mptable::setup_mptable(guest_mem, num_cpus)?; - let mut params = boot_params::default(); params.hdr.type_of_loader = KERNEL_LOADER_OTHER; @@ -225,7 +362,8 @@ mod tests { false, ) .unwrap(); - let config_err = configure_system(&gm, GuestAddress(0), 0, &None, 1); + let config_err = + configure_system(&gm, GuestAddress(0), 0, &None, 1, BootProtocol::LinuxBoot); assert!(config_err.is_err()); assert_eq!( config_err.unwrap_err(), @@ -237,21 +375,72 @@ mod tests { let arch_mem_regions = arch_memory_regions(mem_size); let gm = utils::vm_memory::test_utils::create_anon_guest_memory(&arch_mem_regions, false) .unwrap(); - configure_system(&gm, GuestAddress(0), 0, &None, no_vcpus).unwrap(); + configure_system( + &gm, + GuestAddress(0), + 0, + &None, + no_vcpus, + BootProtocol::LinuxBoot, + ) + .unwrap(); + configure_system( + &gm, + GuestAddress(0), + 0, + &None, + no_vcpus, + BootProtocol::PvhBoot, + ) + .unwrap(); // Now assigning some memory that is equal to the start of the 32bit memory hole. let mem_size = 3328 << 20; let arch_mem_regions = arch_memory_regions(mem_size); let gm = utils::vm_memory::test_utils::create_anon_guest_memory(&arch_mem_regions, false) .unwrap(); - configure_system(&gm, GuestAddress(0), 0, &None, no_vcpus).unwrap(); + configure_system( + &gm, + GuestAddress(0), + 0, + &None, + no_vcpus, + BootProtocol::LinuxBoot, + ) + .unwrap(); + configure_system( + &gm, + GuestAddress(0), + 0, + &None, + no_vcpus, + BootProtocol::PvhBoot, + ) + .unwrap(); // Now assigning some memory that falls after the 32bit memory hole. 
let mem_size = 3330 << 20; let arch_mem_regions = arch_memory_regions(mem_size); let gm = utils::vm_memory::test_utils::create_anon_guest_memory(&arch_mem_regions, false) .unwrap(); - configure_system(&gm, GuestAddress(0), 0, &None, no_vcpus).unwrap(); + configure_system( + &gm, + GuestAddress(0), + 0, + &None, + no_vcpus, + BootProtocol::LinuxBoot, + ) + .unwrap(); + configure_system( + &gm, + GuestAddress(0), + 0, + &None, + no_vcpus, + BootProtocol::PvhBoot, + ) + .unwrap(); } #[test] @@ -293,4 +482,31 @@ mod tests { ) .is_err()); } + + #[test] + fn test_add_memmap_entry() { + const MEMMAP_TYPE_RESERVED: u32 = 2; + + let mut memmap: Vec = Vec::new(); + + let expected_memmap = vec![ + hvm_memmap_table_entry { + addr: 0x0, + size: 0x1000, + type_: MEMMAP_TYPE_RAM, + ..Default::default() + }, + hvm_memmap_table_entry { + addr: 0x10000, + size: 0xa000, + type_: MEMMAP_TYPE_RESERVED, + ..Default::default() + }, + ]; + + add_memmap_entry(&mut memmap, 0, 0x1000, MEMMAP_TYPE_RAM).unwrap(); + add_memmap_entry(&mut memmap, 0x10000, 0xa000, MEMMAP_TYPE_RESERVED).unwrap(); + + assert_eq!(format!("{:?}", memmap), format!("{:?}", expected_memmap)); + } } diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 378c4f2ca70..eb75dd2b00b 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -845,6 +845,7 @@ pub fn configure_system_for_boot( cmdline_size, initrd, vcpus.len() as u8, + entry_point.protocol, ) .map_err(ConfigureSystem)?; } From a31e86222b3a473d0ab31a40aa914d4323f2c93b Mon Sep 17 00:00:00 2001 From: Colin Percival Date: Fri, 30 Dec 2022 14:54:28 -0800 Subject: [PATCH 4/7] test_licenses: Accept Oracle copyright The PVH boot support bits are under Oracle copyright. Signed-off-by: Colin Percival --- tests/integration_tests/style/test_licenses.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/integration_tests/style/test_licenses.py b/tests/integration_tests/style/test_licenses.py index 6ee744e9e8b..6a2412782a0 100644 --- a/tests/integration_tests/style/test_licenses.py +++ b/tests/integration_tests/style/test_licenses.py @@ -23,6 +23,8 @@ ) ALIBABA_COPYRIGHT = "Copyright (C) 2019 Alibaba Cloud Computing. All rights reserved." ALIBABA_LICENSE = "SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause" +ORACLE_COPYRIGHT = "Copyright © 2020, Oracle and/or its affiliates." +ORACLE_LICENSE = "SPDX-License-Identifier: Apache-2.0" EXCLUDE = ["build", ".kernel", ".git"] @@ -77,11 +79,16 @@ def _validate_license(filename): ALIBABA_COPYRIGHT in copyright_info and _look_for_license(file, ALIBABA_LICENSE) ) + + has_oracle_copyright = ORACLE_COPYRIGHT in copyright_info and _look_for_license( + file, ORACLE_LICENSE + ) return ( has_amazon_copyright or has_chromium_copyright or has_tuntap_copyright or has_alibaba_copyright + or has_oracle_copyright ) return True From 545153257f1635784600e5eb74e471057a1a694e Mon Sep 17 00:00:00 2001 From: Colin Percival Date: Fri, 14 Jul 2023 14:48:06 -0700 Subject: [PATCH 5/7] Add FreeBSD kernel+rootfs build instructions While I'm here, clarify that the existing instructions are for building a Linux kernel and rootfs. 
Signed-off-by: Colin Percival --- docs/rootfs-and-kernel-setup.md | 46 +++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/docs/rootfs-and-kernel-setup.md b/docs/rootfs-and-kernel-setup.md index 1ba063b47e5..082774adcda 100644 --- a/docs/rootfs-and-kernel-setup.md +++ b/docs/rootfs-and-kernel-setup.md @@ -1,6 +1,6 @@ # Creating Custom rootfs and kernel Images -## Creating a kernel Image +## Creating a Linux kernel Image ### Manual compilation @@ -72,7 +72,7 @@ config="resources/guest_configs/microvm-kernel-arm64-4.14.config" on an aarch64 machine. -## Creating a rootfs Image +## Creating a Linux rootfs Image A rootfs image is just a file system image, that hosts at least an init system. For instance, our getting started guide uses an ext4 filesystem image. Note @@ -178,3 +178,45 @@ adjust the script(s) to suit your use case. You should now have a kernel image (`vmlinux`) and a rootfs image (`rootfs.ext4`), that you can boot with Firecracker. + +## Creating FreeBSD rootfs and kernel Images + +Here's a quick step-by-step guide to building a FreeBSD rootfs and kernel that +Firecracker can boot: + +1. Boot a FreeBSD system. In EC2, the + [FreeBSD 13 Marketplace image](https://aws.amazon.com/marketplace/pp/prodview-ukzmy5dzc6nbq) + is a good option; you can also use weekly snapshot AMIs published by the + FreeBSD project. (Firecracker support is in FreeBSD 14 and later, so you'll + need FreeBSD 13 or later to build it.) + + The build will require about 50 GB of disk space, so size the disk + appropriately. + +1. Log in to the FreeBSD system and become root. If using EC2, you'll want to + ssh in as `ec2-user` with your chosen SSH key and then `su` to become root. + +1. Install git and check out the FreeBSD src tree: + + ```sh + pkg install -y git + git clone https://git.freebsd.org/src.git /usr/src + ``` + + At present (July 2023) Firecracker support is only present in the `main` + branch. + +1. Build FreeBSD: + + ```sh + make -C /usr/src buildworld buildkernel KERNCONF=FIRECRACKER + make -C /usr/src/release firecracker DESTDIR=`pwd` + ``` + +You should now have a rootfs `freebsd-rootfs.bin` and a kernel `freebsd-kern.bin` +in the current directory (or elsewhere if you change the `DESTDIR` value) that +you can boot with Firecracker. Note that the FreeBSD rootfs generated in this +manner is somewhat minimized compared to "stock" FreeBSD; it omits utilities +which are only relevant on physical systems (e.g., utilities related to floppy +disks, USB devices, and some network interfaces) and also debug files and the +system compiler. From 97e976675374fcac2bc7b8ae3d2fd7aa09ffb3c4 Mon Sep 17 00:00:00 2001 From: Colin Percival Date: Fri, 14 Jul 2023 16:02:11 -0700 Subject: [PATCH 6/7] Add docs/pvh.md Brief description of the PVH boot mode. We defer to Xen for technical details of how CPU registers are set up upon kernel entry. Signed-off-by: Colin Percival --- docs/pvh.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 docs/pvh.md diff --git a/docs/pvh.md b/docs/pvh.md new file mode 100644 index 00000000000..4a351c301ec --- /dev/null +++ b/docs/pvh.md @@ -0,0 +1,15 @@ +# PVH boot mode + +Firecracker supports booting x86 kernels in "PVH direct boot" mode +[as specified by the Xen project](https://github.com/xen-project/xen/blob/master/docs/misc/pvh.pandoc). +If a kernel is provided which contains the XEN_ELFNOTE_PHYS32_ENTRY ELF Note +then this boot mode will be used. 
This boot mode was designed for virtualized +environments which load the kernel directly, and is simpler than the "Linux +boot" mode which is designed to be launched from a legacy boot loader. + +PVH boot mode can be enabled for Linux by setting CONFIG_XEN_PVH=y in the +kernel configuration. (This is not the default setting.) + +PVH boot mode is enabled by default in FreeBSD, which has support for +Firecracker starting with FreeBSD 14.0. Instructions on building a FreeBSD +kernel and root filesystem are available [here](rootfs-and-kernel-setup.md). From a6639ada102458c97160a0cc1eb4f3e82dafb7c2 Mon Sep 17 00:00:00 2001 From: Colin Percival Date: Fri, 14 Jul 2023 16:03:10 -0700 Subject: [PATCH 7/7] CHANGELOG: Mention PVH boot mode Firecracker now supports PVH boot as an alternative to "Linux" boot on the x86_64 architecture. This makes it possible for FreeBSD to boot, and also affects how Linux kernels compiled with the CONFIG_XEN_PVH=y option boot. Signed-off-by: Colin Percival --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 51d8dca8072..c7398981243 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,11 @@ ### Added +- Added support for PVH boot mode. This is used when an x86 kernel provides + the appropriate ELF Note to indicate that PVH boot mode is supported. + Linux kernels compiled with CONFIG_XEN_PVH=y set this ELF Note, as do + FreeBSD kernels. + ### Changed - Updated deserialization of `bitmap` for custom CPU templates to allow usage
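A quick way to confirm that a particular kernel image will take the PVH path described in `docs/pvh.md` above is to inspect its ELF notes, which is the same information the loader's `PvhBootCapability` check from patch 1 consumes at load time. The snippet below is a minimal sketch and not part of the patch series; it assumes an uncompressed ELF kernel image (the `vmlinux` path is a placeholder) and binutils `readelf`:

```sh
# Hypothetical check: list the ELF notes in the kernel image and look for the
# Xen PVH entry note. XEN_ELFNOTE_PHYS32_ENTRY is note type 18 (0x12) with
# owner "Xen"; if it is present, Firecracker will boot the kernel via PVH.
readelf -n vmlinux | grep -i -A 2 xen
```

With a Linux kernel built with `CONFIG_XEN_PVH=y`, or a FreeBSD `FIRECRACKER` kernel built as described in the instructions above, the note should be present and the debug log added in patch 1 will report "Kernel loaded using PVH boot protocol".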