Commit b1bb5da

cperciva and aljimenezb committed
pvh/arch-x86_64: Initialize vCPU regs for PVH
Set the initial values of the KVM vCPU registers as specified in the PVH boot ABI:
https://xenbits.xen.org/docs/unstable/misc/pvh.html

Signed-off-by: Colin Percival <cperciva@freebsd.org>
Co-authored-by: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
1 parent e0b0f71 commit b1bb5da

File tree

6 files changed: 192 additions & 65 deletions

src/arch/src/x86_64/gdt.rs

Lines changed: 30 additions & 2 deletions
@@ -1,3 +1,5 @@
+// Copyright © 2020, Oracle and/or its affiliates.
+//
 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 // SPDX-License-Identifier: Apache-2.0
 //
@@ -24,8 +26,34 @@ fn get_base(entry: u64) -> u64 {
         | (((entry) & 0x0000_0000_FFFF_0000) >> 16)
 }
 
+// Extract the segment limit from the GDT segment descriptor.
+//
+// In a segment descriptor, the limit field is 20 bits, so it can directly describe
+// a range from 0 to 0xFFFFF (1 MB). When G flag is set (4-KByte page granularity) it
+// scales the value in the limit field by a factor of 2^12 (4 Kbytes), making the effective
+// limit range from 0xFFF (4 KBytes) to 0xFFFF_FFFF (4 GBytes).
+//
+// However, the limit field in the VMCS definition is a 32 bit field, and the limit value is not
+// automatically scaled using the G flag. This means that for a desired range of 4GB for a
+// given segment, its limit must be specified as 0xFFFF_FFFF. Therefore the method of obtaining
+// the limit from the GDT entry is not sufficient, since it only provides 20 bits when 32 bits
+// are necessary. Fortunately, we can check if the G flag is set when extracting the limit since
+// the full GDT entry is passed as an argument, and perform the scaling of the limit value to
+// return the full 32 bit value.
+//
+// The scaling mentioned above is required when using PVH boot, since the guest boots in protected
+// (32-bit) mode and must be able to access the entire 32-bit address space. It does not cause
+// issues for the case of direct boot to 64-bit (long) mode, since in 64-bit mode the processor does
+// not perform runtime limit checking on code or data segments.
 fn get_limit(entry: u64) -> u32 {
-    ((((entry) & 0x000F_0000_0000_0000) >> 32) | ((entry) & 0x0000_0000_0000_FFFF)) as u32
+    let limit: u32 =
+        ((((entry) & 0x000F_0000_0000_0000) >> 32) | ((entry) & 0x0000_0000_0000_FFFF)) as u32;
+
+    // Perform manual limit scaling if G flag is set
+    match get_g(entry) {
+        0 => limit,
+        _ => ((limit << 12) | 0xFFF), // G flag is either 0 or 1
+    }
 }
 
 fn get_g(entry: u64) -> u8 {
@@ -109,7 +137,7 @@ mod tests {
         assert_eq!(0xB, seg.type_);
         // base and limit
         assert_eq!(0x10_0000, seg.base);
-        assert_eq!(0xfffff, seg.limit);
+        assert_eq!(0xffff_ffff, seg.limit);
         assert_eq!(0x0, seg.unusable);
     }
 }
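
For reference, the G-flag scaling performed by the new get_limit() can be checked in isolation. The snippet below is a self-contained sketch (not part of the patch) that copies the two helpers and feeds them the flat 32-bit code descriptor 0x00cf_9b00_0000_ffff that the PVH GDT test expects: the stored 20-bit limit field is 0xFFFFF with G=1, so the scaled limit comes out as 0xFFFF_FFFF.

// Standalone sketch mirroring the patched gdt.rs helpers; masks follow the
// x86 segment-descriptor layout (limit in bits 0-15 and 48-51, G in bit 55).
fn get_g(entry: u64) -> u8 {
    ((entry & 0x0080_0000_0000_0000) >> 55) as u8
}

fn get_limit(entry: u64) -> u32 {
    let limit: u32 =
        (((entry & 0x000F_0000_0000_0000) >> 32) | (entry & 0x0000_0000_0000_FFFF)) as u32;

    // Scale by 4-KiB pages when the granularity (G) flag is set.
    match get_g(entry) {
        0 => limit,
        _ => (limit << 12) | 0xFFF,
    }
}

fn main() {
    // Flat 32-bit code segment: G=1, base 0, 20-bit limit field 0xFFFFF.
    let entry: u64 = 0x00cf_9b00_0000_ffff;
    // 0xFFFFF pages of 4 KiB => effective limit 0xFFFF_FFFF (4 GiB - 1).
    assert_eq!(get_limit(entry), 0xffff_ffff);
    println!("limit = {:#x}", get_limit(entry));
}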

src/arch/src/x86_64/layout.rs

Lines changed: 3 additions & 0 deletions
@@ -27,5 +27,8 @@ pub const IRQ_MAX: u32 = 23;
 /// Address for the TSS setup.
 pub const KVM_TSS_ADDRESS: u64 = 0xfffb_d000;
 
+/// Address of the hvm_start_info struct used in PVH boot
+pub const PVH_INFO_START: u64 = 0x6000;
+
 /// The 'zero page', a.k.a linux kernel bootparams.
 pub const ZERO_PAGE_START: u64 = 0x7000;

src/arch/src/x86_64/regs.rs

Lines changed: 130 additions & 47 deletions
@@ -1,3 +1,4 @@
+// Copyright © 2020, Oracle and/or its affiliates.
 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 // SPDX-License-Identifier: Apache-2.0
 //
@@ -11,6 +12,7 @@ use kvm_bindings::{kvm_fpu, kvm_regs, kvm_sregs};
 use kvm_ioctls::VcpuFd;
 use vm_memory::{Address, Bytes, GuestAddress, GuestMemory, GuestMemoryMmap};
 
+use super::super::{BootProtocol, EntryPoint};
 use super::gdt::{gdt_entry, kvm_segment_from_gdt};
 
 // Initial pagetables.
@@ -101,20 +103,33 @@ impl fmt::Display for SetupRegistersError {
 /// # Errors
 ///
 /// When [`kvm_ioctls::ioctls::vcpu::VcpuFd::set_regs`] errors.
-pub fn setup_regs(vcpu: &VcpuFd, boot_ip: u64) -> std::result::Result<(), SetupRegistersError> {
-    let regs: kvm_regs = kvm_regs {
-        rflags: 0x0000_0000_0000_0002u64,
-        rip: boot_ip,
-        // Frame pointer. It gets a snapshot of the stack pointer (rsp) so that when adjustments are
-        // made to rsp (i.e. reserving space for local variables or pushing values on to the stack),
-        // local variables and function parameters are still accessible from a constant offset from
-        // rbp.
-        rsp: super::layout::BOOT_STACK_POINTER as u64,
-        // Starting stack pointer.
-        rbp: super::layout::BOOT_STACK_POINTER as u64,
-        // Must point to zero page address per Linux ABI. This is x86_64 specific.
-        rsi: super::layout::ZERO_PAGE_START as u64,
-        ..Default::default()
+pub fn setup_regs(
+    vcpu: &VcpuFd,
+    entry_point: EntryPoint,
+) -> std::result::Result<(), SetupRegistersError> {
+    let regs: kvm_regs = match entry_point.protocol {
+        BootProtocol::PvhBoot => kvm_regs {
+            // Configure regs as required by PVH boot protocol.
+            rflags: 0x0000_0000_0000_0002u64,
+            rbx: super::layout::PVH_INFO_START,
+            rip: entry_point.entry_addr.raw_value(),
+            ..Default::default()
+        },
+        BootProtocol::LinuxBoot => kvm_regs {
+            // Configure regs as required by Linux 64-bit boot protocol.
+            rflags: 0x0000_0000_0000_0002u64,
+            rip: entry_point.entry_addr.raw_value(),
+            // Frame pointer. It gets a snapshot of the stack pointer (rsp) so that when adjustments
+            // are made to rsp (i.e. reserving space for local variables or pushing
+            // values on to the stack), local variables and function parameters are
+            // still accessible from a constant offset from rbp.
+            rsp: super::layout::BOOT_STACK_POINTER as u64,
+            // Starting stack pointer.
+            rbp: super::layout::BOOT_STACK_POINTER as u64,
+            // Must point to zero page address per Linux ABI. This is x86_64 specific.
+            rsi: super::layout::ZERO_PAGE_START as u64,
+            ..Default::default()
+        },
     };
 
     vcpu.set_regs(&regs).map_err(SetupRegistersError)
@@ -143,6 +158,7 @@ pub enum SetupSpecialRegistersError {
 ///
 /// * `mem` - The memory that will be passed to the guest.
 /// * `vcpu` - Structure for the VCPU that holds the VCPU's fd.
+/// * `boot_prot` - The boot protocol being used.
 ///
 /// # Errors
 ///
@@ -154,14 +170,18 @@ pub enum SetupSpecialRegistersError {
 pub fn setup_sregs(
     mem: &GuestMemoryMmap,
     vcpu: &VcpuFd,
+    boot_prot: BootProtocol,
 ) -> std::result::Result<(), SetupSpecialRegistersError> {
     let mut sregs: kvm_sregs = vcpu
         .get_sregs()
         .map_err(SetupSpecialRegistersError::GetSpecialRegisters)?;
 
-    configure_segments_and_sregs(mem, &mut sregs)
+    configure_segments_and_sregs(mem, &mut sregs, boot_prot)
         .map_err(SetupSpecialRegistersError::ConfigureSegmentsAndSpecialRegisters)?;
-    setup_page_tables(mem, &mut sregs).map_err(SetupSpecialRegistersError::SetupPageTables)?; // TODO(dgreid) - Can this be done once per system instead?
+    if let BootProtocol::LinuxBoot = boot_prot {
+        setup_page_tables(mem, &mut sregs).map_err(SetupSpecialRegistersError::SetupPageTables)?;
+        // TODO(dgreid) - Can this be done once per system instead?
+    }
 
     vcpu.set_sregs(&sregs)
         .map_err(SetupSpecialRegistersError::SetSpecialRegisters)
@@ -176,6 +196,7 @@ const EFER_LMA: u64 = 0x400;
 const EFER_LME: u64 = 0x100;
 
 const X86_CR0_PE: u64 = 0x1;
+const X86_CR0_ET: u64 = 0x10;
 const X86_CR0_PG: u64 = 0x8000_0000;
 const X86_CR4_PAE: u64 = 0x20;
 
@@ -199,13 +220,31 @@ fn write_idt_value(val: u64, guest_mem: &GuestMemoryMmap) -> Result<()> {
         .map_err(|_| Error::WriteIDT)
 }
 
-fn configure_segments_and_sregs(mem: &GuestMemoryMmap, sregs: &mut kvm_sregs) -> Result<()> {
-    let gdt_table: [u64; BOOT_GDT_MAX as usize] = [
-        gdt_entry(0, 0, 0),            // NULL
-        gdt_entry(0xa09b, 0, 0xfffff), // CODE
-        gdt_entry(0xc093, 0, 0xfffff), // DATA
-        gdt_entry(0x808b, 0, 0xfffff), // TSS
-    ];
+fn configure_segments_and_sregs(
+    mem: &GuestMemoryMmap,
+    sregs: &mut kvm_sregs,
+    boot_prot: BootProtocol,
+) -> Result<()> {
+    let gdt_table: [u64; BOOT_GDT_MAX as usize] = match boot_prot {
+        BootProtocol::PvhBoot => {
+            // Configure GDT entries as specified by PVH boot protocol
+            [
+                gdt_entry(0, 0, 0),                // NULL
+                gdt_entry(0xc09b, 0, 0xffff_ffff), // CODE
+                gdt_entry(0xc093, 0, 0xffff_ffff), // DATA
+                gdt_entry(0x008b, 0, 0x67),        // TSS
+            ]
+        }
+        BootProtocol::LinuxBoot => {
+            // Configure GDT entries as specified by Linux 64bit boot protocol
+            [
+                gdt_entry(0, 0, 0),            // NULL
+                gdt_entry(0xa09b, 0, 0xfffff), // CODE
+                gdt_entry(0xc093, 0, 0xfffff), // DATA
+                gdt_entry(0x808b, 0, 0xfffff), // TSS
+            ]
+        }
+    };
 
     let code_seg = kvm_segment_from_gdt(gdt_table[1], 1);
     let data_seg = kvm_segment_from_gdt(gdt_table[2], 2);
@@ -228,9 +267,17 @@ fn configure_segments_and_sregs(mem: &GuestMemoryMmap, sregs: &mut kvm_sregs) ->
     sregs.ss = data_seg;
     sregs.tr = tss_seg;
 
-    // 64-bit protected mode
-    sregs.cr0 |= X86_CR0_PE;
-    sregs.efer |= EFER_LME | EFER_LMA;
+    match boot_prot {
+        BootProtocol::PvhBoot => {
+            sregs.cr0 = X86_CR0_PE | X86_CR0_ET;
+            sregs.cr4 = 0;
+        }
+        BootProtocol::LinuxBoot => {
+            // 64-bit protected mode
+            sregs.cr0 |= X86_CR0_PE;
+            sregs.efer |= EFER_LME | EFER_LMA;
+        }
+    }
 
     Ok(())
 }
@@ -288,24 +335,45 @@ mod tests {
         gm.read_obj(read_addr).unwrap()
     }
 
-    fn validate_segments_and_sregs(gm: &GuestMemoryMmap, sregs: &kvm_sregs) {
+    fn validate_segments_and_sregs(
+        gm: &GuestMemoryMmap,
+        sregs: &kvm_sregs,
+        boot_prot: BootProtocol,
+    ) {
+        if let BootProtocol::LinuxBoot = boot_prot {
+            assert_eq!(0xaf_9b00_0000_ffff, read_u64(&gm, BOOT_GDT_OFFSET + 8));
+            assert_eq!(0xcf_9300_0000_ffff, read_u64(&gm, BOOT_GDT_OFFSET + 16));
+            assert_eq!(0x8f_8b00_0000_ffff, read_u64(&gm, BOOT_GDT_OFFSET + 24));
+
+            assert_eq!(0xffff_ffff, sregs.tr.limit);
+
+            assert!(sregs.cr0 & X86_CR0_PE != 0);
+            assert!(sregs.efer & EFER_LME != 0 && sregs.efer & EFER_LMA != 0);
+        } else {
+            // Validate values that are specific to PVH boot protocol
+            assert_eq!(0xcf_9b00_0000_ffff, read_u64(&gm, BOOT_GDT_OFFSET + 8));
+            assert_eq!(0xcf_9300_0000_ffff, read_u64(&gm, BOOT_GDT_OFFSET + 16));
+            assert_eq!(0x00_8b00_0000_0067, read_u64(&gm, BOOT_GDT_OFFSET + 24));
+
+            assert_eq!(0x67, sregs.tr.limit);
+            assert_eq!(0, sregs.tr.g);
+
+            assert!(sregs.cr0 & X86_CR0_PE != 0 && sregs.cr0 & X86_CR0_ET != 0);
+            assert_eq!(0, sregs.cr4);
+        }
+
+        // Common settings for both PVH and Linux boot protocol
         assert_eq!(0x0, read_u64(&gm, BOOT_GDT_OFFSET));
-        assert_eq!(0xaf_9b00_0000_ffff, read_u64(&gm, BOOT_GDT_OFFSET + 8));
-        assert_eq!(0xcf_9300_0000_ffff, read_u64(&gm, BOOT_GDT_OFFSET + 16));
-        assert_eq!(0x8f_8b00_0000_ffff, read_u64(&gm, BOOT_GDT_OFFSET + 24));
        assert_eq!(0x0, read_u64(&gm, BOOT_IDT_OFFSET));
 
         assert_eq!(0, sregs.cs.base);
-        assert_eq!(0xfffff, sregs.ds.limit);
+        assert_eq!(0xffff_ffff, sregs.ds.limit);
         assert_eq!(0x10, sregs.es.selector);
         assert_eq!(1, sregs.fs.present);
         assert_eq!(1, sregs.gs.g);
         assert_eq!(0, sregs.ss.avl);
         assert_eq!(0, sregs.tr.base);
-        assert_eq!(0xfffff, sregs.tr.limit);
         assert_eq!(0, sregs.tr.avl);
-        assert!(sregs.cr0 & X86_CR0_PE != 0);
-        assert!(sregs.efer & EFER_LME != 0 && sregs.efer & EFER_LMA != 0);
     }
 
     fn validate_page_tables(gm: &GuestMemoryMmap, sregs: &kvm_sregs) {
@@ -357,7 +425,12 @@ mod tests {
             ..Default::default()
         };
 
-        setup_regs(&vcpu, expected_regs.rip).unwrap();
+        let entry_point: EntryPoint = EntryPoint {
+            entry_addr: GuestAddress(expected_regs.rip),
+            protocol: BootProtocol::LinuxBoot,
+        };
+
+        setup_regs(&vcpu, entry_point).unwrap();
 
         let actual_regs: kvm_regs = vcpu.get_regs().unwrap();
         assert_eq!(actual_regs, expected_regs);
@@ -370,16 +443,22 @@ mod tests {
         let vcpu = vm.create_vcpu(0).unwrap();
         let gm = create_guest_mem(None);
 
-        assert!(vcpu.set_sregs(&Default::default()).is_ok());
-        setup_sregs(&gm, &vcpu).unwrap();
-
-        let mut sregs: kvm_sregs = vcpu.get_sregs().unwrap();
-        // for AMD KVM_GET_SREGS returns g = 0 for each kvm_segment.
-        // We set it to 1, otherwise the test will fail.
-        sregs.gs.g = 1;
-
-        validate_segments_and_sregs(&gm, &sregs);
-        validate_page_tables(&gm, &sregs);
+        [BootProtocol::LinuxBoot, BootProtocol::PvhBoot]
+            .iter()
+            .for_each(|boot_prot| {
+                assert!(vcpu.set_sregs(&Default::default()).is_ok());
+                setup_sregs(&gm, &vcpu, *boot_prot).unwrap();
+
+                let mut sregs: kvm_sregs = vcpu.get_sregs().unwrap();
+                // for AMD KVM_GET_SREGS returns g = 0 for each kvm_segment.
+                // We set it to 1, otherwise the test will fail.
+                sregs.gs.g = 1;
+
+                validate_segments_and_sregs(&gm, &sregs, *boot_prot);
+                if let BootProtocol::LinuxBoot = *boot_prot {
+                    validate_page_tables(&gm, &sregs);
+                }
+            });
     }
 
     #[test]
@@ -424,9 +503,13 @@ mod tests {
     fn test_configure_segments_and_sregs() {
         let mut sregs: kvm_sregs = Default::default();
         let gm = create_guest_mem(None);
-        configure_segments_and_sregs(&gm, &mut sregs).unwrap();
+        configure_segments_and_sregs(&gm, &mut sregs, BootProtocol::LinuxBoot).unwrap();
+
+        validate_segments_and_sregs(&gm, &sregs, BootProtocol::LinuxBoot);
+
+        configure_segments_and_sregs(&gm, &mut sregs, BootProtocol::PvhBoot).unwrap();
 
-        validate_segments_and_sregs(&gm, &sregs);
+        validate_segments_and_sregs(&gm, &sregs, BootProtocol::PvhBoot);
    }
 
    #[test]
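
As a rough illustration of the register state the PVH arm of setup_regs() programs, the sketch below builds the same kvm_regs value standalone against the kvm-bindings crate. It deliberately skips the crate's own EntryPoint plumbing, copies PVH_INFO_START from the layout.rs change above, and uses a made-up entry address in place of a real XEN_ELFNOTE_PHYS32_ENTRY value.

// Sketch only: mirrors the PVH branch of setup_regs() in this patch.
// Requires the kvm-bindings crate as a dependency.
use kvm_bindings::kvm_regs;

/// Same value as layout::PVH_INFO_START added in this commit.
const PVH_INFO_START: u64 = 0x6000;

fn pvh_boot_regs(entry_addr: u64) -> kvm_regs {
    kvm_regs {
        // Bit 1 of RFLAGS is reserved and must be set.
        rflags: 0x0000_0000_0000_0002u64,
        // PVH boot ABI: ebx holds the guest-physical address of hvm_start_info.
        rbx: PVH_INFO_START,
        // 32-bit entry point taken from the kernel's PVH ELF note.
        rip: entry_addr,
        // All other general-purpose registers start zeroed.
        ..Default::default()
    }
}

fn main() {
    // Hypothetical entry address, for illustration only.
    let regs = pvh_boot_regs(0x0100_0000);
    println!("{:#x?}", regs);
}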

src/vmm/src/builder.rs

Lines changed: 4 additions & 4 deletions
@@ -408,7 +408,7 @@ pub fn build_microvm_for_boot(
         &vmm,
         vcpus.as_mut(),
         vcpu_config,
-        entry_point.entry_addr,
+        entry_point,
         &initrd,
         boot_cmdline,
     )?;
@@ -865,7 +865,7 @@ pub fn configure_system_for_boot(
     vmm: &Vmm,
     vcpus: &mut [Vcpu],
     vcpu_config: VcpuConfig,
-    entry_addr: GuestAddress,
+    entry_point: EntryPoint,
     initrd: &Option<InitrdConfig>,
     boot_cmdline: LoaderKernelCmdline,
 ) -> std::result::Result<(), StartMicrovmError> {
@@ -876,7 +876,7 @@ pub fn configure_system_for_boot(
         vcpu.kvm_vcpu
             .configure(
                 vmm.guest_memory(),
-                entry_addr,
+                entry_point,
                 &vcpu_config,
                 vmm.vm.supported_cpuid().clone(),
             )
@@ -905,7 +905,7 @@
     {
         for vcpu in vcpus.iter_mut() {
             vcpu.kvm_vcpu
-                .configure(vmm.guest_memory(), entry_addr)
+                .configure(vmm.guest_memory(), entry_point.entry_addr)
                .map_err(Error::VcpuConfigure)
                .map_err(Internal)?;
        }
