Skip to content

Commit a459463

Browse files
authored
Merge pull request #1776 from Gelbpunkt/dynamic-xsave
refactor(arch/x86_64): Dynamically allocate XSAVE area
2 parents 970da10 + 164e56f commit a459463

File tree

1 file changed

+56
-115
lines changed

1 file changed

+56
-115
lines changed

src/arch/x86_64/kernel/processor.rs

Lines changed: 56 additions & 115 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,15 @@
11
#![allow(dead_code)]
22

3+
use alloc::boxed::Box;
34
use core::arch::asm;
45
use core::arch::x86_64::{
5-
__rdtscp, _fxrstor, _fxsave, _mm_lfence, _rdseed64_step, _rdtsc, _xrstor, _xsave,
6+
__rdtscp, _fxrstor, _fxsave, _mm_lfence, _rdseed64_step, _rdtsc, _xrstor, _xsave, _xsavec,
7+
_xsaveopt,
68
};
9+
use core::fmt;
710
use core::hint::spin_loop;
811
use core::num::{NonZero, NonZeroU32};
912
use core::sync::atomic::{AtomicU64, Ordering};
10-
use core::{fmt, ptr};
1113

1214
use hermit_entry::boot_info::PlatformInfo;
1315
use hermit_sync::Lazy;
@@ -17,6 +19,7 @@ use x86_64::instructions::port::Port;
1719
use x86_64::instructions::tables::lidt;
1820
use x86_64::registers::control::{Cr0, Cr0Flags, Cr4, Cr4Flags, Efer, EferFlags};
1921
use x86_64::registers::model_specific::{FsBase, GsBase, Msr};
22+
use x86_64::registers::mxcsr::MxCsr;
2023
use x86_64::registers::segmentation::{FS, GS, Segment64};
2124
use x86_64::registers::xcontrol::{XCr0, XCr0Flags};
2225
use x86_64::structures::DescriptorTablePointer;
@@ -66,6 +69,8 @@ struct Features {
6669
supports_fsgs: bool,
6770
supports_rdtscp: bool,
6871
cpu_speedstep: CpuSpeedStep,
72+
has_xsaveopt: bool,
73+
has_xsavec: bool,
6974
xcr0_supports_avx512_opmask: bool,
7075
xcr0_supports_avx512_zmm_hi16: bool,
7176
xcr0_supports_avx512_zmm_hi256: bool,
@@ -109,6 +114,8 @@ static FEATURES: Lazy<Features> = Lazy::new(|| {
109114
cpu_speedstep.detect_features(&cpuid);
110115
cpu_speedstep
111116
},
117+
has_xsaveopt: extended_state_info.has_xsaveopt(),
118+
has_xsavec: extended_state_info.has_xsavec(),
112119
xcr0_supports_avx512_opmask: extended_state_info.xcr0_supports_avx512_opmask(),
113120
xcr0_supports_avx512_zmm_hi16: extended_state_info.xcr0_supports_avx512_zmm_hi16(),
114121
xcr0_supports_avx512_zmm_hi256: extended_state_info.xcr0_supports_avx512_zmm_hi256(),
@@ -133,135 +140,55 @@ pub struct XSaveLegacyRegion {
133140
pub fpu_instruction_pointer_high_or_cs: u32,
134141
pub fpu_data_pointer: u32,
135142
pub fpu_data_pointer_high_or_ds: u32,
136-
pub mxcsr: u32,
143+
pub mxcsr: MxCsr,
137144
pub mxcsr_mask: u32,
138145
pub st_space: [u8; 8 * 16],
139146
pub xmm_space: [u8; 16 * 16],
140147
pub padding: [u8; 96],
141148
}
142149

143-
#[repr(C)]
144-
pub struct XSaveHeader {
145-
pub xstate_bv: u64,
146-
pub xcomp_bv: u64,
147-
pub reserved: [u64; 6],
148-
}
150+
#[derive(Clone)]
151+
#[repr(C, align(64))]
152+
struct AlignToSixtyFour([u8; 64]);
149153

150154
#[repr(C)]
151-
pub struct XSaveAVXState {
152-
pub ymmh_space: [u8; 16 * 16],
155+
pub struct FPUState {
156+
xsave_area: Box<[AlignToSixtyFour]>,
153157
}
154158

155-
/// XSave Area for AMD Lightweight Profiling.
156-
/// Refer to AMD Lightweight Profiling Specification (Publication No. 43724), Figure 7-1.
157-
#[repr(C)]
158-
pub struct XSaveLWPState {
159-
pub lwpcb_address: u64,
160-
pub flags: u32,
161-
pub buffer_head_offset: u32,
162-
pub buffer_base: u64,
163-
pub buffer_size: u32,
164-
pub filters: u32,
165-
pub saved_event_record: [u64; 4],
166-
pub event_counter: [u32; 16],
167-
}
159+
impl FPUState {
160+
pub fn new() -> Self {
161+
let xsave_size = if supports_xsave() {
162+
CpuId::new()
163+
.get_extended_state_info()
164+
.expect("XSAVE requires extended state info")
165+
.xsave_area_size_enabled_features() as usize
166+
} else {
167+
size_of::<XSaveLegacyRegion>()
168+
};
168169

169-
#[repr(C)]
170-
pub struct XSaveBndregs {
171-
pub bound_registers: [u8; 4 * 16],
172-
}
170+
debug!("XSAVE area size: {xsave_size}");
173171

174-
#[repr(C)]
175-
pub struct XSaveBndcsr {
176-
pub bndcfgu_register: u64,
177-
pub bndstatus_register: u64,
178-
}
172+
// Allocate a 64-byte aligned Vec
173+
let n_units = xsave_size.div_ceil(size_of::<AlignToSixtyFour>());
174+
let mut xsave_area = vec![AlignToSixtyFour([0; 64]); n_units].into_boxed_slice();
179175

180-
/// Saved AVX512 register state.
181-
///
182-
/// AVX512 extends the existing 16 AVX/SSE registers to be 512-bit wide and
183-
/// adds 16 more.
184-
///
185-
/// It also adds 8 opmask registers, which are up to 64-bit wide.
186-
#[repr(C)]
187-
pub struct XSaveAVX512State {
188-
/// Opmask registers k0-k7.
189-
pub opmask: [u8; 8 * 8],
190-
/// Upper halves (32 bytes) of the lower ZMM registers (16).
191-
pub zmm_hi256: [u8; 32 * 16],
192-
/// Upper ZMM registers (64 bytes long, 16 registers).
193-
pub hi16_zmm: [u8; 64 * 16],
194-
}
176+
// SAFETY: We allocated at least the size of XSaveLegacyRegion bytes and have initialized them
177+
let legacy_region = unsafe { &mut *xsave_area.as_mut_ptr().cast::<XSaveLegacyRegion>() };
195178

196-
#[repr(C, align(64))]
197-
pub struct FPUState {
198-
pub legacy_region: XSaveLegacyRegion,
199-
pub header: XSaveHeader,
200-
pub avx_state: XSaveAVXState,
201-
pub lwp_state: XSaveLWPState,
202-
pub bndregs: XSaveBndregs,
203-
pub bndcsr: XSaveBndcsr,
204-
pub avx512_state: XSaveAVX512State,
205-
}
179+
// Set FPU-related values to their default values after initialization.
180+
// Refer to Intel Vol. 3A, Table 9-1. IA-32 and Intel 64 Processor States Following Power-up, Reset, or INIT
181+
legacy_region.fpu_control_word = 0x37f;
182+
legacy_region.fpu_tag_word = 0xffff;
183+
legacy_region.mxcsr = MxCsr::default();
206184

207-
impl FPUState {
208-
pub const fn new() -> Self {
209-
Self {
210-
// Set FPU-related values to their default values after initialization.
211-
// Refer to Intel Vol. 3A, Table 9-1. IA-32 and Intel 64 Processor States Following Power-up, Reset, or INIT
212-
legacy_region: XSaveLegacyRegion {
213-
fpu_control_word: 0x37f,
214-
fpu_status_word: 0,
215-
fpu_tag_word: 0xffff,
216-
fpu_opcode: 0,
217-
fpu_instruction_pointer: 0,
218-
fpu_instruction_pointer_high_or_cs: 0,
219-
fpu_data_pointer: 0,
220-
fpu_data_pointer_high_or_ds: 0,
221-
mxcsr: 0x1f80,
222-
mxcsr_mask: 0,
223-
st_space: [0; 8 * 16],
224-
xmm_space: [0; 16 * 16],
225-
padding: [0; 96],
226-
},
227-
228-
header: XSaveHeader {
229-
xstate_bv: 0,
230-
xcomp_bv: 0,
231-
reserved: [0; 6],
232-
},
233-
avx_state: XSaveAVXState {
234-
ymmh_space: [0; 16 * 16],
235-
},
236-
lwp_state: XSaveLWPState {
237-
lwpcb_address: 0,
238-
flags: 0,
239-
buffer_head_offset: 0,
240-
buffer_base: 0,
241-
buffer_size: 0,
242-
filters: 0,
243-
saved_event_record: [0; 4],
244-
event_counter: [0; 16],
245-
},
246-
bndregs: XSaveBndregs {
247-
bound_registers: [0; 4 * 16],
248-
},
249-
bndcsr: XSaveBndcsr {
250-
bndcfgu_register: 0,
251-
bndstatus_register: 0,
252-
},
253-
avx512_state: XSaveAVX512State {
254-
opmask: [0; 8 * 8],
255-
zmm_hi256: [0; 32 * 16],
256-
hi16_zmm: [0; 64 * 16],
257-
},
258-
}
185+
Self { xsave_area }
259186
}
260187

261188
pub fn restore(&self) {
262189
if supports_xsave() {
263190
unsafe {
264-
_xrstor(ptr::from_ref(self).cast(), u64::MAX);
191+
_xrstor(self.xsave_area.as_ptr().cast::<u8>(), u64::MAX);
265192
}
266193
} else {
267194
self.restore_common();
@@ -270,8 +197,12 @@ impl FPUState {
270197

271198
pub fn save(&mut self) {
272199
if supports_xsave() {
273-
unsafe {
274-
_xsave(ptr::from_mut(self).cast(), u64::MAX);
200+
if has_xsavec() {
201+
unsafe { _xsavec(self.xsave_area.as_mut_ptr().cast::<u8>(), u64::MAX) }
202+
} else if has_xsaveopt() {
203+
unsafe { _xsaveopt(self.xsave_area.as_mut_ptr().cast::<u8>(), u64::MAX) }
204+
} else {
205+
unsafe { _xsave(self.xsave_area.as_mut_ptr().cast::<u8>(), u64::MAX) }
275206
}
276207
} else {
277208
self.save_common();
@@ -280,13 +211,13 @@ impl FPUState {
280211

281212
pub fn restore_common(&self) {
282213
unsafe {
283-
_fxrstor(ptr::from_ref(self).cast());
214+
_fxrstor(self.xsave_area.as_ptr().cast::<u8>());
284215
}
285216
}
286217

287218
pub fn save_common(&mut self) {
288219
unsafe {
289-
_fxsave(ptr::from_mut(self).cast());
220+
_fxsave(self.xsave_area.as_mut_ptr().cast::<u8>());
290221
asm!("fnclex", options(nomem, nostack));
291222
}
292223
}
@@ -1110,6 +1041,16 @@ pub fn supports_fsgs() -> bool {
11101041
FEATURES.supports_fsgs
11111042
}
11121043

1044+
#[inline]
1045+
pub fn has_xsaveopt() -> bool {
1046+
FEATURES.has_xsaveopt
1047+
}
1048+
1049+
#[inline]
1050+
pub fn has_xsavec() -> bool {
1051+
FEATURES.has_xsavec
1052+
}
1053+
11131054
#[inline]
11141055
pub fn xcr0_supports_avx512_opmask() -> bool {
11151056
FEATURES.xcr0_supports_avx512_opmask

0 commit comments

Comments
 (0)