1
1
#![ allow( dead_code) ]
2
2
3
+ use alloc:: boxed:: Box ;
3
4
use core:: arch:: asm;
4
5
use core:: arch:: x86_64:: {
5
- __rdtscp, _fxrstor, _fxsave, _mm_lfence, _rdseed64_step, _rdtsc, _xrstor, _xsave,
6
+ __rdtscp, _fxrstor, _fxsave, _mm_lfence, _rdseed64_step, _rdtsc, _xrstor, _xsave, _xsavec,
7
+ _xsaveopt,
6
8
} ;
9
+ use core:: fmt;
7
10
use core:: hint:: spin_loop;
8
11
use core:: num:: { NonZero , NonZeroU32 } ;
9
12
use core:: sync:: atomic:: { AtomicU64 , Ordering } ;
10
- use core:: { fmt, ptr} ;
11
13
12
14
use hermit_entry:: boot_info:: PlatformInfo ;
13
15
use hermit_sync:: Lazy ;
@@ -17,6 +19,7 @@ use x86_64::instructions::port::Port;
17
19
use x86_64:: instructions:: tables:: lidt;
18
20
use x86_64:: registers:: control:: { Cr0 , Cr0Flags , Cr4 , Cr4Flags , Efer , EferFlags } ;
19
21
use x86_64:: registers:: model_specific:: { FsBase , GsBase , Msr } ;
22
+ use x86_64:: registers:: mxcsr:: MxCsr ;
20
23
use x86_64:: registers:: segmentation:: { FS , GS , Segment64 } ;
21
24
use x86_64:: registers:: xcontrol:: { XCr0 , XCr0Flags } ;
22
25
use x86_64:: structures:: DescriptorTablePointer ;
@@ -66,6 +69,8 @@ struct Features {
66
69
supports_fsgs : bool ,
67
70
supports_rdtscp : bool ,
68
71
cpu_speedstep : CpuSpeedStep ,
72
+ has_xsaveopt : bool ,
73
+ has_xsavec : bool ,
69
74
xcr0_supports_avx512_opmask : bool ,
70
75
xcr0_supports_avx512_zmm_hi16 : bool ,
71
76
xcr0_supports_avx512_zmm_hi256 : bool ,
@@ -109,6 +114,8 @@ static FEATURES: Lazy<Features> = Lazy::new(|| {
109
114
cpu_speedstep. detect_features ( & cpuid) ;
110
115
cpu_speedstep
111
116
} ,
117
+ has_xsaveopt : extended_state_info. has_xsaveopt ( ) ,
118
+ has_xsavec : extended_state_info. has_xsavec ( ) ,
112
119
xcr0_supports_avx512_opmask : extended_state_info. xcr0_supports_avx512_opmask ( ) ,
113
120
xcr0_supports_avx512_zmm_hi16 : extended_state_info. xcr0_supports_avx512_zmm_hi16 ( ) ,
114
121
xcr0_supports_avx512_zmm_hi256 : extended_state_info. xcr0_supports_avx512_zmm_hi256 ( ) ,
@@ -133,135 +140,55 @@ pub struct XSaveLegacyRegion {
133
140
pub fpu_instruction_pointer_high_or_cs : u32 ,
134
141
pub fpu_data_pointer : u32 ,
135
142
pub fpu_data_pointer_high_or_ds : u32 ,
136
- pub mxcsr : u32 ,
143
+ pub mxcsr : MxCsr ,
137
144
pub mxcsr_mask : u32 ,
138
145
pub st_space : [ u8 ; 8 * 16 ] ,
139
146
pub xmm_space : [ u8 ; 16 * 16 ] ,
140
147
pub padding : [ u8 ; 96 ] ,
141
148
}
142
149
143
- #[ repr( C ) ]
144
- pub struct XSaveHeader {
145
- pub xstate_bv : u64 ,
146
- pub xcomp_bv : u64 ,
147
- pub reserved : [ u64 ; 6 ] ,
148
- }
150
/// One 64-byte, 64-byte-aligned chunk of raw storage.
///
/// A slice of these chunks serves as the backing buffer for the FPU save
/// area, so the buffer's start address is always a multiple of 64.
#[repr(C, align(64))]
#[derive(Clone)]
struct AlignToSixtyFour([u8; 64]);
149
153
150
154
#[ repr( C ) ]
151
- pub struct XSaveAVXState {
152
- pub ymmh_space : [ u8 ; 16 * 16 ] ,
155
+ pub struct FPUState {
156
+ xsave_area : Box < [ AlignToSixtyFour ] > ,
153
157
}
154
158
155
- /// XSave Area for AMD Lightweight Profiling.
156
- /// Refer to AMD Lightweight Profiling Specification (Publication No. 43724), Figure 7-1.
157
- #[ repr( C ) ]
158
- pub struct XSaveLWPState {
159
- pub lwpcb_address : u64 ,
160
- pub flags : u32 ,
161
- pub buffer_head_offset : u32 ,
162
- pub buffer_base : u64 ,
163
- pub buffer_size : u32 ,
164
- pub filters : u32 ,
165
- pub saved_event_record : [ u64 ; 4 ] ,
166
- pub event_counter : [ u32 ; 16 ] ,
167
- }
159
+ impl FPUState {
160
+ pub fn new ( ) -> Self {
161
+ let xsave_size = if supports_xsave ( ) {
162
+ CpuId :: new ( )
163
+ . get_extended_state_info ( )
164
+ . expect ( "XSAVE requires extended state info" )
165
+ . xsave_area_size_enabled_features ( ) as usize
166
+ } else {
167
+ size_of :: < XSaveLegacyRegion > ( )
168
+ } ;
168
169
169
- #[ repr( C ) ]
170
- pub struct XSaveBndregs {
171
- pub bound_registers : [ u8 ; 4 * 16 ] ,
172
- }
170
+ debug ! ( "XSAVE area size: {xsave_size}" ) ;
173
171
174
- #[ repr( C ) ]
175
- pub struct XSaveBndcsr {
176
- pub bndcfgu_register : u64 ,
177
- pub bndstatus_register : u64 ,
178
- }
172
+ // Allocate a 64-byte aligned Vec
173
+ let n_units = xsave_size. div_ceil ( size_of :: < AlignToSixtyFour > ( ) ) ;
174
+ let mut xsave_area = vec ! [ AlignToSixtyFour ( [ 0 ; 64 ] ) ; n_units] . into_boxed_slice ( ) ;
179
175
180
- /// Saved AVX512 register state.
181
- ///
182
- /// AVX512 extends the existing 16 AVX/SSE registers to be 512-bit wide and
183
- /// adds 16 more.
184
- ///
185
- /// It also adds 8 opmask registers, which are up to 64-bit wide.
186
- #[ repr( C ) ]
187
- pub struct XSaveAVX512State {
188
- /// Opmask registers k0-k7.
189
- pub opmask : [ u8 ; 8 * 8 ] ,
190
- /// Upper halves (32 bytes) of the lower ZMM registers (16).
191
- pub zmm_hi256 : [ u8 ; 32 * 16 ] ,
192
- /// Upper ZMM registers (64 bytes long, 16 registers).
193
- pub hi16_zmm : [ u8 ; 64 * 16 ] ,
194
- }
176
+ // SAFETY: We allocated at least the size of XSaveLegacyRegion bytes and have initialized them
177
+ let legacy_region = unsafe { & mut * xsave_area. as_mut_ptr ( ) . cast :: < XSaveLegacyRegion > ( ) } ;
195
178
196
- #[ repr( C , align( 64 ) ) ]
197
- pub struct FPUState {
198
- pub legacy_region : XSaveLegacyRegion ,
199
- pub header : XSaveHeader ,
200
- pub avx_state : XSaveAVXState ,
201
- pub lwp_state : XSaveLWPState ,
202
- pub bndregs : XSaveBndregs ,
203
- pub bndcsr : XSaveBndcsr ,
204
- pub avx512_state : XSaveAVX512State ,
205
- }
179
+ // Set FPU-related values to their default values after initialization.
180
+ // Refer to Intel Vol. 3A, Table 9-1. IA-32 and Intel 64 Processor States Following Power-up, Reset, or INIT
181
+ legacy_region. fpu_control_word = 0x37f ;
182
+ legacy_region. fpu_tag_word = 0xffff ;
183
+ legacy_region. mxcsr = MxCsr :: default ( ) ;
206
184
207
- impl FPUState {
208
- pub const fn new ( ) -> Self {
209
- Self {
210
- // Set FPU-related values to their default values after initialization.
211
- // Refer to Intel Vol. 3A, Table 9-1. IA-32 and Intel 64 Processor States Following Power-up, Reset, or INIT
212
- legacy_region : XSaveLegacyRegion {
213
- fpu_control_word : 0x37f ,
214
- fpu_status_word : 0 ,
215
- fpu_tag_word : 0xffff ,
216
- fpu_opcode : 0 ,
217
- fpu_instruction_pointer : 0 ,
218
- fpu_instruction_pointer_high_or_cs : 0 ,
219
- fpu_data_pointer : 0 ,
220
- fpu_data_pointer_high_or_ds : 0 ,
221
- mxcsr : 0x1f80 ,
222
- mxcsr_mask : 0 ,
223
- st_space : [ 0 ; 8 * 16 ] ,
224
- xmm_space : [ 0 ; 16 * 16 ] ,
225
- padding : [ 0 ; 96 ] ,
226
- } ,
227
-
228
- header : XSaveHeader {
229
- xstate_bv : 0 ,
230
- xcomp_bv : 0 ,
231
- reserved : [ 0 ; 6 ] ,
232
- } ,
233
- avx_state : XSaveAVXState {
234
- ymmh_space : [ 0 ; 16 * 16 ] ,
235
- } ,
236
- lwp_state : XSaveLWPState {
237
- lwpcb_address : 0 ,
238
- flags : 0 ,
239
- buffer_head_offset : 0 ,
240
- buffer_base : 0 ,
241
- buffer_size : 0 ,
242
- filters : 0 ,
243
- saved_event_record : [ 0 ; 4 ] ,
244
- event_counter : [ 0 ; 16 ] ,
245
- } ,
246
- bndregs : XSaveBndregs {
247
- bound_registers : [ 0 ; 4 * 16 ] ,
248
- } ,
249
- bndcsr : XSaveBndcsr {
250
- bndcfgu_register : 0 ,
251
- bndstatus_register : 0 ,
252
- } ,
253
- avx512_state : XSaveAVX512State {
254
- opmask : [ 0 ; 8 * 8 ] ,
255
- zmm_hi256 : [ 0 ; 32 * 16 ] ,
256
- hi16_zmm : [ 0 ; 64 * 16 ] ,
257
- } ,
258
- }
185
+ Self { xsave_area }
259
186
}
260
187
261
188
pub fn restore ( & self ) {
262
189
if supports_xsave ( ) {
263
190
unsafe {
264
- _xrstor ( ptr :: from_ref ( self ) . cast ( ) , u64:: MAX ) ;
191
+ _xrstor ( self . xsave_area . as_ptr ( ) . cast :: < u8 > ( ) , u64:: MAX ) ;
265
192
}
266
193
} else {
267
194
self . restore_common ( ) ;
@@ -270,8 +197,12 @@ impl FPUState {
270
197
271
198
pub fn save ( & mut self ) {
272
199
if supports_xsave ( ) {
273
- unsafe {
274
- _xsave ( ptr:: from_mut ( self ) . cast ( ) , u64:: MAX ) ;
200
+ if has_xsavec ( ) {
201
+ unsafe { _xsavec ( self . xsave_area . as_mut_ptr ( ) . cast :: < u8 > ( ) , u64:: MAX ) }
202
+ } else if has_xsaveopt ( ) {
203
+ unsafe { _xsaveopt ( self . xsave_area . as_mut_ptr ( ) . cast :: < u8 > ( ) , u64:: MAX ) }
204
+ } else {
205
+ unsafe { _xsave ( self . xsave_area . as_mut_ptr ( ) . cast :: < u8 > ( ) , u64:: MAX ) }
275
206
}
276
207
} else {
277
208
self . save_common ( ) ;
@@ -280,13 +211,13 @@ impl FPUState {
280
211
281
212
pub fn restore_common ( & self ) {
282
213
unsafe {
283
- _fxrstor ( ptr :: from_ref ( self ) . cast ( ) ) ;
214
+ _fxrstor ( self . xsave_area . as_ptr ( ) . cast :: < u8 > ( ) ) ;
284
215
}
285
216
}
286
217
287
218
pub fn save_common ( & mut self ) {
288
219
unsafe {
289
- _fxsave ( ptr :: from_mut ( self ) . cast ( ) ) ;
220
+ _fxsave ( self . xsave_area . as_mut_ptr ( ) . cast :: < u8 > ( ) ) ;
290
221
asm ! ( "fnclex" , options( nomem, nostack) ) ;
291
222
}
292
223
}
@@ -1110,6 +1041,16 @@ pub fn supports_fsgs() -> bool {
1110
1041
FEATURES . supports_fsgs
1111
1042
}
1112
1043
1044
+ #[ inline]
1045
+ pub fn has_xsaveopt ( ) -> bool {
1046
+ FEATURES . has_xsaveopt
1047
+ }
1048
+
1049
+ #[ inline]
1050
+ pub fn has_xsavec ( ) -> bool {
1051
+ FEATURES . has_xsavec
1052
+ }
1053
+
1113
1054
#[ inline]
1114
1055
pub fn xcr0_supports_avx512_opmask ( ) -> bool {
1115
1056
FEATURES . xcr0_supports_avx512_opmask
0 commit comments