Skip to content

Commit 92687cf

Browse files
Auto merge of #143182 - xdoardo:more-addrspace, r=<try>
Allow custom default address spaces and parse `p-` specifications in the datalayout string

Some targets, such as CHERI, use a default address space other than the "normal" default address space `0` (in the case of CHERI, [200 is used](https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-877.pdf)). Currently, `rustc` does not allow specifying a custom default address space and does not take into account [`p-` specifications in the datalayout string](https://llvm.org/docs/LangRef.html#langref-datalayout). This patch tries to mitigate these problems by allowing targets to define a custom default address space (while keeping address space `0` as the default value) and by adding the code to parse the `p-` specifications in `rustc_abi`.

The main changes are that `TargetDataLayout` now uses functions to refer to pointer-related information, instead of having specific fields for the size and alignment of pointers in the default address space; furthermore, the two `pointer_size` and `pointer_align` fields in `TargetDataLayout` are replaced with a `Vec<(AddressSpace, AddressSpaceInfo)>` that holds info for every address space described by the `p-` specifications. The potential performance drawbacks of not having ad-hoc fields for the default address space will be tested in this PR's CI run.

r? workingjubilee
2 parents ed2d759 + fce06e3 commit 92687cf

File tree

59 files changed

+320
-149
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

59 files changed

+320
-149
lines changed

compiler/rustc_abi/src/layout/ty.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -118,11 +118,11 @@ impl<'a> Layout<'a> {
118118

119119
/// Whether the layout is from a type that implements [`std::marker::PointerLike`].
120120
///
121-
/// Currently, that means that the type is pointer-sized, pointer-aligned,
122-
/// and has a initialized (non-union), scalar ABI.
121+
/// Currently, that means that the type is pointer-sized, pointer-aligned, and has an initialized
122+
/// (non-union), scalar ABI; all of this with respect to the default address space.
123123
pub fn is_pointer_like(self, data_layout: &TargetDataLayout) -> bool {
124-
self.size() == data_layout.pointer_size
125-
&& self.align().abi == data_layout.pointer_align.abi
124+
self.size() == data_layout.pointer_size()
125+
&& self.align().abi == data_layout.pointer_align().abi
126126
&& matches!(self.backend_repr(), BackendRepr::Scalar(Scalar::Initialized { .. }))
127127
}
128128
}

compiler/rustc_abi/src/lib.rs

Lines changed: 166 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,17 @@ impl ReprOptions {
221221
/// * Cranelift stores the base-2 log of the lane count in a 4 bit integer.
222222
pub const MAX_SIMD_LANES: u64 = 1 << 0xF;
223223

224+
/// Information about a specific address space.
225+
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
226+
pub struct AddressSpaceInfo {
227+
/// The size of the bitwise representation of the pointer.
228+
pointer_size: Size,
229+
/// The alignment requirements for pointers in this address space.
230+
pointer_align: AbiAlign,
231+
/// The size of the index that is used for address calculations on pointers in this address space.
232+
pointer_index: Size,
233+
}
234+
224235
/// Parsed [Data layout](https://llvm.org/docs/LangRef.html#data-layout)
225236
/// for a target, which contains everything needed to compute layouts.
226237
#[derive(Debug, PartialEq, Eq)]
@@ -236,13 +247,14 @@ pub struct TargetDataLayout {
236247
pub f32_align: AbiAlign,
237248
pub f64_align: AbiAlign,
238249
pub f128_align: AbiAlign,
239-
pub pointer_size: Size,
240-
pub pointer_align: AbiAlign,
241250
pub aggregate_align: AbiAlign,
242251

243252
/// Alignments for vector types.
244253
pub vector_align: Vec<(Size, AbiAlign)>,
245254

255+
pub default_address_space: AddressSpace,
256+
pub address_space_info: Vec<(AddressSpace, AddressSpaceInfo)>,
257+
246258
pub instruction_address_space: AddressSpace,
247259

248260
/// Minimum size of #[repr(C)] enums (default c_int::BITS, usually 32)
@@ -267,14 +279,21 @@ impl Default for TargetDataLayout {
267279
f32_align: AbiAlign::new(align(32)),
268280
f64_align: AbiAlign::new(align(64)),
269281
f128_align: AbiAlign::new(align(128)),
270-
pointer_size: Size::from_bits(64),
271-
pointer_align: AbiAlign::new(align(64)),
272282
aggregate_align: AbiAlign { abi: align(8) },
273283
vector_align: vec![
274284
(Size::from_bits(64), AbiAlign::new(align(64))),
275285
(Size::from_bits(128), AbiAlign::new(align(128))),
276286
],
277-
instruction_address_space: AddressSpace::DATA,
287+
default_address_space: AddressSpace::ZERO,
288+
address_space_info: vec![(
289+
AddressSpace::ZERO,
290+
AddressSpaceInfo {
291+
pointer_size: Size::from_bits(64),
292+
pointer_align: AbiAlign::new(align(64)),
293+
pointer_index: Size::from_bits(64),
294+
},
295+
)],
296+
instruction_address_space: AddressSpace::ZERO,
278297
c_enum_min_size: Integer::I32,
279298
}
280299
}
@@ -288,6 +307,7 @@ pub enum TargetDataLayoutErrors<'a> {
288307
InconsistentTargetArchitecture { dl: &'a str, target: &'a str },
289308
InconsistentTargetPointerWidth { pointer_size: u64, target: u32 },
290309
InvalidBitsSize { err: String },
310+
MissingAddressSpaceInfo { addr_space: AddressSpace },
291311
}
292312

293313
impl TargetDataLayout {
@@ -298,6 +318,7 @@ impl TargetDataLayout {
298318
/// determined from llvm string.
299319
pub fn parse_from_llvm_datalayout_string<'a>(
300320
input: &'a str,
321+
default_address_space: AddressSpace,
301322
) -> Result<TargetDataLayout, TargetDataLayoutErrors<'a>> {
302323
// Parse an address space index from a string.
303324
let parse_address_space = |s: &'a str, cause: &'a str| {
@@ -334,6 +355,8 @@ impl TargetDataLayout {
334355
};
335356

336357
let mut dl = TargetDataLayout::default();
358+
dl.default_address_space = default_address_space;
359+
337360
let mut i128_align_src = 64;
338361
for spec in input.split('-') {
339362
let spec_parts = spec.split(':').collect::<Vec<_>>();
@@ -349,13 +372,47 @@ impl TargetDataLayout {
349372
["f32", a @ ..] => dl.f32_align = parse_align(a, "f32")?,
350373
["f64", a @ ..] => dl.f64_align = parse_align(a, "f64")?,
351374
["f128", a @ ..] => dl.f128_align = parse_align(a, "f128")?,
352-
// FIXME(erikdesjardins): we should be parsing nonzero address spaces
353-
// this will require replacing TargetDataLayout::{pointer_size,pointer_align}
354-
// with e.g. `fn pointer_size_in(AddressSpace)`
355-
[p @ "p", s, a @ ..] | [p @ "p0", s, a @ ..] => {
356-
dl.pointer_size = parse_size(s, p)?;
357-
dl.pointer_align = parse_align(a, p)?;
375+
[p, s, a @ ..] if p.starts_with("p") => {
376+
let p = p.strip_prefix(char::is_alphabetic).unwrap_or_default();
377+
378+
let addr_space = if !p.is_empty() {
379+
parse_address_space(p, "p")?
380+
} else {
381+
AddressSpace::ZERO
382+
};
383+
384+
let pointer_size = parse_size(s, p)?;
385+
let info = AddressSpaceInfo {
386+
pointer_index: pointer_size,
387+
pointer_size,
388+
pointer_align: parse_align(a, p)?,
389+
};
390+
match dl.address_space_info.iter_mut().find(|(a, _)| *a == addr_space) {
391+
Some(e) => e.1 = info,
392+
None => dl.address_space_info.push((addr_space, info)),
393+
}
394+
}
395+
[p, s, _pr, i, a @ ..] if p.starts_with("p") => {
396+
let p = p.strip_prefix(char::is_alphabetic).unwrap_or_default();
397+
398+
let addr_space = if !p.is_empty() {
399+
parse_address_space(p, "p")?
400+
} else {
401+
AddressSpace::ZERO
402+
};
403+
404+
let info = AddressSpaceInfo {
405+
pointer_align: parse_align(a, p)?,
406+
pointer_size: parse_size(s, p)?,
407+
pointer_index: parse_size(i, p)?,
408+
};
409+
410+
match dl.address_space_info.iter_mut().find(|(a, _)| *a == addr_space) {
411+
Some(e) => e.1 = info,
412+
None => dl.address_space_info.push((addr_space, info)),
413+
}
358414
}
415+
359416
[s, a @ ..] if s.starts_with('i') => {
360417
let Ok(bits) = s[1..].parse::<u64>() else {
361418
parse_size(&s[1..], "i")?; // For the user error.
@@ -390,10 +447,34 @@ impl TargetDataLayout {
390447
_ => {} // Ignore everything else.
391448
}
392449
}
450+
451+
if dl.address_space_info.iter().find(|(a, _)| *a == default_address_space).is_none() {
452+
return Err(TargetDataLayoutErrors::MissingAddressSpaceInfo {
453+
addr_space: default_address_space,
454+
});
455+
}
456+
457+
// Inherit, if not given, address space information for specific LLVM elements from the
458+
// default data address space.
459+
460+
if dl.address_space_info.iter().find(|(a, _)| *a == dl.instruction_address_space).is_none()
461+
{
462+
dl.address_space_info.push((
463+
dl.instruction_address_space,
464+
dl.address_space_info
465+
.iter()
466+
.find(|(a, _)| *a == default_address_space)
467+
.unwrap()
468+
.1
469+
.clone(),
470+
));
471+
}
472+
393473
Ok(dl)
394474
}
395475

396-
/// Returns **exclusive** upper bound on object size in bytes.
476+
/// Returns **exclusive** upper bound on object size in bytes, in the default data address
477+
/// space.
397478
///
398479
/// The theoretical maximum object size is defined as the maximum positive `isize` value.
399480
/// This ensures that the `offset` semantics remain well-defined by allowing it to correctly
@@ -404,7 +485,21 @@ impl TargetDataLayout {
404485
/// so we adopt such a more-constrained size bound due to its technical limitations.
405486
#[inline]
406487
pub fn obj_size_bound(&self) -> u64 {
407-
match self.pointer_size.bits() {
488+
self.obj_size_bound_in(self.default_address_space)
489+
}
490+
491+
/// Returns **exclusive** upper bound on object size in bytes, in the given address space.
492+
///
493+
/// The theoretical maximum object size is defined as the maximum positive `isize` value.
494+
/// This ensures that the `offset` semantics remain well-defined by allowing it to correctly
495+
/// index every address within an object along with one byte past the end, along with allowing
496+
/// `isize` to store the difference between any two pointers into an object.
497+
///
498+
/// LLVM uses a 64-bit integer to represent object size in *bits*, but we care only for bytes,
499+
/// so we adopt such a more-constrained size bound due to its technical limitations.
500+
#[inline]
501+
pub fn obj_size_bound_in(&self, address_space: AddressSpace) -> u64 {
502+
match self.pointer_size_in(address_space).bits() {
408503
16 => 1 << 15,
409504
32 => 1 << 31,
410505
64 => 1 << 61,
@@ -414,8 +509,13 @@ impl TargetDataLayout {
414509

415510
#[inline]
416511
pub fn ptr_sized_integer(&self) -> Integer {
512+
self.ptr_sized_integer_in(self.default_address_space)
513+
}
514+
515+
#[inline]
516+
pub fn ptr_sized_integer_in(&self, address_space: AddressSpace) -> Integer {
417517
use Integer::*;
418-
match self.pointer_size.bits() {
518+
match self.pointer_index_in(address_space).bits() {
419519
16 => I16,
420520
32 => I32,
421521
64 => I64,
@@ -439,6 +539,54 @@ impl TargetDataLayout {
439539
Align::from_bytes(vec_size.bytes().next_power_of_two()).unwrap(),
440540
))
441541
}
542+
543+
/// Get the pointer size in the default data address space.
544+
#[inline]
545+
pub fn pointer_size(&self) -> Size {
546+
self.pointer_size_in(self.default_address_space)
547+
}
548+
549+
/// Get the pointer size in a specific address space.
550+
#[inline]
551+
pub fn pointer_size_in(&self, c: AddressSpace) -> Size {
552+
if let Some(e) = self.address_space_info.iter().find(|(a, _)| a == &c) {
553+
e.1.pointer_size
554+
} else {
555+
panic!("Use of unknown address space {c:?}");
556+
}
557+
}
558+
559+
/// Get the size of the pointer index in the default data address space.
560+
#[inline]
561+
pub fn pointer_index(&self) -> Size {
562+
self.pointer_index_in(self.default_address_space)
563+
}
564+
565+
/// Get the size of the pointer index in a specific address space.
566+
#[inline]
567+
pub fn pointer_index_in(&self, c: AddressSpace) -> Size {
568+
if let Some(e) = self.address_space_info.iter().find(|(a, _)| a == &c) {
569+
e.1.pointer_index
570+
} else {
571+
panic!("Use of unknown address space {c:?}");
572+
}
573+
}
574+
575+
/// Get the pointer alignment in the default data address space.
576+
#[inline]
577+
pub fn pointer_align(&self) -> AbiAlign {
578+
self.pointer_align_in(self.default_address_space)
579+
}
580+
581+
/// Get the pointer alignment in a specific address space.
582+
#[inline]
583+
pub fn pointer_align_in(&self, c: AddressSpace) -> AbiAlign {
584+
if let Some(e) = self.address_space_info.iter().find(|(a, _)| a == &c) {
585+
e.1.pointer_align
586+
} else {
587+
panic!("Use of unknown address space {c:?}");
588+
}
589+
}
442590
}
443591

444592
pub trait HasDataLayout {
@@ -1104,7 +1252,7 @@ impl Primitive {
11041252
// FIXME(erikdesjardins): ignoring address space is technically wrong, pointers in
11051253
// different address spaces can have different sizes
11061254
// (but TargetDataLayout doesn't currently parse that part of the DL string)
1107-
Pointer(_) => dl.pointer_size,
1255+
Pointer(a) => dl.pointer_size_in(a),
11081256
}
11091257
}
11101258

@@ -1118,7 +1266,7 @@ impl Primitive {
11181266
// FIXME(erikdesjardins): ignoring address space is technically wrong, pointers in
11191267
// different address spaces can have different alignments
11201268
// (but TargetDataLayout doesn't currently parse that part of the DL string)
1121-
Pointer(_) => dl.pointer_align,
1269+
Pointer(a) => dl.pointer_align_in(a),
11221270
}
11231271
}
11241272
}
@@ -1422,8 +1570,8 @@ impl<FieldIdx: Idx> FieldsShape<FieldIdx> {
14221570
pub struct AddressSpace(pub u32);
14231571

14241572
impl AddressSpace {
1425-
/// The default address space, corresponding to data space.
1426-
pub const DATA: Self = AddressSpace(0);
1573+
/// LLVM's `0` address space.
1574+
pub const ZERO: Self = AddressSpace(0);
14271575
}
14281576

14291577
/// The way we represent values to the backend

compiler/rustc_ast_lowering/src/format.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ impl<'hir> LoweringContext<'_, 'hir> {
5555
/// Get the maximum value of int_ty. It is platform-dependent due to the byte size of isize
5656
fn int_ty_max(&self, int_ty: IntTy) -> u128 {
5757
match int_ty {
58-
IntTy::Isize => self.tcx.data_layout.pointer_size.signed_int_max() as u128,
58+
IntTy::Isize => self.tcx.data_layout.pointer_size().signed_int_max() as u128,
5959
IntTy::I8 => i8::MAX as u128,
6060
IntTy::I16 => i16::MAX as u128,
6161
IntTy::I32 => i32::MAX as u128,
@@ -67,7 +67,7 @@ impl<'hir> LoweringContext<'_, 'hir> {
6767
/// Get the maximum value of uint_ty. It is platform-dependent due to the byte size of usize
6868
fn uint_ty_max(&self, uint_ty: UintTy) -> u128 {
6969
match uint_ty {
70-
UintTy::Usize => self.tcx.data_layout.pointer_size.unsigned_int_max(),
70+
UintTy::Usize => self.tcx.data_layout.pointer_size().unsigned_int_max(),
7171
UintTy::U8 => u8::MAX as u128,
7272
UintTy::U16 => u16::MAX as u128,
7373
UintTy::U32 => u32::MAX as u128,

compiler/rustc_codegen_cranelift/src/abi/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -831,7 +831,7 @@ pub(crate) fn codegen_drop<'tcx>(
831831

832832
pub(crate) fn lib_call_arg_param(tcx: TyCtxt<'_>, ty: Type, is_signed: bool) -> AbiParam {
833833
let param = AbiParam::new(ty);
834-
if ty.is_int() && u64::from(ty.bits()) < tcx.data_layout.pointer_size.bits() {
834+
if ty.is_int() && u64::from(ty.bits()) < tcx.data_layout.pointer_size().bits() {
835835
match (&*tcx.sess.target.arch, &*tcx.sess.target.vendor) {
836836
("x86_64", _) | ("aarch64", "apple") => match (ty, is_signed) {
837837
(types::I8 | types::I16, true) => param.sext(),

compiler/rustc_codegen_cranelift/src/abi/pass_mode.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ impl<'tcx> ArgAbiExt<'tcx> for ArgAbi<'tcx, Ty<'tcx>> {
127127
PassMode::Indirect { attrs, meta_attrs: None, on_stack } => {
128128
if on_stack {
129129
// Abi requires aligning struct size to pointer size
130-
let size = self.layout.size.align_to(tcx.data_layout.pointer_align.abi);
130+
let size = self.layout.size.align_to(tcx.data_layout.pointer_align().abi);
131131
let size = u32::try_from(size.bytes()).unwrap();
132132
smallvec![apply_attrs_to_abi_param(
133133
AbiParam::special(pointer_ty(tcx), ArgumentPurpose::StructArgument(size),),

compiler/rustc_codegen_cranelift/src/common.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ use crate::debuginfo::FunctionDebugContext;
1515
use crate::prelude::*;
1616

1717
pub(crate) fn pointer_ty(tcx: TyCtxt<'_>) -> types::Type {
18-
match tcx.data_layout.pointer_size.bits() {
18+
match tcx.data_layout.pointer_size().bits() {
1919
16 => types::I16,
2020
32 => types::I32,
2121
64 => types::I64,

compiler/rustc_codegen_cranelift/src/constant.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -443,7 +443,7 @@ fn define_all_allocs(tcx: TyCtxt<'_>, module: &mut dyn Module, cx: &mut Constant
443443
let addend = {
444444
let endianness = tcx.data_layout.endian;
445445
let offset = offset.bytes() as usize;
446-
let ptr_size = tcx.data_layout.pointer_size;
446+
let ptr_size = tcx.data_layout.pointer_size();
447447
let bytes = &alloc.inspect_with_uninit_and_ptr_outside_interpreter(
448448
offset..offset + ptr_size.bytes() as usize,
449449
);

compiler/rustc_codegen_gcc/src/common.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ impl<'gcc, 'tcx> ConstCodegenMethods for CodegenCx<'gcc, 'tcx> {
170170
}
171171

172172
fn const_usize(&self, i: u64) -> RValue<'gcc> {
173-
let bit_size = self.data_layout().pointer_size.bits();
173+
let bit_size = self.data_layout().pointer_size().bits();
174174
if bit_size < 64 {
175175
// make sure it doesn't overflow
176176
assert!(i < (1 << bit_size));

compiler/rustc_codegen_gcc/src/consts.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,7 @@ pub(crate) fn const_alloc_to_gcc_uncached<'gcc>(
294294
let alloc = alloc.inner();
295295
let mut llvals = Vec::with_capacity(alloc.provenance().ptrs().len() + 1);
296296
let dl = cx.data_layout();
297-
let pointer_size = dl.pointer_size.bytes() as usize;
297+
let pointer_size = dl.pointer_size().bytes() as usize;
298298

299299
let mut next_offset = 0;
300300
for &(offset, prov) in alloc.provenance().ptrs().iter() {
@@ -331,7 +331,7 @@ pub(crate) fn const_alloc_to_gcc_uncached<'gcc>(
331331
),
332332
abi::Scalar::Initialized {
333333
value: Primitive::Pointer(address_space),
334-
valid_range: WrappingRange::full(dl.pointer_size),
334+
valid_range: WrappingRange::full(dl.pointer_size()),
335335
},
336336
cx.type_i8p_ext(address_space),
337337
));

0 commit comments

Comments
 (0)