Skip to content

Commit db343c3

Browse files
committed
Add comments to AbstractType derive macro implementation
1 parent d9190eb commit db343c3

File tree

1 file changed

+146
-31
lines changed
  • rust/binaryninja-derive/src

1 file changed

+146
-31
lines changed

rust/binaryninja-derive/src/lib.rs

Lines changed: 146 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ impl AbstractField {
3434
let Some(ident) = field.ident else {
3535
return Err(field.span().error("field must be named"));
3636
};
37+
38+
// If the field is a pointer, we want the type being pointed at, not the pointer itself.
3739
let kind = match field.ty {
3840
Type::Ptr(ty) => {
3941
let Some(width) = pointer_width else {
@@ -47,6 +49,8 @@ impl AbstractField {
4749
}
4850
_ => FieldKind::Ty(field.ty),
4951
};
52+
53+
// Fields may be decorated with either `#[binja(name = "...")]` or `#[binja(named)]`.
5054
let name = find_binja_attr(&field.attrs)?
5155
.map(|attr| match attr.kind {
5256
BinjaAttrKind::PointerWidth(_) => Err(attr.span.error(
@@ -61,9 +65,11 @@ impl AbstractField {
6165
}
6266
})
6367
.transpose()?;
68+
6469
Ok(Self { kind, ident, name })
6570
}
6671

72+
/// Transforms the `AbstractField` into a token stream that constructs a binja `Type` object
6773
fn resolved_ty(&self) -> TokenStream {
6874
let ty = self.kind.ty();
6975
let mut resolved = quote! { <#ty as ::binaryninja::types::AbstractType>::resolve_type() };
@@ -93,9 +99,19 @@ enum BinjaAttrKind {
9399
Named(Option<String>),
94100
}
95101

102+
/// Given a list of attributes, look for a `#[binja(...)]` attribute. At most one copy of the
103+
/// attribute is allowed to decorate an item (i.e. a type or field). If more than one copy is
104+
/// present, we throw an error.
105+
///
106+
/// Three properties are supported, and for any given item they are mutually exclusive:
107+
/// - `pointer_width`: Expects an integer literal. Only allowed on types, not fields.
108+
/// - `name`: Expects a string literal. Only allowed on fields.
109+
/// - `named`: Must be a bare path. Only allowed on fields.
96110
fn find_binja_attr(attrs: &[Attribute]) -> Result<Option<BinjaAttr>> {
111+
// Use a `OnceCell` to assert that we only allow a single `#[binja(...)]` attribute per-item.
97112
let binja_attr = OnceCell::new();
98113

114+
// Wrapper function for setting the value of the `OnceCell` above.
99115
let set_attr = |attr: BinjaAttr| {
100116
let span = attr.span;
101117
binja_attr
@@ -111,6 +127,7 @@ fn find_binja_attr(attrs: &[Attribute]) -> Result<Option<BinjaAttr>> {
111127
let meta = attr.parse_args::<Meta>()?;
112128
let meta_ident = meta.path().require_ident()?;
113129
if meta_ident == "pointer_width" {
130+
// #[binja(pointer_width = <int>)]
114131
let value = &meta.require_name_value()?.value;
115132
if let Expr::Lit(expr) = &value {
116133
if let Lit::Int(val) = &expr.lit {
@@ -123,6 +140,7 @@ fn find_binja_attr(attrs: &[Attribute]) -> Result<Option<BinjaAttr>> {
123140
}
124141
return Err(value.span().error("expected integer literal"));
125142
} else if meta_ident == "name" {
143+
// #[binja(name = "...")]
126144
let value = &meta.require_name_value()?.value;
127145
if let Expr::Lit(expr) = &value {
128146
if let Lit::Str(lit) = &expr.lit {
@@ -135,6 +153,7 @@ fn find_binja_attr(attrs: &[Attribute]) -> Result<Option<BinjaAttr>> {
135153
}
136154
return Err(value.span().error(r#"expected string literal"#));
137155
} else if meta_ident == "named" {
156+
// #[binja(named)]
138157
meta.require_path_only()?;
139158
set_attr(BinjaAttr {
140159
kind: BinjaAttrKind::Named(None),
@@ -150,6 +169,7 @@ fn find_binja_attr(attrs: &[Attribute]) -> Result<Option<BinjaAttr>> {
150169
Ok(binja_attr.into_inner())
151170
}
152171

172+
/// Struct representing the contents of all `#[repr(...)]` attributes decorating a type.
153173
struct Repr {
154174
c: bool,
155175
packed: Option<Option<LitInt>>,
@@ -158,6 +178,8 @@ struct Repr {
158178
}
159179

160180
impl Repr {
181+
/// Scan through a list of attributes and find every instance of a `#[repr(...)]` attribute,
182+
/// then initialize `Self` based off the collective contents of those attributes.
161183
fn from_attrs(attrs: &[Attribute]) -> Result<Self> {
162184
let mut c = false;
163185
let mut packed = None;
@@ -213,23 +235,34 @@ fn ident_in_list<const N: usize>(ident: &Ident, list: [&'static str; N]) -> bool
213235
list.iter().any(|id| ident == id)
214236
}
215237

238+
/// Entry point to the proc-macro.
216239
#[proc_macro_derive(AbstractType, attributes(binja))]
217240
pub fn abstract_type_derive(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
218241
let input = parse_macro_input!(input as DeriveInput);
242+
// Transforming the error diagnostic into tokens for emission allows the business logic to
243+
// return `Result` and make use of the `?` operator like any normal Rust program
219244
match impl_abstract_type(input) {
220245
Ok(tokens) => tokens.into(),
221246
Err(diag) => diag.emit_as_item_tokens().into(),
222247
}
223248
}
224249

250+
/// Main business logic of the macro. Parses any relevant attributes decorating the type, then
251+
/// defers execution based on the kind of type: struct, enum, or union.
225252
fn impl_abstract_type(ast: DeriveInput) -> Result<TokenStream> {
226253
let repr = Repr::from_attrs(&ast.attrs)?;
227254
let width = find_binja_attr(&ast.attrs)?
228255
.map(|attr| match attr.kind {
229-
BinjaAttrKind::PointerWidth(width) => Ok(width),
256+
BinjaAttrKind::PointerWidth(width) => {
257+
if let Data::Enum(_) = ast.data {
258+
Err(attr.span.error("`#[binja(pointer_width)]` is only supported on structs and unions, not enums"))
259+
} else {
260+
Ok(width)
261+
}
262+
}
230263
BinjaAttrKind::Named(Some(_)) => Err(attr
231264
.span
232-
.error(r#"`#[binja(name = "...")] is only supported on fields"#)),
265+
.error(r#"`#[binja(name)] is only supported on fields"#)),
233266
BinjaAttrKind::Named(None) => Err(attr
234267
.span
235268
.error("`#[binja(named)]` is only supported on fields")),
@@ -240,23 +273,23 @@ fn impl_abstract_type(ast: DeriveInput) -> Result<TokenStream> {
240273
return Err(ast.generics.span().error("type must not be generic"));
241274
}
242275

243-
let ident = ast.ident;
244276
match ast.data {
245277
Data::Struct(s) => match s.fields {
246278
Fields::Named(fields) => {
247-
impl_abstract_structure_type(ident, fields, repr, width, StructureKind::Struct)
279+
impl_abstract_structure_type(ast.ident, fields, repr, width, StructureKind::Struct)
248280
}
249-
Fields::Unnamed(_) => Err(s
250-
.fields
251-
.span()
252-
.error("tuple structs are unsupported; struct must have named fields")),
253-
Fields::Unit => Err(ident
254-
.span()
255-
.error("unit structs are unsupported; provide at least one named field")),
281+
Fields::Unnamed(_) => Err(s.fields.span().error(
282+
"tuple structs are unsupported; \
283+
struct must have named fields",
284+
)),
285+
Fields::Unit => Err(ast.ident.span().error(
286+
"unit structs are unsupported; \
287+
provide at least one named field",
288+
)),
256289
},
257-
Data::Enum(e) => impl_abstract_enum_type(ident, e.variants, repr),
290+
Data::Enum(e) => impl_abstract_enum_type(ast.ident, e.variants, repr),
258291
Data::Union(u) => {
259-
impl_abstract_structure_type(ident, u.fields, repr, width, StructureKind::Union)
292+
impl_abstract_structure_type(ast.ident, u.fields, repr, width, StructureKind::Union)
260293
}
261294
}
262295
}
@@ -266,6 +299,70 @@ enum StructureKind {
266299
Union,
267300
}
268301

302+
/// Implements the `AbstractType` trait for either a struct or union, based on the value of `kind`.
303+
///
304+
/// Unlike C-style enums, structs and unions can contain other types within them that affect their
305+
/// size and alignment. For example, the size of a struct is at least the sum of the sizes of its
306+
/// fields (plus any padding), and its alignment is equal to that of the most-aligned field.
307+
/// Likewise, a union's size is at least that of its largest field.
308+
///
309+
/// Normally this would be fine, because the compiler can give you size and alignment information
310+
/// using `std::mem::{size_of, align_of}`. However, the `#[binja(pointer_width)]` attribute allows
311+
/// users to change the width of pointer fields to be different in Binja compared to the host CPU
312+
/// architecture, meaning the value calculated by the compiler will be wrong in that case. What's
313+
/// worse is that a pointer field with custom width not only affects the size/alignment of its
314+
/// parent struct, but anything that contains *that* struct, and so on up the tree.
315+
///
316+
/// So, we need a way to propagate the modified layout information at compile-time. To accomplish
317+
/// this, we use the `AbstractType::LAYOUT` associated constant, which by default matches the
318+
/// layout of the struct as calculated by the compiler, but which can be swapped out for any other
319+
/// valid `std::alloc::Layout` object when implementing the `AbstractType` trait. We then create a
320+
/// mock-type with the desired custom layout and use that for propagation.
321+
///
322+
/// In order to mock a type, we make use of the following construction:
323+
///
324+
/// ```ignore
325+
/// #[repr(C)]
326+
/// struct Mock<const SIZE: usize, const ALIGN: usize>
327+
/// where
328+
/// elain::Align<ALIGN>: elain::Alignment,
329+
/// {
330+
/// t: [u8; SIZE],
331+
/// _align: elain::Align<ALIGN>
332+
/// }
333+
/// ```
334+
///
335+
/// The `elain::Align` type is a zero-size type with a const-generic parameter specifying its
336+
/// alignment. The trait bound serves to restrict the possible values of `ALIGN` to only those
337+
/// valid for specifying alignment (powers of two). Additionally, we know that `[u8; SIZE]` is
338+
/// always of size `SIZE`, and alignment 1. Therefore, the `Mock` type is guaranteed to be of size
339+
/// `SIZE` and alignment equal to `ALIGN`.
340+
///
341+
/// This constructed `Mock` type allows us to generate a struct with arbitrary layout, which we can
342+
/// use to mimic the layout of another struct:
343+
///
344+
/// ```ignore
345+
/// #[derive(AbstractType)]
346+
/// #[repr(C)]
347+
/// struct S {
348+
/// first: u8,
349+
/// second: u16,
350+
/// third: u64,
351+
/// }
352+
///
353+
/// // Identical layout to `S` above
354+
/// #[repr(C)]
355+
/// struct __S_layout {
356+
/// first: Mock<1, 1>,
357+
/// second: Mock<2, 2>,
358+
/// third: Mock<8, 8>,
359+
/// }
360+
/// ```
361+
///
362+
/// Then, we can propagate any changes in the layout of `S` (due to custom pointer widths) by
363+
/// setting the `S::LAYOUT` constant equal to `alloc::Layout::new::<__S_layout>()` rather than the default
363+
/// value of `alloc::Layout::new::<S>()`. Then, when mocking fields of type `S`, we use `S::LAYOUT.size()`
365+
/// and `S::LAYOUT.align()` for the const-generic parameters of `Mock`, instead of just integers.
269366
fn impl_abstract_structure_type(
270367
name: Ident,
271368
fields: FieldsNamed,
@@ -286,17 +383,41 @@ fn impl_abstract_structure_type(
286383
.into_iter()
287384
.map(|field| AbstractField::from_field(field, &name, pointer_width))
288385
.collect::<Result<Vec<_>>>()?;
386+
387+
// Generate the arguments to `StructureBuilder::insert`. Luckily `mem::offset_of!` was stabilized in
388+
// Rust 1.77 or otherwise this would be a lot more complicated.
289389
let layout_name = format_ident!("__{name}_layout");
390+
let args = abstract_fields
391+
.iter()
392+
.map(|field| {
393+
let ident = &field.ident;
394+
let resolved_ty = field.resolved_ty();
395+
quote! {
396+
&#resolved_ty,
397+
stringify!(#ident),
398+
::std::mem::offset_of!(#layout_name, #ident) as u64,
399+
false,
400+
::binaryninja::types::MemberAccess::NoAccess,
401+
::binaryninja::types::MemberScope::NoScope,
402+
}
403+
})
404+
.collect::<Vec<_>>();
405+
406+
// Calculate size and alignment for each field - these may differ from the compiler's
407+
// calculated values so we use the construction discussed above to mock/propagate them.
290408
let field_wrapper = format_ident!("__{name}_field_wrapper");
291409
let layout_fields = abstract_fields
292410
.iter()
293411
.map(|field| {
294412
let ident = &field.ident;
295413
let (size, align) = match &field.kind {
414+
// Since pointers can be of arbitrary size as specified by the user, we manually
415+
// calculate size/alignment for them.
296416
FieldKind::Ptr(_, width) => {
297417
let align = width.next_power_of_two();
298418
(quote! { #width }, quote! { #align })
299419
}
420+
// All other types defer to the value of Self::LAYOUT
300421
FieldKind::Ty(ty) => (
301422
quote! { { <#ty as ::binaryninja::types::AbstractType>::LAYOUT.size() } },
302423
quote! { { <#ty as ::binaryninja::types::AbstractType>::LAYOUT.align() } },
@@ -305,21 +426,9 @@ fn impl_abstract_structure_type(
305426
quote! { #ident: #field_wrapper<#size, #align> }
306427
})
307428
.collect::<Vec<_>>();
308-
let args = abstract_fields
309-
.iter()
310-
.map(|field| {
311-
let ident = &field.ident;
312-
let resolved_ty = field.resolved_ty();
313-
quote! {
314-
&#resolved_ty,
315-
stringify!(#ident),
316-
::std::mem::offset_of!(#layout_name, #ident) as u64,
317-
false,
318-
::binaryninja::types::MemberAccess::NoAccess,
319-
::binaryninja::types::MemberScope::NoScope,
320-
}
321-
})
322-
.collect::<Vec<_>>();
429+
430+
// If the struct/union is marked `#[repr(packed)]` or `#[repr(align(...))]`, we decorate the
431+
// mocked layout type with those as well
323432
let is_packed = repr.packed.is_some();
324433
let packed = repr.packed.map(|size| match size {
325434
Some(n) => quote! { #[repr(packed(#n))] },
@@ -334,17 +443,18 @@ fn impl_abstract_structure_type(
334443
)
335444
})
336445
.unzip();
446+
447+
// Distinguish between structs and unions
337448
let (kind, set_union) = match kind {
338449
StructureKind::Struct => (quote! { struct }, None),
339450
StructureKind::Union => (
340451
quote! { union },
341452
Some(quote! {
342-
.set_structure_type(
343-
::binaryninja::types::StructureType::UnionStructureType
344-
)
453+
.set_structure_type(::binaryninja::types::StructureType::UnionStructureType)
345454
}),
346455
),
347456
};
457+
348458
Ok(quote! {
349459
#[repr(C)]
350460
#[derive(Copy, Clone)]
@@ -380,6 +490,7 @@ fn impl_abstract_structure_type(
380490
})
381491
}
382492

493+
/// Implements the `AbstractType` trait for an enum.
383494
fn impl_abstract_enum_type(
384495
name: Ident,
385496
variants: impl IntoIterator<Item = Variant>,
@@ -400,6 +511,9 @@ fn impl_abstract_enum_type(
400511
.span()
401512
.error("must provide a primitive `repr` type, e.g. `u32`"));
402513
};
514+
515+
// Extract the variant names and the value of their discriminants. Variants must not hold any
516+
// nested data (in other words, they must be simple C-style identifiers).
403517
let variants = variants
404518
.into_iter()
405519
.map(|variant| {
@@ -415,6 +529,7 @@ fn impl_abstract_enum_type(
415529
Ok(quote! { stringify!(#ident), #discriminant as u64 })
416530
})
417531
.collect::<Result<Vec<_>>>()?;
532+
418533
Ok(quote! {
419534
impl ::binaryninja::types::AbstractType for #name {
420535
fn resolve_type() -> ::binaryninja::rc::Ref<::binaryninja::types::Type> {

0 commit comments

Comments
 (0)