From 466fe3d9bdb98467a13bd8f291ced4cb94bd79ab Mon Sep 17 00:00:00 2001 From: Michael Krasnitski Date: Thu, 9 May 2024 21:07:01 -0400 Subject: [PATCH 01/16] Add `AbstractType` trait --- rust/src/types.rs | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/rust/src/types.rs b/rust/src/types.rs index b42c7039c..1f80f1b6c 100644 --- a/rust/src/types.rs +++ b/rust/src/types.rs @@ -695,6 +695,41 @@ impl Drop for TypeBuilder { ////////// // Type +pub trait AbstractType { + fn resolve_type() -> Ref; +} + +macro_rules! abstract_type { + ($($t:ty => $e:expr),+) => { + $( + impl AbstractType for $t { + fn resolve_type() -> Ref { + $e + } + } + )+ + } +} + +abstract_type! { + u8 => Type::int(1, false), + u16 => Type::int(2, false), + u32 => Type::int(4, false), + u64 => Type::int(8, false), + i8 => Type::int(1, true), + i16 => Type::int(2, true), + i32 => Type::int(4, true), + i64 => Type::int(8, true), + f32 => Type::float(4), + f64 => Type::float(8) +} + +impl AbstractType for [T; N] { + fn resolve_type() -> Ref { + Type::array(&T::resolve_type(), N as u64) + } +} + #[repr(transparent)] pub struct Type { pub(crate) handle: *mut BNType, From 2b16967b6e184c68e24a15aad555423d73ff2b2a Mon Sep 17 00:00:00 2001 From: Michael Krasnitski Date: Mon, 1 Apr 2024 00:33:04 -0400 Subject: [PATCH 02/16] Add `AbstractType` derive macro --- rust/Cargo.lock | 23 +++++++ rust/Cargo.toml | 1 + rust/binaryninja-derive/Cargo.toml | 13 ++++ rust/binaryninja-derive/src/lib.rs | 106 +++++++++++++++++++++++++++++ rust/src/types.rs | 1 + 5 files changed, 144 insertions(+) create mode 100644 rust/binaryninja-derive/Cargo.toml create mode 100644 rust/binaryninja-derive/src/lib.rs diff --git a/rust/Cargo.lock b/rust/Cargo.lock index dfccee5a2..c07693207 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -100,6 +100,7 @@ dependencies = [ name = "binaryninja" version = "0.1.0" dependencies = [ + "binaryninja-derive", "binaryninjacore-sys", "lazy_static", "libc", @@ 
-107,6 +108,16 @@ dependencies = [ "rayon", ] +[[package]] +name = "binaryninja-derive" +version = "0.1.0" +dependencies = [ + "proc-macro2", + "proc-macro2-diagnostics", + "quote", + "syn 2.0.52", +] + [[package]] name = "binaryninjacore-sys" version = "0.1.0" @@ -744,6 +755,18 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "proc-macro2-diagnostics" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.52", + "version_check", +] + [[package]] name = "quote" version = "1.0.35" diff --git a/rust/Cargo.toml b/rust/Cargo.toml index b827b9bb5..dc6a51100 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -13,6 +13,7 @@ log = "0.4" libc = "0.2" rayon = { version = "1.8", optional = true } binaryninjacore-sys = { path = "binaryninjacore-sys" } +binaryninja-derive = { path = "binaryninja-derive" } [patch.crates-io] # Patched pdb crate to implement some extra structures diff --git a/rust/binaryninja-derive/Cargo.toml b/rust/binaryninja-derive/Cargo.toml new file mode 100644 index 000000000..7e4f72349 --- /dev/null +++ b/rust/binaryninja-derive/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "binaryninja-derive" +version = "0.1.0" +edition = "2021" + +[dependencies] +syn = "2.0" +quote = "1" +proc-macro2 = "1.0" +proc-macro2-diagnostics = { version = "0.10", default-features = false } + +[lib] +proc-macro = true diff --git a/rust/binaryninja-derive/src/lib.rs b/rust/binaryninja-derive/src/lib.rs new file mode 100644 index 000000000..59c1697a5 --- /dev/null +++ b/rust/binaryninja-derive/src/lib.rs @@ -0,0 +1,106 @@ +use proc_macro2::TokenStream; +use proc_macro2_diagnostics::{Diagnostic, SpanDiagnosticExt}; +use quote::quote; +use syn::spanned::Spanned; +use syn::{parse_macro_input, Data, DeriveInput, Fields, FieldsNamed, Ident}; + +type Result = std::result::Result; + 
+#[proc_macro_derive(AbstractType)] +pub fn abstract_type_derive(input: proc_macro::TokenStream) -> proc_macro::TokenStream { + let input = parse_macro_input!(input as DeriveInput); + match impl_abstract_type(input) { + Ok(tokens) => tokens.into(), + Err(diag) => diag.emit_as_item_tokens().into(), + } +} + +fn impl_abstract_type(ast: DeriveInput) -> Result { + let mut repr_c = false; + for attr in ast.attrs { + if attr.path().is_ident("repr") { + let _ = attr.parse_nested_meta(|meta| { + if meta.path.is_ident("c") { + repr_c = true; + } + Ok(()) + }); + } + } + + if !repr_c { + return Err(ast.ident.span().error("type must be `repr(C)`")); + } + + if !ast.generics.params.is_empty() { + return Err(ast.generics.span().error("type must not be generic")); + } + + let ident = ast.ident; + match ast.data { + Data::Struct(s) => match s.fields { + Fields::Named(fields) => Ok(impl_abstract_struct_type(ident, fields)), + Fields::Unnamed(_) => Err(s + .fields + .span() + .error("tuple structs are unsupported; struct must have named fields")), + Fields::Unit => Err(ident + .span() + .error("unit structs are unsupported; provide at least one named field")), + }, + Data::Enum(_) => todo!(), + Data::Union(u) => Ok(impl_abstract_union_type(ident, u.fields)), + } +} + +fn field_arguments(name: &Ident, fields: FieldsNamed) -> Vec { + fields + .named + .iter() + .map(|field| { + let ident = field.ident.as_ref().unwrap(); + let ty = &field.ty; + quote! { + &<#ty as ::binaryninja::types::AbstractType>::resolve_type(), + stringify!(#ident), + ::std::mem::offset_of!(#name, #ident) as u64, + false, + ::binaryninja::types::MemberAccess::NoAccess, + ::binaryninja::types::MemberScope::NoScope, + } + }) + .collect() +} + +fn impl_abstract_struct_type(name: Ident, fields: FieldsNamed) -> TokenStream { + let args = field_arguments(&name, fields); + quote! 
{ + impl ::binaryninja::types::AbstractType for #name { + fn resolve_type() -> ::binaryninja::rc::Ref<::binaryninja::types::Type> { + ::binaryninja::types::Type::structure( + &::binaryninja::types::Structure::builder() + #(.insert(#args))* + .finalize() + ) + } + } + } +} + +fn impl_abstract_union_type(name: Ident, fields: FieldsNamed) -> TokenStream { + let args = field_arguments(&name, fields); + quote! { + impl ::binaryninja::types::AbstractType for #name { + fn resolve_type() -> ::binaryninja::rc::Ref<::binaryninja::types::Type> { + ::binaryninja::types::Type::structure( + &::binaryninja::types::Structure::builder() + #(.insert(#args))* + .set_structure_type( + ::binaryninja::types::StructureType::UnionStructureType + ) + .finalize() + ) + } + } + } +} diff --git a/rust/src/types.rs b/rust/src/types.rs index 1f80f1b6c..66dc88c21 100644 --- a/rust/src/types.rs +++ b/rust/src/types.rs @@ -695,6 +695,7 @@ impl Drop for TypeBuilder { ////////// // Type +pub use binaryninja_derive::*; pub trait AbstractType { fn resolve_type() -> Ref; } From cf77726bccfd6778a1aa9d9e1ff5b12266758de0 Mon Sep 17 00:00:00 2001 From: Michael Krasnitski Date: Mon, 1 Apr 2024 01:57:23 -0400 Subject: [PATCH 03/16] Support deriving `AbstractType` on enums --- rust/binaryninja-derive/src/lib.rs | 127 +++++++++++++++++++++++------ 1 file changed, 103 insertions(+), 24 deletions(-) diff --git a/rust/binaryninja-derive/src/lib.rs b/rust/binaryninja-derive/src/lib.rs index 59c1697a5..4fde8fb72 100644 --- a/rust/binaryninja-derive/src/lib.rs +++ b/rust/binaryninja-derive/src/lib.rs @@ -2,10 +2,51 @@ use proc_macro2::TokenStream; use proc_macro2_diagnostics::{Diagnostic, SpanDiagnosticExt}; use quote::quote; use syn::spanned::Spanned; -use syn::{parse_macro_input, Data, DeriveInput, Fields, FieldsNamed, Ident}; +use syn::{ + parse_macro_input, Attribute, Data, DeriveInput, Fields, FieldsNamed, Ident, Path, Variant, +}; type Result = std::result::Result; +struct Repr { + c: bool, + primitive: 
Option<(Path, bool)>, +} + +impl Repr { + fn from_attrs(attrs: Vec) -> Result { + let mut c = false; + let mut primitive = None; + for attr in attrs { + if attr.path().is_ident("repr") { + attr.parse_nested_meta(|meta| { + if let Some(ident) = meta.path.get_ident() { + if ident == "C" { + c = true; + } else if ident_in_list(ident, ["u8", "u16", "u32", "u64"]) { + primitive = Some((meta.path.clone(), false)); + } else if ident_in_list(ident, ["i8", "i16", "i32", "i64"]) { + primitive = Some((meta.path.clone(), true)); + } else if ident_in_list(ident, ["usize", "isize", "u128", "i128"]) { + return Err(ident + .span() + .error(format!("`repr({ident})` types are not supported")) + .into()); + } + } + Ok(()) + })?; + } + } + + Ok(Self { c, primitive }) + } +} + +fn ident_in_list(ident: &Ident, list: [&'static str; N]) -> bool { + list.iter().any(|id| ident == id) +} + #[proc_macro_derive(AbstractType)] pub fn abstract_type_derive(input: proc_macro::TokenStream) -> proc_macro::TokenStream { let input = parse_macro_input!(input as DeriveInput); @@ -16,21 +57,7 @@ pub fn abstract_type_derive(input: proc_macro::TokenStream) -> proc_macro::Token } fn impl_abstract_type(ast: DeriveInput) -> Result { - let mut repr_c = false; - for attr in ast.attrs { - if attr.path().is_ident("repr") { - let _ = attr.parse_nested_meta(|meta| { - if meta.path.is_ident("c") { - repr_c = true; - } - Ok(()) - }); - } - } - - if !repr_c { - return Err(ast.ident.span().error("type must be `repr(C)`")); - } + let repr = Repr::from_attrs(ast.attrs)?; if !ast.generics.params.is_empty() { return Err(ast.generics.span().error("type must not be generic")); @@ -39,7 +66,7 @@ fn impl_abstract_type(ast: DeriveInput) -> Result { let ident = ast.ident; match ast.data { Data::Struct(s) => match s.fields { - Fields::Named(fields) => Ok(impl_abstract_struct_type(ident, fields)), + Fields::Named(fields) => impl_abstract_struct_type(ident, fields, repr), Fields::Unnamed(_) => Err(s .fields .span() @@ -48,8 +75,8 
@@ fn impl_abstract_type(ast: DeriveInput) -> Result { .span() .error("unit structs are unsupported; provide at least one named field")), }, - Data::Enum(_) => todo!(), - Data::Union(u) => Ok(impl_abstract_union_type(ident, u.fields)), + Data::Enum(e) => impl_abstract_enum_type(ident, e.variants, repr), + Data::Union(u) => impl_abstract_union_type(ident, u.fields, repr), } } @@ -72,9 +99,13 @@ fn field_arguments(name: &Ident, fields: FieldsNamed) -> Vec { .collect() } -fn impl_abstract_struct_type(name: Ident, fields: FieldsNamed) -> TokenStream { +fn impl_abstract_struct_type(name: Ident, fields: FieldsNamed, repr: Repr) -> Result { + if !repr.c { + return Err(name.span().error("struct must be `repr(C)`")); + } + let args = field_arguments(&name, fields); - quote! { + Ok(quote! { impl ::binaryninja::types::AbstractType for #name { fn resolve_type() -> ::binaryninja::rc::Ref<::binaryninja::types::Type> { ::binaryninja::types::Type::structure( @@ -84,12 +115,16 @@ fn impl_abstract_struct_type(name: Ident, fields: FieldsNamed) -> TokenStream { ) } } - } + }) } -fn impl_abstract_union_type(name: Ident, fields: FieldsNamed) -> TokenStream { +fn impl_abstract_union_type(name: Ident, fields: FieldsNamed, repr: Repr) -> Result { + if !repr.c { + return Err(name.span().error("union must be `repr(C)`")); + } + let args = field_arguments(&name, fields); - quote! { + Ok(quote! 
{ impl ::binaryninja::types::AbstractType for #name { fn resolve_type() -> ::binaryninja::rc::Ref<::binaryninja::types::Type> { ::binaryninja::types::Type::structure( @@ -102,5 +137,49 @@ fn impl_abstract_union_type(name: Ident, fields: FieldsNamed) -> TokenStream { ) } } + }) +} + +fn impl_abstract_enum_type( + name: Ident, + variants: impl IntoIterator, + repr: Repr, +) -> Result { + if repr.c { + return Err(name.span().error("`repr(C)` enums are not supported")); } + + let Some((primitive, signed)) = repr.primitive else { + return Err(name + .span() + .error("must provide a primitive `repr` type, e.g. `u32`")); + }; + let variants = variants + .into_iter() + .map(|variant| { + if !variant.fields.is_empty() { + return Err(variant.span().error("variant must not have any fields")); + } + let Some((_, discriminant)) = variant.discriminant else { + return Err(variant + .span() + .error("variant must have an explicit discriminant")); + }; + let ident = variant.ident; + Ok(quote! { stringify!(#ident), #discriminant as u64 }) + }) + .collect::>>()?; + Ok(quote! 
{ + impl ::binaryninja::types::AbstractType for #name { + fn resolve_type() -> ::binaryninja::rc::Ref<::binaryninja::types::Type> { + ::binaryninja::types::Type::enumeration( + &::binaryninja::types::Enumeration::builder() + #(.insert(#variants))* + .finalize(), + ::std::mem::size_of::<#primitive>(), + #signed + ) + } + } + }) } From 1c370ff55305ffb0e863415a6618abda2326120f Mon Sep 17 00:00:00 2001 From: Michael Krasnitski Date: Mon, 1 Apr 2024 01:31:50 -0400 Subject: [PATCH 04/16] Support `repr(packed)` and `repr(align)` on structs/unions --- rust/binaryninja-derive/src/lib.rs | 42 ++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/rust/binaryninja-derive/src/lib.rs b/rust/binaryninja-derive/src/lib.rs index 4fde8fb72..43e5d2902 100644 --- a/rust/binaryninja-derive/src/lib.rs +++ b/rust/binaryninja-derive/src/lib.rs @@ -3,19 +3,24 @@ use proc_macro2_diagnostics::{Diagnostic, SpanDiagnosticExt}; use quote::quote; use syn::spanned::Spanned; use syn::{ - parse_macro_input, Attribute, Data, DeriveInput, Fields, FieldsNamed, Ident, Path, Variant, + parenthesized, parse_macro_input, token, Attribute, Data, DeriveInput, Fields, FieldsNamed, + Ident, LitInt, Path, Variant, }; type Result = std::result::Result; struct Repr { c: bool, + packed: Option, + align: Option, primitive: Option<(Path, bool)>, } impl Repr { fn from_attrs(attrs: Vec) -> Result { let mut c = false; + let mut packed = None; + let mut align = None; let mut primitive = None; for attr in attrs { if attr.path().is_ident("repr") { @@ -23,6 +28,18 @@ impl Repr { if let Some(ident) = meta.path.get_ident() { if ident == "C" { c = true; + } else if ident == "packed" { + if meta.input.peek(token::Paren) { + let content; + parenthesized!(content in meta.input); + packed = Some(content.parse::()?.base10_parse()?); + } else { + packed = Some(1); + } + } else if ident == "align" { + let content; + parenthesized!(content in meta.input); + align = 
Some(content.parse::()?.base10_parse()?); } else if ident_in_list(ident, ["u8", "u16", "u32", "u64"]) { primitive = Some((meta.path.clone(), false)); } else if ident_in_list(ident, ["i8", "i16", "i32", "i64"]) { @@ -39,7 +56,12 @@ impl Repr { } } - Ok(Self { c, primitive }) + Ok(Self { + c, + packed, + align, + primitive, + }) } } @@ -105,12 +127,17 @@ fn impl_abstract_struct_type(name: Ident, fields: FieldsNamed, repr: Repr) -> Re } let args = field_arguments(&name, fields); + let packed = repr.packed.is_some(); + let alignment = repr.align.map(|align| quote! { .set_alignment(#align) }); Ok(quote! { impl ::binaryninja::types::AbstractType for #name { fn resolve_type() -> ::binaryninja::rc::Ref<::binaryninja::types::Type> { ::binaryninja::types::Type::structure( &::binaryninja::types::Structure::builder() #(.insert(#args))* + .set_width(::std::mem::size_of::<#name>() as u64) + .set_packed(#packed) + #alignment .finalize() ) } @@ -124,6 +151,8 @@ fn impl_abstract_union_type(name: Ident, fields: FieldsNamed, repr: Repr) -> Res } let args = field_arguments(&name, fields); + let packed = repr.packed.is_some(); + let alignment = repr.align.map(|align| quote! { .set_alignment(#align) }); Ok(quote! 
{ impl ::binaryninja::types::AbstractType for #name { fn resolve_type() -> ::binaryninja::rc::Ref<::binaryninja::types::Type> { @@ -133,6 +162,9 @@ fn impl_abstract_union_type(name: Ident, fields: FieldsNamed, repr: Repr) -> Res .set_structure_type( ::binaryninja::types::StructureType::UnionStructureType ) + .set_width(::std::mem::size_of::<#name>() as u64) + .set_packed(#packed) + #alignment .finalize() ) } @@ -148,6 +180,12 @@ fn impl_abstract_enum_type( if repr.c { return Err(name.span().error("`repr(C)` enums are not supported")); } + if repr.align.is_some() { + // No way to set custom alignment for enums in Binja + return Err(name + .span() + .error("`repr(align(...))` on enums is not supported")); + } let Some((primitive, signed)) = repr.primitive else { return Err(name From e39b7d12472bc5e9ca05ee45d500686592451f66 Mon Sep 17 00:00:00 2001 From: Michael Krasnitski Date: Mon, 1 Apr 2024 01:53:40 -0400 Subject: [PATCH 05/16] Allow named types using a `#[named]` field attribute --- rust/binaryninja-derive/src/lib.rs | 34 ++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/rust/binaryninja-derive/src/lib.rs b/rust/binaryninja-derive/src/lib.rs index 43e5d2902..a39e0ad9e 100644 --- a/rust/binaryninja-derive/src/lib.rs +++ b/rust/binaryninja-derive/src/lib.rs @@ -3,8 +3,8 @@ use proc_macro2_diagnostics::{Diagnostic, SpanDiagnosticExt}; use quote::quote; use syn::spanned::Spanned; use syn::{ - parenthesized, parse_macro_input, token, Attribute, Data, DeriveInput, Fields, FieldsNamed, - Ident, LitInt, Path, Variant, + parenthesized, parse_macro_input, token, Attribute, Data, DeriveInput, Field, Fields, + FieldsNamed, Ident, LitInt, Path, Variant, }; type Result = std::result::Result; @@ -23,7 +23,14 @@ impl Repr { let mut align = None; let mut primitive = None; for attr in attrs { - if attr.path().is_ident("repr") { + let Some(ident) = attr.path().get_ident() else { + continue; + }; + if ident == "named" { + return Err(attr + 
.span() + .error("`#[named]` attribute can only be applied to fields")); + } else if ident == "repr" { attr.parse_nested_meta(|meta| { if let Some(ident) = meta.path.get_ident() { if ident == "C" { @@ -69,7 +76,7 @@ fn ident_in_list(ident: &Ident, list: [&'static str; N]) -> bool list.iter().any(|id| ident == id) } -#[proc_macro_derive(AbstractType)] +#[proc_macro_derive(AbstractType, attributes(named))] pub fn abstract_type_derive(input: proc_macro::TokenStream) -> proc_macro::TokenStream { let input = parse_macro_input!(input as DeriveInput); match impl_abstract_type(input) { @@ -102,15 +109,30 @@ fn impl_abstract_type(ast: DeriveInput) -> Result { } } +fn field_resolved_type(field: &Field) -> TokenStream { + let ty = &field.ty; + let resolved_ty = quote! { <#ty as ::binaryninja::types::AbstractType>::resolve_type() }; + if field.attrs.iter().any(|attr| attr.path().is_ident("named")) { + quote! { + ::binaryninja::types::Type::named_type_from_type( + stringify!(#ty), + &#resolved_ty + ) + } + } else { + resolved_ty + } +} + fn field_arguments(name: &Ident, fields: FieldsNamed) -> Vec { fields .named .iter() .map(|field| { let ident = field.ident.as_ref().unwrap(); - let ty = &field.ty; + let resolved_ty = field_resolved_type(field); quote! 
{ - &<#ty as ::binaryninja::types::AbstractType>::resolve_type(), + &#resolved_ty, stringify!(#ident), ::std::mem::offset_of!(#name, #ident) as u64, false, From 85e144dac4df43d78a2a29519e2f880f4223b88e Mon Sep 17 00:00:00 2001 From: Michael Krasnitski Date: Wed, 3 Apr 2024 22:30:19 -0400 Subject: [PATCH 06/16] Support pointer fields (hardcoded to 64-bit width for now) --- rust/binaryninja-derive/src/lib.rs | 83 ++++++++++++++++++++++-------- 1 file changed, 61 insertions(+), 22 deletions(-) diff --git a/rust/binaryninja-derive/src/lib.rs b/rust/binaryninja-derive/src/lib.rs index a39e0ad9e..d3f0fbedc 100644 --- a/rust/binaryninja-derive/src/lib.rs +++ b/rust/binaryninja-derive/src/lib.rs @@ -4,11 +4,56 @@ use quote::quote; use syn::spanned::Spanned; use syn::{ parenthesized, parse_macro_input, token, Attribute, Data, DeriveInput, Field, Fields, - FieldsNamed, Ident, LitInt, Path, Variant, + FieldsNamed, Ident, LitInt, Path, Type, Variant, }; type Result = std::result::Result; +struct AbstractField { + ty: Type, + ident: Ident, + named: bool, + pointer: bool, +} + +impl AbstractField { + fn from_field(field: Field) -> Result { + let Some(ident) = field.ident else { + return Err(field.span().error("field must be named")); + }; + let named = field.attrs.iter().any(|attr| attr.path().is_ident("named")); + let (ty, pointer) = match field.ty { + Type::Ptr(ty) => (*ty.elem, true), + _ => (field.ty, false), + }; + Ok(Self { + ty, + ident, + named, + pointer, + }) + } + + fn resolved_ty(&self) -> TokenStream { + let ty = &self.ty; + let mut resolved = quote! { <#ty as ::binaryninja::types::AbstractType>::resolve_type() }; + if self.named { + resolved = quote! { + ::binaryninja::types::Type::named_type_from_type( + stringify!(#ty), + &#resolved + ) + }; + } + if self.pointer { + resolved = quote! 
{ + ::binaryninja::types::Type::pointer_of_width(&#resolved, 8, false, false, None) + } + } + resolved + } +} + struct Repr { c: bool, packed: Option, @@ -109,28 +154,12 @@ fn impl_abstract_type(ast: DeriveInput) -> Result { } } -fn field_resolved_type(field: &Field) -> TokenStream { - let ty = &field.ty; - let resolved_ty = quote! { <#ty as ::binaryninja::types::AbstractType>::resolve_type() }; - if field.attrs.iter().any(|attr| attr.path().is_ident("named")) { - quote! { - ::binaryninja::types::Type::named_type_from_type( - stringify!(#ty), - &#resolved_ty - ) - } - } else { - resolved_ty - } -} - -fn field_arguments(name: &Ident, fields: FieldsNamed) -> Vec { +fn field_arguments(name: &Ident, fields: &[AbstractField]) -> Vec { fields - .named .iter() .map(|field| { - let ident = field.ident.as_ref().unwrap(); - let resolved_ty = field_resolved_type(field); + let ident = &field.ident; + let resolved_ty = field.resolved_ty(); quote! { &#resolved_ty, stringify!(#ident), @@ -148,7 +177,12 @@ fn impl_abstract_struct_type(name: Ident, fields: FieldsNamed, repr: Repr) -> Re return Err(name.span().error("struct must be `repr(C)`")); } - let args = field_arguments(&name, fields); + let fields = fields + .named + .into_iter() + .map(AbstractField::from_field) + .collect::>>()?; + let args = field_arguments(&name, &fields); let packed = repr.packed.is_some(); let alignment = repr.align.map(|align| quote! { .set_alignment(#align) }); Ok(quote! { @@ -172,7 +206,12 @@ fn impl_abstract_union_type(name: Ident, fields: FieldsNamed, repr: Repr) -> Res return Err(name.span().error("union must be `repr(C)`")); } - let args = field_arguments(&name, fields); + let fields = fields + .named + .into_iter() + .map(AbstractField::from_field) + .collect::>>()?; + let args = field_arguments(&name, &fields); let packed = repr.packed.is_some(); let alignment = repr.align.map(|align| quote! { .set_alignment(#align) }); Ok(quote! 
{ From 707bdf05a0cb9c78fd466e8eb6a0895de5b32d85 Mon Sep 17 00:00:00 2001 From: Michael Krasnitski Date: Fri, 5 Apr 2024 19:38:03 -0400 Subject: [PATCH 07/16] Unify codegen for structs and unions --- rust/binaryninja-derive/src/lib.rs | 97 +++++++++++++----------------- 1 file changed, 43 insertions(+), 54 deletions(-) diff --git a/rust/binaryninja-derive/src/lib.rs b/rust/binaryninja-derive/src/lib.rs index d3f0fbedc..a10cac1e0 100644 --- a/rust/binaryninja-derive/src/lib.rs +++ b/rust/binaryninja-derive/src/lib.rs @@ -140,7 +140,9 @@ fn impl_abstract_type(ast: DeriveInput) -> Result { let ident = ast.ident; match ast.data { Data::Struct(s) => match s.fields { - Fields::Named(fields) => impl_abstract_struct_type(ident, fields, repr), + Fields::Named(fields) => { + impl_abstract_structure_type(ident, fields, repr, StructureKind::Struct) + } Fields::Unnamed(_) => Err(s .fields .span() @@ -150,12 +152,35 @@ fn impl_abstract_type(ast: DeriveInput) -> Result { .error("unit structs are unsupported; provide at least one named field")), }, Data::Enum(e) => impl_abstract_enum_type(ident, e.variants, repr), - Data::Union(u) => impl_abstract_union_type(ident, u.fields, repr), + Data::Union(u) => impl_abstract_structure_type(ident, u.fields, repr, StructureKind::Union), } } -fn field_arguments(name: &Ident, fields: &[AbstractField]) -> Vec { - fields +enum StructureKind { + Struct, + Union, +} + +fn impl_abstract_structure_type( + name: Ident, + fields: FieldsNamed, + repr: Repr, + kind: StructureKind, +) -> Result { + if !repr.c { + let msg = match kind { + StructureKind::Struct => "struct must be `repr(C)`", + StructureKind::Union => "union must be `repr(C)`", + }; + return Err(name.span().error(msg)); + } + + let fields = fields + .named + .into_iter() + .map(AbstractField::from_field) + .collect::>>()?; + let args = fields .iter() .map(|field| { let ident = &field.ident; @@ -169,63 +194,27 @@ fn field_arguments(name: &Ident, fields: &[AbstractField]) -> Vec { 
::binaryninja::types::MemberScope::NoScope, } }) - .collect() -} - -fn impl_abstract_struct_type(name: Ident, fields: FieldsNamed, repr: Repr) -> Result { - if !repr.c { - return Err(name.span().error("struct must be `repr(C)`")); - } - - let fields = fields - .named - .into_iter() - .map(AbstractField::from_field) - .collect::>>()?; - let args = field_arguments(&name, &fields); - let packed = repr.packed.is_some(); - let alignment = repr.align.map(|align| quote! { .set_alignment(#align) }); - Ok(quote! { - impl ::binaryninja::types::AbstractType for #name { - fn resolve_type() -> ::binaryninja::rc::Ref<::binaryninja::types::Type> { - ::binaryninja::types::Type::structure( - &::binaryninja::types::Structure::builder() - #(.insert(#args))* - .set_width(::std::mem::size_of::<#name>() as u64) - .set_packed(#packed) - #alignment - .finalize() - ) - } - } - }) -} - -fn impl_abstract_union_type(name: Ident, fields: FieldsNamed, repr: Repr) -> Result { - if !repr.c { - return Err(name.span().error("union must be `repr(C)`")); - } - - let fields = fields - .named - .into_iter() - .map(AbstractField::from_field) - .collect::>>()?; - let args = field_arguments(&name, &fields); - let packed = repr.packed.is_some(); - let alignment = repr.align.map(|align| quote! { .set_alignment(#align) }); + .collect::>(); + let is_packed = repr.packed.is_some(); + let set_alignment = repr.align.map(|align| quote! { .set_alignment(#align) }); + let set_union = match kind { + StructureKind::Struct => None, + StructureKind::Union => Some(quote! { + .set_structure_type( + ::binaryninja::types::StructureType::UnionStructureType + ) + }), + }; Ok(quote! 
{ impl ::binaryninja::types::AbstractType for #name { fn resolve_type() -> ::binaryninja::rc::Ref<::binaryninja::types::Type> { ::binaryninja::types::Type::structure( &::binaryninja::types::Structure::builder() #(.insert(#args))* - .set_structure_type( - ::binaryninja::types::StructureType::UnionStructureType - ) .set_width(::std::mem::size_of::<#name>() as u64) - .set_packed(#packed) - #alignment + .set_packed(#is_packed) + #set_alignment + #set_union .finalize() ) } From 797c3e84857bc3b93d9f9780d78672c01669ff18 Mon Sep 17 00:00:00 2001 From: Michael Krasnitski Date: Fri, 5 Apr 2024 19:47:40 -0400 Subject: [PATCH 08/16] Support pointers of other sizes using a `width` field attribute We accomplish this by generating alternative struct layouts with swapped out field types, and recursively calculating size and alignment using associated constants on the `AbstractType` trait --- rust/Cargo.lock | 7 ++ rust/Cargo.toml | 1 + rust/binaryninja-derive/src/lib.rs | 147 ++++++++++++++++++++++------- rust/src/lib.rs | 1 + rust/src/types.rs | 4 +- 5 files changed, 126 insertions(+), 34 deletions(-) diff --git a/rust/Cargo.lock b/rust/Cargo.lock index c07693207..9da1cd0e0 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -102,6 +102,7 @@ version = "0.1.0" dependencies = [ "binaryninja-derive", "binaryninjacore-sys", + "elain", "lazy_static", "libc", "log", @@ -358,6 +359,12 @@ version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a" +[[package]] +name = "elain" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3522094fae7d65c8313a135fe45fa7e22ec2110c9d387063b66d235281f7f771" + [[package]] name = "encoding_rs" version = "0.8.33" diff --git a/rust/Cargo.toml b/rust/Cargo.toml index dc6a51100..b10134df8 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -14,6 +14,7 @@ libc = "0.2" rayon = { version = "1.8", optional = true 
} binaryninjacore-sys = { path = "binaryninjacore-sys" } binaryninja-derive = { path = "binaryninja-derive" } +elain = "0.3.0" [patch.crates-io] # Patched pdb crate to implement some extra structures diff --git a/rust/binaryninja-derive/src/lib.rs b/rust/binaryninja-derive/src/lib.rs index a10cac1e0..ea49d05a9 100644 --- a/rust/binaryninja-derive/src/lib.rs +++ b/rust/binaryninja-derive/src/lib.rs @@ -1,19 +1,19 @@ use proc_macro2::TokenStream; use proc_macro2_diagnostics::{Diagnostic, SpanDiagnosticExt}; -use quote::quote; +use quote::{format_ident, quote}; use syn::spanned::Spanned; use syn::{ - parenthesized, parse_macro_input, token, Attribute, Data, DeriveInput, Field, Fields, - FieldsNamed, Ident, LitInt, Path, Type, Variant, + parenthesized, parse_macro_input, token, Attribute, Data, DeriveInput, Expr, Field, Fields, + FieldsNamed, Ident, Lit, LitInt, Path, Type, Variant, }; type Result = std::result::Result; struct AbstractField { ty: Type, + width: Option, ident: Ident, named: bool, - pointer: bool, } impl AbstractField { @@ -22,16 +22,38 @@ impl AbstractField { return Err(field.span().error("field must be named")); }; let named = field.attrs.iter().any(|attr| attr.path().is_ident("named")); - let (ty, pointer) = match field.ty { - Type::Ptr(ty) => (*ty.elem, true), - _ => (field.ty, false), - }; - Ok(Self { - ty, - ident, - named, - pointer, - }) + let width = field + .attrs + .iter() + .find(|attr| attr.path().is_ident("width")); + if let Type::Ptr(ty) = field.ty { + if let Some(attr) = width { + if let Expr::Lit(expr) = &attr.meta.require_name_value()?.value { + if let Lit::Str(lit_str) = &expr.lit { + return Ok(Self { + ty: *ty.elem, + width: Some(lit_str.parse()?), + ident, + named, + }); + } + } + } + Err(ident.span() + .error("pointer field must have explicit `#[width = \"\"]` attribute, for example: `u64`")) + } else { + match width { + Some(attr) => Err(attr + .span() + .error("`#[width]` attribute can only be applied to pointer fields")), + None 
=> Ok(Self { + ty: field.ty, + width: None, + ident, + named, + }), + } + } } fn resolved_ty(&self) -> TokenStream { @@ -45,9 +67,15 @@ impl AbstractField { ) }; } - if self.pointer { + if let Some(width) = &self.width { resolved = quote! { - ::binaryninja::types::Type::pointer_of_width(&#resolved, 8, false, false, None) + ::binaryninja::types::Type::pointer_of_width( + &#resolved, + ::std::mem::size_of::<#width>(), + false, + false, + None + ) } } resolved @@ -56,8 +84,8 @@ impl AbstractField { struct Repr { c: bool, - packed: Option, - align: Option, + packed: Option>, + align: Option, primitive: Option<(Path, bool)>, } @@ -75,6 +103,10 @@ impl Repr { return Err(attr .span() .error("`#[named]` attribute can only be applied to fields")); + } else if ident == "width" { + return Err(attr + .span() + .error("`#[width]` attribute can only be applied to pointer fields")); } else if ident == "repr" { attr.parse_nested_meta(|meta| { if let Some(ident) = meta.path.get_ident() { @@ -84,14 +116,14 @@ impl Repr { if meta.input.peek(token::Paren) { let content; parenthesized!(content in meta.input); - packed = Some(content.parse::()?.base10_parse()?); + packed = Some(Some(content.parse()?)); } else { - packed = Some(1); + packed = Some(None); } } else if ident == "align" { let content; parenthesized!(content in meta.input); - align = Some(content.parse::()?.base10_parse()?); + align = Some(content.parse()?); } else if ident_in_list(ident, ["u8", "u16", "u32", "u64"]) { primitive = Some((meta.path.clone(), false)); } else if ident_in_list(ident, ["i8", "i16", "i32", "i64"]) { @@ -121,7 +153,7 @@ fn ident_in_list(ident: &Ident, list: [&'static str; N]) -> bool list.iter().any(|id| ident == id) } -#[proc_macro_derive(AbstractType, attributes(named))] +#[proc_macro_derive(AbstractType, attributes(named, width))] pub fn abstract_type_derive(input: proc_macro::TokenStream) -> proc_macro::TokenStream { let input = parse_macro_input!(input as DeriveInput); match 
impl_abstract_type(input) { @@ -175,12 +207,27 @@ fn impl_abstract_structure_type( return Err(name.span().error(msg)); } - let fields = fields + let abstract_fields = fields .named .into_iter() .map(AbstractField::from_field) .collect::>>()?; - let args = fields + let layout_name = format_ident!("__{name}_layout"); + let field_wrapper = format_ident!("__{name}_field_wrapper"); + let layout_fields = abstract_fields + .iter() + .map(|field| { + let ident = &field.ident; + let layout_ty = field.width.as_ref().unwrap_or(&field.ty); + quote! { + #ident: #field_wrapper< + [u8; <#layout_ty as ::binaryninja::types::AbstractType>::SIZE], + { <#layout_ty as ::binaryninja::types::AbstractType>::ALIGN }, + > + } + }) + .collect::>(); + let args = abstract_fields .iter() .map(|field| { let ident = &field.ident; @@ -188,7 +235,7 @@ fn impl_abstract_structure_type( quote! { &#resolved_ty, stringify!(#ident), - ::std::mem::offset_of!(#name, #ident) as u64, + ::std::mem::offset_of!(#layout_name, #ident) as u64, false, ::binaryninja::types::MemberAccess::NoAccess, ::binaryninja::types::MemberScope::NoScope, @@ -196,22 +243,56 @@ fn impl_abstract_structure_type( }) .collect::>(); let is_packed = repr.packed.is_some(); - let set_alignment = repr.align.map(|align| quote! { .set_alignment(#align) }); - let set_union = match kind { - StructureKind::Struct => None, - StructureKind::Union => Some(quote! { - .set_structure_type( - ::binaryninja::types::StructureType::UnionStructureType + let packed = repr.packed.map(|size| match size { + Some(n) => quote! { #[repr(packed(#n))] }, + None => quote! { #[repr(packed)] }, + }); + let (align, set_alignment) = repr + .align + .map(|n| { + ( + quote! { #[repr(align(#n))] }, + quote! { .set_alignment(Self::ALIGN) }, ) - }), + }) + .unzip(); + let (kind, set_union) = match kind { + StructureKind::Struct => (quote! { struct }, None), + StructureKind::Union => ( + quote! { union }, + Some(quote! 
{ + .set_structure_type( + ::binaryninja::types::StructureType::UnionStructureType + ) + }), + ), }; Ok(quote! { + #[repr(C)] + #[derive(Copy, Clone)] + struct #field_wrapper + where + ::binaryninja::elain::Align: ::binaryninja::elain::Alignment + { + t: T, + _align: ::binaryninja::elain::Align, + } + + #[repr(C)] + #packed + #align + #kind #layout_name { + #(#layout_fields),* + } + impl ::binaryninja::types::AbstractType for #name { + const SIZE: usize = ::std::mem::size_of::<#layout_name>(); + const ALIGN: usize = ::std::mem::align_of::<#layout_name>(); fn resolve_type() -> ::binaryninja::rc::Ref<::binaryninja::types::Type> { ::binaryninja::types::Type::structure( &::binaryninja::types::Structure::builder() #(.insert(#args))* - .set_width(::std::mem::size_of::<#name>() as u64) + .set_width(Self::SIZE as u64) .set_packed(#is_packed) #set_alignment #set_union diff --git a/rust/src/lib.rs b/rust/src/lib.rs index e24a3f784..db04a9e51 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -169,6 +169,7 @@ use std::path::PathBuf; pub use binaryninjacore_sys::BNBranchType as BranchType; pub use binaryninjacore_sys::BNEndianness as Endianness; use binaryview::BinaryView; +pub use elain; use metadata::Metadata; use metadata::MetadataType; use string::BnStrCompatible; diff --git a/rust/src/types.rs b/rust/src/types.rs index 66dc88c21..11bb83718 100644 --- a/rust/src/types.rs +++ b/rust/src/types.rs @@ -696,7 +696,9 @@ impl Drop for TypeBuilder { // Type pub use binaryninja_derive::*; -pub trait AbstractType { +pub trait AbstractType: Sized { + const SIZE: usize = std::mem::size_of::(); + const ALIGN: usize = std::mem::align_of::(); fn resolve_type() -> Ref; } From 6e50eb830cef8c9d88cda9fc1c906dbb35b93ec3 Mon Sep 17 00:00:00 2001 From: Michael Krasnitski Date: Tue, 9 Apr 2024 18:11:29 -0400 Subject: [PATCH 09/16] Gate `AbstractType` derive macro behind a `derive` feature-flag --- rust/Cargo.toml | 6 ++++-- rust/src/lib.rs | 1 + rust/src/types.rs | 1 + 3 files changed, 6 
insertions(+), 2 deletions(-) diff --git a/rust/Cargo.toml b/rust/Cargo.toml index b10134df8..681c85a94 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -6,6 +6,7 @@ edition = "2021" [features] noexports = [] +derive = ["binaryninja-derive", "elain"] [dependencies] lazy_static = "1.4.0" @@ -13,8 +14,9 @@ log = "0.4" libc = "0.2" rayon = { version = "1.8", optional = true } binaryninjacore-sys = { path = "binaryninjacore-sys" } -binaryninja-derive = { path = "binaryninja-derive" } -elain = "0.3.0" +binaryninja-derive = { path = "binaryninja-derive", optional = true } +# Const-generic alignment gadgetry used by the `AbstractType` derive macro +elain = { version = "0.3.0", optional = true } [patch.crates-io] # Patched pdb crate to implement some extra structures diff --git a/rust/src/lib.rs b/rust/src/lib.rs index db04a9e51..32920c838 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -169,6 +169,7 @@ use std::path::PathBuf; pub use binaryninjacore_sys::BNBranchType as BranchType; pub use binaryninjacore_sys::BNEndianness as Endianness; use binaryview::BinaryView; +#[cfg(feature = "derive")] pub use elain; use metadata::Metadata; use metadata::MetadataType; diff --git a/rust/src/types.rs b/rust/src/types.rs index 11bb83718..de2e90d2e 100644 --- a/rust/src/types.rs +++ b/rust/src/types.rs @@ -695,6 +695,7 @@ impl Drop for TypeBuilder { ////////// // Type +#[cfg(feature = "derive")] pub use binaryninja_derive::*; pub trait AbstractType: Sized { const SIZE: usize = std::mem::size_of::(); From 98fde41abc6048c98a48fd760e1d2e830c2212c0 Mon Sep 17 00:00:00 2001 From: Michael Krasnitski Date: Sun, 12 May 2024 23:06:49 -1000 Subject: [PATCH 10/16] Support 128-bit reprs --- rust/binaryninja-derive/src/lib.rs | 6 +++--- rust/src/types.rs | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/rust/binaryninja-derive/src/lib.rs b/rust/binaryninja-derive/src/lib.rs index ea49d05a9..cc0f2ba7e 100644 --- a/rust/binaryninja-derive/src/lib.rs +++ 
b/rust/binaryninja-derive/src/lib.rs @@ -124,11 +124,11 @@ impl Repr { let content; parenthesized!(content in meta.input); align = Some(content.parse()?); - } else if ident_in_list(ident, ["u8", "u16", "u32", "u64"]) { + } else if ident_in_list(ident, ["u8", "u16", "u32", "u64", "u128"]) { primitive = Some((meta.path.clone(), false)); - } else if ident_in_list(ident, ["i8", "i16", "i32", "i64"]) { + } else if ident_in_list(ident, ["i8", "i16", "i32", "i64", "i128"]) { primitive = Some((meta.path.clone(), true)); - } else if ident_in_list(ident, ["usize", "isize", "u128", "i128"]) { + } else if ident_in_list(ident, ["usize", "isize"]) { return Err(ident .span() .error(format!("`repr({ident})` types are not supported")) diff --git a/rust/src/types.rs b/rust/src/types.rs index de2e90d2e..109d8211c 100644 --- a/rust/src/types.rs +++ b/rust/src/types.rs @@ -720,10 +720,12 @@ abstract_type! { u16 => Type::int(2, false), u32 => Type::int(4, false), u64 => Type::int(8, false), + u128 => Type::int(16, false), i8 => Type::int(1, true), i16 => Type::int(2, true), i32 => Type::int(4, true), i64 => Type::int(8, true), + i128 => Type::int(16, true), f32 => Type::float(4), f64 => Type::float(8) } From d4ddfcb360597765ca11b257de1873dc8751bc6e Mon Sep 17 00:00:00 2001 From: Michael Krasnitski Date: Wed, 15 May 2024 21:52:57 -1000 Subject: [PATCH 11/16] Switch to using a `#[binja(...)]` attribute instead of naked decorators The attribute supports three properties: - Specifying pointer width is done for the whole struct/union now instead of each field using a `#[binja(pointer_width = )]` property. - Specifying named fields is done either with `#[binja(name = "...")]`, or `#[binja(named)]` which will use the name of the Rust type as a default value. 
--- rust/binaryninja-derive/src/lib.rs | 228 +++++++++++++++++++---------- 1 file changed, 153 insertions(+), 75 deletions(-) diff --git a/rust/binaryninja-derive/src/lib.rs b/rust/binaryninja-derive/src/lib.rs index cc0f2ba7e..dd6e10ac6 100644 --- a/rust/binaryninja-derive/src/lib.rs +++ b/rust/binaryninja-derive/src/lib.rs @@ -1,87 +1,155 @@ -use proc_macro2::TokenStream; +use proc_macro2::{Span, TokenStream}; use proc_macro2_diagnostics::{Diagnostic, SpanDiagnosticExt}; use quote::{format_ident, quote}; +use std::cell::OnceCell; use syn::spanned::Spanned; use syn::{ parenthesized, parse_macro_input, token, Attribute, Data, DeriveInput, Expr, Field, Fields, - FieldsNamed, Ident, Lit, LitInt, Path, Type, Variant, + FieldsNamed, Ident, Lit, LitInt, Meta, Path, Type, Variant, }; type Result = std::result::Result; +enum FieldKind { + Ptr(Type, usize), + Ty(Type), +} + +impl FieldKind { + fn ty(&self) -> &Type { + match self { + FieldKind::Ptr(ty, _) | FieldKind::Ty(ty) => &ty, + } + } +} + struct AbstractField { - ty: Type, - width: Option, + kind: FieldKind, ident: Ident, - named: bool, + name: Option, } impl AbstractField { - fn from_field(field: Field) -> Result { + fn from_field(field: Field, parent_name: &Ident, pointer_width: Option) -> Result { let Some(ident) = field.ident else { return Err(field.span().error("field must be named")); }; - let named = field.attrs.iter().any(|attr| attr.path().is_ident("named")); - let width = field - .attrs - .iter() - .find(|attr| attr.path().is_ident("width")); - if let Type::Ptr(ty) = field.ty { - if let Some(attr) = width { - if let Expr::Lit(expr) = &attr.meta.require_name_value()?.value { - if let Lit::Str(lit_str) = &expr.lit { - return Ok(Self { - ty: *ty.elem, - width: Some(lit_str.parse()?), - ident, - named, - }); - } - } + let kind = match field.ty { + Type::Ptr(ty) => { + let Some(width) = pointer_width else { + return Err(parent_name.span().error( + // broken up to make rustfmt happy + "types containing pointer 
fields must be \ + decorated with `#[binja(pointer_width = )]`", + )); + }; + FieldKind::Ptr(*ty.elem, width) } - Err(ident.span() - .error("pointer field must have explicit `#[width = \"\"]` attribute, for example: `u64`")) - } else { - match width { - Some(attr) => Err(attr - .span() - .error("`#[width]` attribute can only be applied to pointer fields")), - None => Ok(Self { - ty: field.ty, - width: None, - ident, - named, - }), - } - } + _ => FieldKind::Ty(field.ty), + }; + let name = find_binja_attr(&field.attrs)? + .map(|attr| match attr.kind { + BinjaAttrKind::PointerWidth(_) => Err(attr.span.error( + // broken up to make rustfmt happy + "invalid attribute, expected either \ + `#[binja(named)]` or `#[binja(name = \"...\")]`", + )), + BinjaAttrKind::Named(Some(name)) => Ok(name), + BinjaAttrKind::Named(None) => { + let ty = kind.ty(); + Ok(quote!(#ty).to_string()) + } + }) + .transpose()?; + Ok(Self { kind, ident, name }) } fn resolved_ty(&self) -> TokenStream { - let ty = &self.ty; + let ty = self.kind.ty(); let mut resolved = quote! { <#ty as ::binaryninja::types::AbstractType>::resolve_type() }; - if self.named { + if let Some(name) = &self.name { resolved = quote! { - ::binaryninja::types::Type::named_type_from_type( - stringify!(#ty), - &#resolved - ) - }; + ::binaryninja::types::Type::named_type_from_type(#name, &#resolved) + } } - if let Some(width) = &self.width { + if let FieldKind::Ptr(_, width) = self.kind { resolved = quote! 
{ - ::binaryninja::types::Type::pointer_of_width( - &#resolved, - ::std::mem::size_of::<#width>(), - false, - false, - None - ) + ::binaryninja::types::Type::pointer_of_width(&#resolved, #width, false, false, None) } } resolved } } +#[derive(Debug)] +struct BinjaAttr { + kind: BinjaAttrKind, + span: Span, +} + +#[derive(Debug)] +enum BinjaAttrKind { + PointerWidth(usize), + Named(Option), +} + +fn find_binja_attr(attrs: &[Attribute]) -> Result> { + let binja_attr = OnceCell::new(); + + let set_attr = |attr: BinjaAttr| { + let span = attr.span; + binja_attr + .set(attr) + .map_err(|_| span.error("conflicting `#[binja(...)]` attributes")) + }; + + for attr in attrs { + let Some(ident) = attr.path().get_ident() else { + continue; + }; + if ident == "binja" { + let meta = attr.parse_args::()?; + let meta_ident = meta.path().require_ident()?; + if meta_ident == "pointer_width" { + let value = &meta.require_name_value()?.value; + if let Expr::Lit(expr) = &value { + if let Lit::Int(val) = &expr.lit { + set_attr(BinjaAttr { + kind: BinjaAttrKind::PointerWidth(val.base10_parse()?), + span: attr.span(), + })?; + continue; + } + } + return Err(value.span().error("expected integer literal")); + } else if meta_ident == "name" { + let value = &meta.require_name_value()?.value; + if let Expr::Lit(expr) = &value { + if let Lit::Str(lit) = &expr.lit { + set_attr(BinjaAttr { + kind: BinjaAttrKind::Named(Some(lit.value())), + span: attr.span(), + })?; + continue; + } + } + return Err(value.span().error(r#"expected string literal"#)); + } else if meta_ident == "named" { + meta.require_path_only()?; + set_attr(BinjaAttr { + kind: BinjaAttrKind::Named(None), + span: attr.span(), + })?; + } else { + return Err(meta + .span() + .error(format!("unrecognized property `{meta_ident}`"))); + } + } + } + Ok(binja_attr.into_inner()) +} + struct Repr { c: bool, packed: Option>, @@ -90,7 +158,7 @@ struct Repr { } impl Repr { - fn from_attrs(attrs: Vec) -> Result { + fn from_attrs(attrs: 
&[Attribute]) -> Result { let mut c = false; let mut packed = None; let mut align = None; @@ -99,15 +167,7 @@ impl Repr { let Some(ident) = attr.path().get_ident() else { continue; }; - if ident == "named" { - return Err(attr - .span() - .error("`#[named]` attribute can only be applied to fields")); - } else if ident == "width" { - return Err(attr - .span() - .error("`#[width]` attribute can only be applied to pointer fields")); - } else if ident == "repr" { + if ident == "repr" { attr.parse_nested_meta(|meta| { if let Some(ident) = meta.path.get_ident() { if ident == "C" { @@ -153,7 +213,7 @@ fn ident_in_list(ident: &Ident, list: [&'static str; N]) -> bool list.iter().any(|id| ident == id) } -#[proc_macro_derive(AbstractType, attributes(named, width))] +#[proc_macro_derive(AbstractType, attributes(binja))] pub fn abstract_type_derive(input: proc_macro::TokenStream) -> proc_macro::TokenStream { let input = parse_macro_input!(input as DeriveInput); match impl_abstract_type(input) { @@ -163,7 +223,18 @@ pub fn abstract_type_derive(input: proc_macro::TokenStream) -> proc_macro::Token } fn impl_abstract_type(ast: DeriveInput) -> Result { - let repr = Repr::from_attrs(ast.attrs)?; + let repr = Repr::from_attrs(&ast.attrs)?; + let width = find_binja_attr(&ast.attrs)? 
+ .map(|attr| match attr.kind { + BinjaAttrKind::PointerWidth(width) => Ok(width), + BinjaAttrKind::Named(Some(_)) => Err(attr + .span + .error(r#"`#[binja(name = "...")] is only supported on fields"#)), + BinjaAttrKind::Named(None) => Err(attr + .span + .error("`#[binja(named)]` is only supported on fields")), + }) + .transpose()?; if !ast.generics.params.is_empty() { return Err(ast.generics.span().error("type must not be generic")); @@ -173,7 +244,7 @@ fn impl_abstract_type(ast: DeriveInput) -> Result { match ast.data { Data::Struct(s) => match s.fields { Fields::Named(fields) => { - impl_abstract_structure_type(ident, fields, repr, StructureKind::Struct) + impl_abstract_structure_type(ident, fields, repr, width, StructureKind::Struct) } Fields::Unnamed(_) => Err(s .fields @@ -184,7 +255,9 @@ fn impl_abstract_type(ast: DeriveInput) -> Result { .error("unit structs are unsupported; provide at least one named field")), }, Data::Enum(e) => impl_abstract_enum_type(ident, e.variants, repr), - Data::Union(u) => impl_abstract_structure_type(ident, u.fields, repr, StructureKind::Union), + Data::Union(u) => { + impl_abstract_structure_type(ident, u.fields, repr, width, StructureKind::Union) + } } } @@ -197,6 +270,7 @@ fn impl_abstract_structure_type( name: Ident, fields: FieldsNamed, repr: Repr, + pointer_width: Option, kind: StructureKind, ) -> Result { if !repr.c { @@ -210,7 +284,7 @@ fn impl_abstract_structure_type( let abstract_fields = fields .named .into_iter() - .map(AbstractField::from_field) + .map(|field| AbstractField::from_field(field, &name, pointer_width)) .collect::>>()?; let layout_name = format_ident!("__{name}_layout"); let field_wrapper = format_ident!("__{name}_field_wrapper"); @@ -218,13 +292,17 @@ fn impl_abstract_structure_type( .iter() .map(|field| { let ident = &field.ident; - let layout_ty = field.width.as_ref().unwrap_or(&field.ty); - quote! 
{ - #ident: #field_wrapper< - [u8; <#layout_ty as ::binaryninja::types::AbstractType>::SIZE], - { <#layout_ty as ::binaryninja::types::AbstractType>::ALIGN }, - > - } + let (size, align) = match &field.kind { + FieldKind::Ptr(_, width) => { + let align = width.next_power_of_two(); + (quote! { #width }, quote! { #align }) + } + FieldKind::Ty(ty) => ( + quote! { <#ty as ::binaryninja::types::AbstractType>::SIZE }, + quote! { { <#ty as ::binaryninja::types::AbstractType>::ALIGN } }, + ), + }; + quote! { #ident: #field_wrapper<[u8; #size], #align> } }) .collect::>(); let args = abstract_fields From d9be065c09fabc9d4e0f9f707fc439fe1488620a Mon Sep 17 00:00:00 2001 From: Michael Krasnitski Date: Fri, 24 May 2024 23:04:30 -0400 Subject: [PATCH 12/16] Replace `SIZE` and `ALIGN` constants with a single `LAYOUT` constant --- rust/binaryninja-derive/src/lib.rs | 21 ++++++++++----------- rust/src/types.rs | 6 ++++-- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/rust/binaryninja-derive/src/lib.rs b/rust/binaryninja-derive/src/lib.rs index dd6e10ac6..1e8e8c6b5 100644 --- a/rust/binaryninja-derive/src/lib.rs +++ b/rust/binaryninja-derive/src/lib.rs @@ -298,11 +298,11 @@ fn impl_abstract_structure_type( (quote! { #width }, quote! { #align }) } FieldKind::Ty(ty) => ( - quote! { <#ty as ::binaryninja::types::AbstractType>::SIZE }, - quote! { { <#ty as ::binaryninja::types::AbstractType>::ALIGN } }, + quote! { { <#ty as ::binaryninja::types::AbstractType>::LAYOUT.size() } }, + quote! { { <#ty as ::binaryninja::types::AbstractType>::LAYOUT.align() } }, ), }; - quote! { #ident: #field_wrapper<[u8; #size], #align> } + quote! { #ident: #field_wrapper<#size, #align> } }) .collect::>(); let args = abstract_fields @@ -330,7 +330,7 @@ fn impl_abstract_structure_type( .map(|n| { ( quote! { #[repr(align(#n))] }, - quote! { .set_alignment(Self::ALIGN) }, + quote! 
{ .set_alignment(Self::LAYOUT.align()) }, ) }) .unzip(); @@ -348,12 +348,12 @@ fn impl_abstract_structure_type( Ok(quote! { #[repr(C)] #[derive(Copy, Clone)] - struct #field_wrapper + struct #field_wrapper where - ::binaryninja::elain::Align: ::binaryninja::elain::Alignment + ::binaryninja::elain::Align: ::binaryninja::elain::Alignment { - t: T, - _align: ::binaryninja::elain::Align, + t: [u8; SIZE], + _align: ::binaryninja::elain::Align, } #[repr(C)] @@ -364,13 +364,12 @@ fn impl_abstract_structure_type( } impl ::binaryninja::types::AbstractType for #name { - const SIZE: usize = ::std::mem::size_of::<#layout_name>(); - const ALIGN: usize = ::std::mem::align_of::<#layout_name>(); + const LAYOUT: ::std::alloc::Layout = ::std::alloc::Layout::new::<#layout_name>(); fn resolve_type() -> ::binaryninja::rc::Ref<::binaryninja::types::Type> { ::binaryninja::types::Type::structure( &::binaryninja::types::Structure::builder() #(.insert(#args))* - .set_width(Self::SIZE as u64) + .set_width(Self::LAYOUT.size() as u64) .set_packed(#is_packed) #set_alignment #set_union diff --git a/rust/src/types.rs b/rust/src/types.rs index 109d8211c..ec6e80471 100644 --- a/rust/src/types.rs +++ b/rust/src/types.rs @@ -33,6 +33,7 @@ use crate::{ use lazy_static::lazy_static; use std::ptr::null_mut; use std::{ + alloc::Layout, borrow::{Borrow, Cow}, collections::{HashMap, HashSet}, ffi::CStr, @@ -698,8 +699,9 @@ impl Drop for TypeBuilder { #[cfg(feature = "derive")] pub use binaryninja_derive::*; pub trait AbstractType: Sized { - const SIZE: usize = std::mem::size_of::(); - const ALIGN: usize = std::mem::align_of::(); + #[doc(hidden)] + const LAYOUT: Layout = Layout::new::(); + fn resolve_type() -> Ref; } From 2c6aee6adc9c5d9559691b10f9d2c9361cf83e93 Mon Sep 17 00:00:00 2001 From: Michael Krasnitski Date: Sat, 25 May 2024 21:47:54 -0400 Subject: [PATCH 13/16] Add comments to `AbstractType` derive macro implementation --- rust/binaryninja-derive/src/lib.rs | 177 ++++++++++++++++++++++++----- 1 
file changed, 146 insertions(+), 31 deletions(-) diff --git a/rust/binaryninja-derive/src/lib.rs b/rust/binaryninja-derive/src/lib.rs index 1e8e8c6b5..ef922533c 100644 --- a/rust/binaryninja-derive/src/lib.rs +++ b/rust/binaryninja-derive/src/lib.rs @@ -34,6 +34,8 @@ impl AbstractField { let Some(ident) = field.ident else { return Err(field.span().error("field must be named")); }; + + // If the field is a pointer, we want the type being pointed at, not the pointer itself. let kind = match field.ty { Type::Ptr(ty) => { let Some(width) = pointer_width else { @@ -47,6 +49,8 @@ impl AbstractField { } _ => FieldKind::Ty(field.ty), }; + + // Fields may be decorated with either `#[binja(name = "...")]` or `#[binja(named)]`. let name = find_binja_attr(&field.attrs)? .map(|attr| match attr.kind { BinjaAttrKind::PointerWidth(_) => Err(attr.span.error( @@ -61,9 +65,11 @@ impl AbstractField { } }) .transpose()?; + Ok(Self { kind, ident, name }) } + /// Transforms the `AbstractField` into a token stream that constructs a binja `Type` object fn resolved_ty(&self) -> TokenStream { let ty = self.kind.ty(); let mut resolved = quote! { <#ty as ::binaryninja::types::AbstractType>::resolve_type() }; @@ -93,9 +99,19 @@ enum BinjaAttrKind { Named(Option), } +/// Given a list of attributes, look for a `#[binja(...)]` attribute. At most one copy of the +/// attribute is allowed to decorate an item (i.e. a type or field). If more than one copy is +/// present, we throw an error. +/// +/// Three properties are supported, and for any given item they are mutually exclusive: +/// - `pointer_width`: Expects an integer literal. Only allowed on types, not fields. +/// - `name`: Expects a string literal. Only allowed on fields. +/// - `named`: Must be a bare path. Only allowed on fields. fn find_binja_attr(attrs: &[Attribute]) -> Result> { + // Use a `OnceCell` to assert that we only allow a single `#[binja(...)]` attribute per-item. 
let binja_attr = OnceCell::new(); + // Wrapper function for setting the value of the `OnceCell` above. let set_attr = |attr: BinjaAttr| { let span = attr.span; binja_attr @@ -111,6 +127,7 @@ fn find_binja_attr(attrs: &[Attribute]) -> Result> { let meta = attr.parse_args::()?; let meta_ident = meta.path().require_ident()?; if meta_ident == "pointer_width" { + // #[binja(pointer_width = )] let value = &meta.require_name_value()?.value; if let Expr::Lit(expr) = &value { if let Lit::Int(val) = &expr.lit { @@ -123,6 +140,7 @@ fn find_binja_attr(attrs: &[Attribute]) -> Result> { } return Err(value.span().error("expected integer literal")); } else if meta_ident == "name" { + // #[binja(name = "...")] let value = &meta.require_name_value()?.value; if let Expr::Lit(expr) = &value { if let Lit::Str(lit) = &expr.lit { @@ -135,6 +153,7 @@ fn find_binja_attr(attrs: &[Attribute]) -> Result> { } return Err(value.span().error(r#"expected string literal"#)); } else if meta_ident == "named" { + // #[binja(named)] meta.require_path_only()?; set_attr(BinjaAttr { kind: BinjaAttrKind::Named(None), @@ -150,6 +169,7 @@ fn find_binja_attr(attrs: &[Attribute]) -> Result> { Ok(binja_attr.into_inner()) } +/// Struct representing the contents of all `#[repr(...)]` attributes decorating a type. struct Repr { c: bool, packed: Option>, @@ -158,6 +178,8 @@ struct Repr { } impl Repr { + /// Scan through a list of attributes and finds every instance of a `#[repr(...)]` attribute, + /// then initialize `Self` based off the collective contents of those attributes. fn from_attrs(attrs: &[Attribute]) -> Result { let mut c = false; let mut packed = None; @@ -213,23 +235,34 @@ fn ident_in_list(ident: &Ident, list: [&'static str; N]) -> bool list.iter().any(|id| ident == id) } +/// Entry point to the proc-macro. 
#[proc_macro_derive(AbstractType, attributes(binja))] pub fn abstract_type_derive(input: proc_macro::TokenStream) -> proc_macro::TokenStream { let input = parse_macro_input!(input as DeriveInput); + // Transforming the error diagnostic into tokens for emission allows the business logic to + // return `Result` and make use of the `?` operator like any normal Rust program match impl_abstract_type(input) { Ok(tokens) => tokens.into(), Err(diag) => diag.emit_as_item_tokens().into(), } } +/// Main business logic of the macro. Parses any relevant attributes decorating the type, then +/// defers execution based on the kind of type: struct, enum, or union. fn impl_abstract_type(ast: DeriveInput) -> Result { let repr = Repr::from_attrs(&ast.attrs)?; let width = find_binja_attr(&ast.attrs)? .map(|attr| match attr.kind { - BinjaAttrKind::PointerWidth(width) => Ok(width), + BinjaAttrKind::PointerWidth(width) => { + if let Data::Enum(_) = ast.data { + Err(attr.span.error("`#[binja(pointer_width)]` is only supported on structs and unions, not enums")) + } else { + Ok(width) + } + } BinjaAttrKind::Named(Some(_)) => Err(attr .span - .error(r#"`#[binja(name = "...")] is only supported on fields"#)), + .error(r#"`#[binja(name)] is only supported on fields"#)), BinjaAttrKind::Named(None) => Err(attr .span .error("`#[binja(named)]` is only supported on fields")), @@ -240,23 +273,23 @@ fn impl_abstract_type(ast: DeriveInput) -> Result { return Err(ast.generics.span().error("type must not be generic")); } - let ident = ast.ident; match ast.data { Data::Struct(s) => match s.fields { Fields::Named(fields) => { - impl_abstract_structure_type(ident, fields, repr, width, StructureKind::Struct) + impl_abstract_structure_type(ast.ident, fields, repr, width, StructureKind::Struct) } - Fields::Unnamed(_) => Err(s - .fields - .span() - .error("tuple structs are unsupported; struct must have named fields")), - Fields::Unit => Err(ident - .span() - .error("unit structs are unsupported; provide at 
least one named field")), + Fields::Unnamed(_) => Err(s.fields.span().error( + "tuple structs are unsupported; \ + struct must have named fields", + )), + Fields::Unit => Err(ast.ident.span().error( + "unit structs are unsupported; \ + provide at least one named field", + )), }, - Data::Enum(e) => impl_abstract_enum_type(ident, e.variants, repr), + Data::Enum(e) => impl_abstract_enum_type(ast.ident, e.variants, repr), Data::Union(u) => { - impl_abstract_structure_type(ident, u.fields, repr, width, StructureKind::Union) + impl_abstract_structure_type(ast.ident, u.fields, repr, width, StructureKind::Union) } } } @@ -266,6 +299,70 @@ enum StructureKind { Union, } +/// Implements the `AbstractType` trait for either a struct or union, based on the value of `kind`. +/// +/// Unlike C-style enums, structs and unions can contain other types within them that affect their +/// size and alignment. For example, the size of a struct is at least the sum of the sizes of its +/// fields (plus any padding), and its alignment is equal to that of the most-aligned field. +/// Likewise, a union's size is at least that of its largest field. +/// +/// Normally this would be fine, because the compiler can give you size and alignment information +/// using `std::mem::{size_of, align_of}`. However, the `#[binja(pointer_width)]` attribute allows +/// users to change the width of pointer fields to be different in Binja compared to the host CPU +/// architecture, meaning the value calculated by the compiler will be wrong in that case. What's +/// worse, is that a pointer field with custom width not only affects the size/alignment of its +/// parent struct, but anything that contains *that* struct, and so on up the tree. +/// +/// So, we need a way to propagate the modified layout information at compile-time. 
To accomplish +/// this, we use the `AbstractType::LAYOUT` associated constant, which by default matches the +/// layout of the struct as calculated by the compiler, but which can be swapped out for any other +/// valid `std::alloc::Layout` object when implementing the `AbstractType` trait. We then create a +/// mock-type with the desired custom layout and use that for propagation. +/// +/// In order to mock a type, we make use of the following construction: +/// +/// ```ignore +/// #[repr(C)] +/// struct Mock<const SIZE: usize, const ALIGN: usize> +/// where +/// elain::Align<ALIGN>: elain::Alignment, +/// { +/// t: [u8; SIZE], +/// _align: elain::Align<ALIGN> +/// } +/// ``` +/// +/// The `elain::Align` type is a zero-size type with a const-generic parameter specifying its +/// alignment. The trait bound serves to restrict the possible values of `ALIGN` to only those +/// valid for specifying alignment (powers of two). Additionally, we know that `[u8; SIZE]` is +/// always of size `SIZE`, and alignment 1. Therefore, the `Mock` type is guaranteed to be of size +/// `SIZE` and alignment equal to `ALIGN`. +/// +/// This constructed `Mock` type allows us to generate a struct with arbitrary layout, which we can +/// use to mimic the layout of another struct: +/// +/// ```ignore +/// #[derive(AbstractType)] +/// #[repr(C)] +/// struct S { +/// first: u8, +/// second: u16, +/// third: u64, +/// } +/// +/// // Identical layout to `S` above +/// #[repr(C)] +/// struct __S_layout { +/// first: Mock<1, 1>, +/// second: Mock<2, 2>, +/// third: Mock<8, 8>, +/// } +/// ``` +/// +/// Then, we can propagate any changes in the layout of `S` (due to custom pointer widths) by +/// setting the `S::LAYOUT` constant equal to `alloc::Layout::new::<__S_layout>()` rather than the default +/// value of `alloc::Layout::new::<S>()`. Then, when mocking fields of type `S`, we use `S::LAYOUT.size()` +/// and `S::LAYOUT.align()` for the const-generic parameters of `Mock`, instead of just integers.
fn impl_abstract_structure_type( name: Ident, fields: FieldsNamed, @@ -286,17 +383,41 @@ fn impl_abstract_structure_type( .into_iter() .map(|field| AbstractField::from_field(field, &name, pointer_width)) .collect::>>()?; + + // Generate the arguments to `StructureBuilder::insert`. Luckily `mem::offset_of!` was stabilized in + // Rust 1.77 or otherwise this would be a lot more complicated. let layout_name = format_ident!("__{name}_layout"); + let args = abstract_fields + .iter() + .map(|field| { + let ident = &field.ident; + let resolved_ty = field.resolved_ty(); + quote! { + &#resolved_ty, + stringify!(#ident), + ::std::mem::offset_of!(#layout_name, #ident) as u64, + false, + ::binaryninja::types::MemberAccess::NoAccess, + ::binaryninja::types::MemberScope::NoScope, + } + }) + .collect::>(); + + // Calculate size and alignment for each field - these may differ from the compiler's + // calculated values so we use the construction discussed above to mock/propagate them. let field_wrapper = format_ident!("__{name}_field_wrapper"); let layout_fields = abstract_fields .iter() .map(|field| { let ident = &field.ident; let (size, align) = match &field.kind { + // Since pointers can be of arbitrary size as specified by the user, we manually + // calculate size/alignment for them. FieldKind::Ptr(_, width) => { let align = width.next_power_of_two(); (quote! { #width }, quote! { #align }) } + // All other types defer to the value of Self::LAYOUT FieldKind::Ty(ty) => ( quote! { { <#ty as ::binaryninja::types::AbstractType>::LAYOUT.size() } }, quote! { { <#ty as ::binaryninja::types::AbstractType>::LAYOUT.align() } }, @@ -305,21 +426,9 @@ fn impl_abstract_structure_type( quote! { #ident: #field_wrapper<#size, #align> } }) .collect::>(); - let args = abstract_fields - .iter() - .map(|field| { - let ident = &field.ident; - let resolved_ty = field.resolved_ty(); - quote! 
{ - &#resolved_ty, - stringify!(#ident), - ::std::mem::offset_of!(#layout_name, #ident) as u64, - false, - ::binaryninja::types::MemberAccess::NoAccess, - ::binaryninja::types::MemberScope::NoScope, - } - }) - .collect::>(); + + // If the struct/union is marked `#[repr(packed)]` or `#[repr(align(...))]`, we decorate the + // mocked layout type with those as well let is_packed = repr.packed.is_some(); let packed = repr.packed.map(|size| match size { Some(n) => quote! { #[repr(packed(#n))] }, @@ -334,17 +443,18 @@ fn impl_abstract_structure_type( ) }) .unzip(); + + // Distinguish between structs and unions let (kind, set_union) = match kind { StructureKind::Struct => (quote! { struct }, None), StructureKind::Union => ( quote! { union }, Some(quote! { - .set_structure_type( - ::binaryninja::types::StructureType::UnionStructureType - ) + .set_structure_type(::binaryninja::types::StructureType::UnionStructureType) }), ), }; + Ok(quote! { #[repr(C)] #[derive(Copy, Clone)] @@ -380,6 +490,7 @@ fn impl_abstract_structure_type( }) } +/// Implements the `AbstractType` trait for an enum. fn impl_abstract_enum_type( name: Ident, variants: impl IntoIterator, @@ -400,6 +511,9 @@ fn impl_abstract_enum_type( .span() .error("must provide a primitive `repr` type, e.g. `u32`")); }; + + // Extract the variant names and the value of their discriminants. Variants must not hold any + // nested data (in other words, they must be simple C-style identifiers). let variants = variants .into_iter() .map(|variant| { @@ -415,6 +529,7 @@ fn impl_abstract_enum_type( Ok(quote! { stringify!(#ident), #discriminant as u64 }) }) .collect::>>()?; + Ok(quote! 
{ impl ::binaryninja::types::AbstractType for #name { fn resolve_type() -> ::binaryninja::rc::Ref<::binaryninja::types::Type> { From bdb867a91b121632a21f4986f3052ac8c579b59c Mon Sep 17 00:00:00 2001 From: Michael Krasnitski Date: Wed, 5 Jun 2024 13:31:27 -0400 Subject: [PATCH 14/16] Add dev guide for `AbstractType` trait --- docs/dev/abstract-type.md | 191 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 191 insertions(+) create mode 100644 docs/dev/abstract-type.md diff --git a/docs/dev/abstract-type.md b/docs/dev/abstract-type.md new file mode 100644 index 000000000..3ea002dfa --- /dev/null +++ b/docs/dev/abstract-type.md @@ -0,0 +1,191 @@ +# Defining types using native Rust syntax + +Writing a Binary Ninja plugin often involves defining one or more types inside a Binary View. The easiest way to do this using the C++ or Python APIs is to use the `TypeBuilder` class, or one of its variants, like `StructureBuilder` or `EnumerationBuilder`. The Rust API also has equivalent builders for this. However, the newly added `AbstractType` trait allows you to automatically generate a type object ready for ingestion into Binary Ninja by simply decorating a Rust type definition with `#[derive(AbstractType)]`, with no additional effort required! 
+ +As an example, say you'd like to define the following type inside of a Binary View: + +```c +struct MyStruct { + uint8_t first; + uint32_t second; + int16_t third[2]; +}; +``` + +Using the `StructureBuilder` API, you could generate the type as follows: + +```rust +use binaryninja::types::{Structure, Type}; + +let ty = Type::structure( + Structure::builder() + .with_members([ + (&Type::int(1, false), "first"), + (&Type::int(4, false), "second"), + (&Type::array(&Type::int(2, true), 2), "third"), + ]) + .finalize() + .as_ref(), +); +``` + +Or, you could generate the same type using a native Rust struct definition instead: + +```rust +use binaryninja::types::AbstractType; + +#[derive(AbstractType)] +#[repr(C)] +struct MyStruct { + first: u8, + second: u32, + third: [i16; 2], +} + +let ty = MyStruct::resolve_type(); +``` + +By deriving the `AbstractType` trait for a type `T`, the `resolve_type` method will automatically construct a `Type` corresponding to the layout of `T`. This has multiple benefits, the first of which is improved readability. Another is that if your plugin performs some additional processing that makes use of `T`, you can define it once in Rust and use that definition both for processing actual data as well as defining types inside of Binary Ninja. + +## Deriving `AbstractType` for a type + +While the trait itself is public, the derive macro for `AbstractType` is gated behind the `derive` crate feature. In order to make use of it, include the following line in your `Cargo.toml`: + +```toml +[dependencies] +binaryninja = { git = "https://github.com/Vector35/binaryninja-api.git", branch = "dev", features = ["derive"] } +``` + +Furthermore, in order for `AbstractType::resolve_type` to produce unambiguous results, some restrictions are enforced when deriving the trait that ensure the generated implementation correctly produces the intended corresponding C type. + +### Structs and Unions + +Structs and unions must be marked `#[repr(C)]`. 
This is because the `AbstractType` derive macro relies on compiler-generated layout information in order to accurately generate equivalent C type definitions. Because we are targeting the C ABI (and because the Rust ABI is not stable), deriving `AbstractType` requires opting into the C ABI as well. + +### Enums +In contrast to structs, the fundamental representation of enums in Rust is different compared to C; decorating a Rust enum with `#[repr(C)]` produces a "tagged union" whose layout is not the same as a C-style enum. Therefore, Rust enums that derive `AbstractType` must instead be decorated with `#[repr(<int>)]`, where `<int>` is a primitive integer type, for example `#[repr(u32)]` or `#[repr(u64)]`. Additionally, their variants must not contain any data, and all variants must have an explicitly assigned discriminant. As an example: + +```rust +#[derive(AbstractType)] +#[repr(u32)] +enum Color { + Red = 0xff0000, + Green = 0x00ff00, + Blue = 0x0000ff, +} +``` + +## Special cases + +### Pointers + +Creating pointers using the Binary Ninja API requires either defining them with respect to a specific architecture (if using the `Type::pointer` constructor), or otherwise manually specifying their width using `Type::pointer_of_width`. Likewise, deriving `AbstractType` for a type `T` that contains any pointer fields requires decorating `T` with a `#[binja(pointer_width)]` attribute: + +```rust +#[derive(AbstractType)] +#[binja(pointer_width = 4)] // Explicitly required because `A` contains pointers +#[repr(C)] +struct A { + first: u8, + second: *const u64, // 4 bytes wide + third: *const u32, // also 4 bytes wide - all pointers inside `A` are given the same width +} +``` + +Part of the reason for this requirement is that the architecture of the Binary View may be different than the host system - therefore, the Rust compiler would otherwise report an incorrect size for any pointers compared to what the Binary View expects.
+ +### Named types + +If you wish to define a type containing a non-primitive field, by default the type of that field will be defined inline in Binja, which may initially feel surprising. As an example, let's say we want to express the following construct: + +```c +struct A { + uint8_t first; + struct B second; +}; + +struct B { + uint16_t third; + uint32_t fourth; +}; +``` + +If we simply define the types `A` and `B` in Rust like so: + +```rust +#[derive(AbstractType)] +#[repr(C)] +struct A { + first: u8, + second: B, +} + +#[derive(AbstractType)] +#[repr(C)] +struct B { + third: u16, + fourth: u32, +} +``` + +...then, calling `A::resolve_type()` and passing the result to a Binary View will result in the following definition in the view: + +```c +struct A { + uint8_t first; + struct { + uint16_t third; + uint32_t fourth; + } second; +}; +``` + +Obviously, this is not quite what we intended. To fix this, decorate the `A::second` field with a `#[binja(named)]` attribute to signal to the compiler to use a named type for the field rather than inlining the type's definition: + +```rust +#[derive(AbstractType)] +#[repr(C)] +struct A { + first: u8, + #[binja(named)] + second: B, +} +``` + +...resulting in the correct C definition: + +```c +struct A { + uint8_t first; + struct B second; +}; +``` + +The `named` attribute will use the name of the Rust type (in this case, `B`) as the name for the defined type in Binja. If you would like a different name to be used, you can explicitly specify it by instead using the `#[binja(name = "...")]` attribute: + +```rust +#[derive(AbstractType)] +#[repr(C)] +struct A { + first: u8, + #[binja(name = "C")] + second: B, +} +``` + +...which will result in the following C-type: + +```c +struct A { + uint8_t first; + struct C second; +}; +``` + +Note that defining structs with named fields does not require that the types with the specified names are already defined inside the Binary View.
In other words, in the example above, the order in which you define `A` and `B` (e.g. by calling `BinaryView::define_user_type`) does not matter. + +## Additional Notes + +### Modifying default alignment + +Decorating a Rust type with `#[repr(packed)]` or `#[repr(align)]` can change the alignment of a struct and its fields. These changes will also be reflected inside Binary Ninja. For example, decorating a struct with `#[repr(packed)]` will cause it to be marked `__packed` when defined in the Binary View. From c8aae0bd381f29e078b80a5ca05f24c7daa8a9ee Mon Sep 17 00:00:00 2001 From: Michael Krasnitski Date: Mon, 10 Jun 2024 19:51:21 -0400 Subject: [PATCH 15/16] Fix alignment bug `StructureBuilder::insert` doesn't affect the alignment of the type, so we need to always explicitly specify alignment. --- rust/binaryninja-derive/src/lib.rs | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/rust/binaryninja-derive/src/lib.rs b/rust/binaryninja-derive/src/lib.rs index ef922533c..4fa6e74ab 100644 --- a/rust/binaryninja-derive/src/lib.rs +++ b/rust/binaryninja-derive/src/lib.rs @@ -384,6 +384,10 @@ fn impl_abstract_structure_type( .map(|field| AbstractField::from_field(field, &name, pointer_width)) .collect::>>()?; + if abstract_fields.is_empty() { + return Err(name.span().error("expected at least one named field")); + } + // Generate the arguments to `StructureBuilder::insert`. Luckily `mem::offset_of!` was stabilized in // Rust 1.77 or otherwise this would be a lot more complicated. let layout_name = format_ident!("__{name}_layout"); @@ -434,15 +438,7 @@ fn impl_abstract_structure_type( Some(n) => quote! { #[repr(packed(#n))] }, None => quote! { #[repr(packed)] }, }); - let (align, set_alignment) = repr - .align - .map(|n| { - ( - quote! { #[repr(align(#n))] }, - quote! { .set_alignment(Self::LAYOUT.align()) }, - ) - }) - .unzip(); + let align = repr.align.map(|n| quote! 
{ #[repr(align(#n))] }); // Distinguish between structs and unions let (kind, set_union) = match kind { @@ -478,11 +474,11 @@ fn impl_abstract_structure_type( fn resolve_type() -> ::binaryninja::rc::Ref<::binaryninja::types::Type> { ::binaryninja::types::Type::structure( &::binaryninja::types::Structure::builder() - #(.insert(#args))* - .set_width(Self::LAYOUT.size() as u64) .set_packed(#is_packed) - #set_alignment + .set_width(Self::LAYOUT.size() as u64) + .set_alignment(Self::LAYOUT.align()) #set_union + #(.insert(#args))* .finalize() ) } From 9a23a8eb33ca82cde32d05465b8c937047c644ff Mon Sep 17 00:00:00 2001 From: Michael Krasnitski Date: Mon, 10 Jun 2024 20:11:43 -0400 Subject: [PATCH 16/16] Add testing example for `AbstractType` derive macro --- rust/Cargo.lock | 7 + rust/Cargo.toml | 1 + rust/examples/abstract-type/Cargo.toml | 7 + rust/examples/abstract-type/build.rs | 68 ++++++ rust/examples/abstract-type/src/main.rs | 297 ++++++++++++++++++++++++ 5 files changed, 380 insertions(+) create mode 100644 rust/examples/abstract-type/Cargo.toml create mode 100644 rust/examples/abstract-type/build.rs create mode 100644 rust/examples/abstract-type/src/main.rs diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 9da1cd0e0..e28155d28 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -2,6 +2,13 @@ # It is not intended for manual editing. 
version = 3 +[[package]] +name = "abstract-type" +version = "0.1.0" +dependencies = [ + "binaryninja", +] + [[package]] name = "adler" version = "1.0.2" diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 681c85a94..e8c350ef2 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -24,6 +24,7 @@ pdb = { path = "./examples/pdb-ng/pdb-0.8.0-patched" } [workspace] members = [ + "examples/abstract-type", "examples/basic_script", "examples/decompile", "examples/dwarf/dwarf_export", diff --git a/rust/examples/abstract-type/Cargo.toml b/rust/examples/abstract-type/Cargo.toml new file mode 100644 index 000000000..bab72cea3 --- /dev/null +++ b/rust/examples/abstract-type/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "abstract-type" +version = "0.1.0" +edition = "2021" + +[dependencies] +binaryninja = { path="../../", features = ["derive"] } diff --git a/rust/examples/abstract-type/build.rs b/rust/examples/abstract-type/build.rs new file mode 100644 index 000000000..5ba9bcde4 --- /dev/null +++ b/rust/examples/abstract-type/build.rs @@ -0,0 +1,68 @@ +use std::env; +use std::fs::File; +use std::io::BufReader; +use std::path::PathBuf; + +#[cfg(target_os = "macos")] +static LASTRUN_PATH: (&str, &str) = ("HOME", "Library/Application Support/Binary Ninja/lastrun"); + +#[cfg(target_os = "linux")] +static LASTRUN_PATH: (&str, &str) = ("HOME", ".binaryninja/lastrun"); + +#[cfg(windows)] +static LASTRUN_PATH: (&str, &str) = ("APPDATA", "Binary Ninja\\lastrun"); + +// Check last run location for path to BinaryNinja; Otherwise check the default install locations +fn link_path() -> PathBuf { + use std::io::prelude::*; + + let home = PathBuf::from(env::var(LASTRUN_PATH.0).unwrap()); + let lastrun = PathBuf::from(&home).join(LASTRUN_PATH.1); + + File::open(lastrun) + .and_then(|f| { + let mut binja_path = String::new(); + let mut reader = BufReader::new(f); + + reader.read_line(&mut binja_path)?; + Ok(PathBuf::from(binja_path.trim())) + }) + .unwrap_or_else(|_| { + #[cfg(target_os = 
"macos")] + return PathBuf::from("/Applications/Binary Ninja.app/Contents/MacOS"); + + #[cfg(target_os = "linux")] + return home.join("binaryninja"); + + #[cfg(windows)] + return PathBuf::from(env::var("PROGRAMFILES").unwrap()) + .join("Vector35\\BinaryNinja\\"); + }) +} + +fn main() { + // Use BINARYNINJADIR first for custom BN builds/configurations (BN devs/build server), fallback on defaults + let install_path = env::var("BINARYNINJADIR") + .map(PathBuf::from) + .unwrap_or_else(|_| link_path()); + + #[cfg(target_os = "linux")] + println!( + "cargo:rustc-link-arg=-Wl,-rpath,{},-L{},-l:libbinaryninjacore.so.1", + install_path.to_str().unwrap(), + install_path.to_str().unwrap(), + ); + + #[cfg(target_os = "macos")] + println!( + "cargo:rustc-link-arg=-Wl,-rpath,{},-L{},-lbinaryninjacore", + install_path.to_str().unwrap(), + install_path.to_str().unwrap(), + ); + + #[cfg(target_os = "windows")] + { + println!("cargo:rustc-link-lib=binaryninjacore"); + println!("cargo:rustc-link-search={}", install_path.to_str().unwrap()); + } +} diff --git a/rust/examples/abstract-type/src/main.rs b/rust/examples/abstract-type/src/main.rs new file mode 100644 index 000000000..e8df03159 --- /dev/null +++ b/rust/examples/abstract-type/src/main.rs @@ -0,0 +1,297 @@ +use binaryninja::rc::Ref; +use binaryninja::types::{AbstractType, EnumerationBuilder, StructureBuilder, StructureType, Type}; + +fn create_struct(f: F) -> Ref +where + F: FnOnce(&StructureBuilder) -> &StructureBuilder, +{ + Type::structure(&f(&StructureBuilder::new()).finalize()) +} + +fn create_enum(width: usize, signed: bool, f: F) -> Ref +where + F: FnOnce(&EnumerationBuilder) -> &EnumerationBuilder, +{ + Type::enumeration(&f(&EnumerationBuilder::new()).finalize(), width, signed) +} + +fn primitive() { + assert_eq!(u8::resolve_type(), Type::int(1, false)); + assert_eq!(u16::resolve_type(), Type::int(2, false)); + assert_eq!(u32::resolve_type(), Type::int(4, false)); + assert_eq!(u64::resolve_type(), Type::int(8, false)); 
+ assert_eq!(u128::resolve_type(), Type::int(16, false)); + + assert_eq!(i8::resolve_type(), Type::int(1, true)); + assert_eq!(i16::resolve_type(), Type::int(2, true)); + assert_eq!(i32::resolve_type(), Type::int(4, true)); + assert_eq!(i64::resolve_type(), Type::int(8, true)); + assert_eq!(i128::resolve_type(), Type::int(16, true)); + + assert_eq!(f32::resolve_type(), Type::float(4)); + assert_eq!(f64::resolve_type(), Type::float(8)); +} + +fn basic_struct() { + #[derive(AbstractType)] + #[repr(C)] + struct A { + first: u8, + second: u32, + third: u16, + } + + assert_eq!( + A::resolve_type(), + create_struct(|s| { + s.with_members([ + (&Type::int(1, false), "first"), + (&Type::int(4, false), "second"), + (&Type::int(2, false), "third"), + ]) + }) + ); +} + +fn packed_struct() { + #[derive(AbstractType)] + #[repr(C, packed)] + struct A { + first: u8, + second: u32, + third: u16, + } + + assert_eq!( + A::resolve_type(), + create_struct(|s| { + s.set_packed(true).with_members([ + (&Type::int(1, false), "first"), + (&Type::int(4, false), "second"), + (&Type::int(2, false), "third"), + ]) + }) + ); +} + +fn custom_alignment() { + #[derive(AbstractType)] + #[repr(C, align(16))] + struct A { + first: u8, + second: u32, + third: u16, + } + + assert_eq!( + A::resolve_type(), + create_struct(|s| { + s.set_alignment(16).with_members([ + (&Type::int(1, false), "first"), + (&Type::int(4, false), "second"), + (&Type::int(2, false), "third"), + ]) + }) + ); +} + +fn named_field() { + #[derive(AbstractType)] + #[repr(C)] + struct A { + first: u8, + #[binja(named)] + second: B, + } + + #[derive(AbstractType)] + #[repr(C)] + struct B { + third: u16, + } + + assert_eq!( + A::resolve_type(), + create_struct(|s| { + s.with_members([ + (&Type::int(1, false), "first"), + ( + &Type::named_type_from_type("B", &B::resolve_type()), + "second", + ), + ]) + }) + ); + assert_eq!( + B::resolve_type(), + create_struct(|s| { s.with_members([(&Type::int(2, false), "third")]) }) + ); +} + +fn 
pointer_field() { + #[derive(AbstractType)] + #[repr(C)] + #[binja(pointer_width = 4)] + struct A { + first: u8, + second: *const u32, + } + + assert_eq!( + A::resolve_type(), + create_struct(|s| { + s.with_members([ + (&Type::int(1, false), "first"), + ( + &Type::pointer_of_width(&Type::int(4, false), 4, false, false, None), + "second", + ), + ]) + }) + ); +} + +fn nested_pointer_field() { + #[derive(AbstractType)] + #[repr(C)] + struct A { + first: u8, + #[binja(named)] + second: B, + } + + #[derive(AbstractType)] + #[repr(C)] + #[binja(pointer_width = 4)] + struct B { + third: u32, + fourth: *const u16, + } + + assert_eq!( + A::resolve_type(), + create_struct(|s| { + s.with_members([ + (&Type::int(1, false), "first"), + ( + &Type::named_type_from_type("B", &B::resolve_type()), + "second", + ), + ]) + }) + ); + assert_eq!( + B::resolve_type(), + create_struct(|s| { + s.with_members([ + (&Type::int(4, false), "third"), + ( + &Type::pointer_of_width(&Type::int(2, false), 4, false, false, None), + "fourth", + ), + ]) + }) + ); +} + +fn named_pointer_field() { + #[derive(AbstractType)] + #[repr(C)] + #[binja(pointer_width = 4)] + struct A { + first: u8, + #[binja(named)] + second: *const B, + } + + #[derive(AbstractType)] + #[repr(C)] + struct B { + third: u32, + fourth: u16, + } + + assert_eq!( + A::resolve_type(), + create_struct(|s| { + s.with_members([ + (&Type::int(1, false), "first"), + ( + &Type::pointer_of_width( + &Type::named_type_from_type("B", &B::resolve_type()), + 4, + false, + false, + None, + ), + "second", + ), + ]) + }) + ); + assert_eq!( + B::resolve_type(), + create_struct(|s| { + s.with_members([ + (&Type::int(4, false), "third"), + (&Type::int(2, false), "fourth"), + ]) + }) + ) +} + +fn union() { + #[derive(AbstractType)] + #[repr(C)] + union A { + first: u32, + second: [u16; 2], + third: [u8; 4], + } + + assert_eq!( + A::resolve_type(), + create_struct(|s| { + s.set_structure_type(StructureType::UnionStructureType) + .with_members([ + 
(&Type::int(4, false), "first"), + (&Type::array(&Type::int(2, false), 2), "second"), + (&Type::array(&Type::int(1, false), 4), "third"), + ]) + }) + ); +} + +fn enumeration() { + #[derive(AbstractType)] + #[repr(u32)] + #[allow(dead_code)] + enum Color { + Red = 0xff0000, + Green = 0x00ff00, + Blue = 0x0000ff, + } + + assert_eq!( + Color::resolve_type(), + create_enum(4, false, |e| { + e.insert("Red", 0xff0000) + .insert("Green", 0x00ff00) + .insert("Blue", 0x0000ff) + }) + ); +} + +fn main() { + let _ = binaryninja::headless::Session::new(); + primitive(); + basic_struct(); + packed_struct(); + custom_alignment(); + named_field(); + pointer_field(); + nested_pointer_field(); + named_pointer_field(); + union(); + enumeration(); +}