From 4d7e4a36e417cc9652dfaef6134fb28413074525 Mon Sep 17 00:00:00 2001 From: Adrian Taylor Date: Thu, 20 Feb 2025 08:19:03 +0000 Subject: [PATCH 1/3] Deduplicate logic to call discovery callback. No functional change - just deduplicating the logic which calls this callback, which will make it easier to make further changes in future. Part of https://github.com/google/autocxx/issues/124 --- bindgen/codegen/mod.rs | 108 ++++++++++++++++++++--------------------- 1 file changed, 52 insertions(+), 56 deletions(-) diff --git a/bindgen/codegen/mod.rs b/bindgen/codegen/mod.rs index 433fc85f01..6416dfe4d2 100644 --- a/bindgen/codegen/mod.rs +++ b/bindgen/codegen/mod.rs @@ -995,16 +995,13 @@ impl CodeGenerator for Type { let rust_name = ctx.rust_ident(&name); - ctx.options().for_each_callback(|cb| { - cb.new_item_found( - DiscoveredItemId::new(item.id().as_usize()), - DiscoveredItem::Alias { - alias_name: rust_name.to_string(), - alias_for: DiscoveredItemId::new( - inner_item.id().as_usize(), - ), - }, - ); + utils::call_discovered_item_callback(ctx, item, || { + DiscoveredItem::Alias { + alias_name: rust_name.to_string(), + alias_for: DiscoveredItemId::new( + inner_item.id().as_usize(), + ), + } }); let mut tokens = if let Some(comment) = item.comment(ctx) { @@ -2480,28 +2477,23 @@ impl CodeGenerator for CompInfo { let is_rust_union = is_union && struct_layout.is_rust_union(); - let discovered_id = DiscoveredItemId::new(item.id().as_usize()); - ctx.options().for_each_callback(|cb| { - let discovered_item = match self.kind() { - CompKind::Struct => DiscoveredItem::Struct { - original_name: item - .kind() - .expect_type() - .name() - .map(String::from), - final_name: canonical_ident.to_string(), - }, - CompKind::Union => DiscoveredItem::Union { - original_name: item - .kind() - .expect_type() - .name() - .map(String::from), - final_name: canonical_ident.to_string(), - }, - }; - - cb.new_item_found(discovered_id, discovered_item); + utils::call_discovered_item_callback(ctx, item, || match self.kind() { + CompKind::Struct => DiscoveredItem::Struct { + original_name: item + .kind() + .expect_type() + .name() + .map(String::from), + final_name: canonical_ident.to_string(), + }, + CompKind::Union => DiscoveredItem::Union { + original_name: item + .kind() + .expect_type() + .name() + .map(String::from), + final_name: canonical_ident.to_string(), + }, }); // The custom derives callback may return a list of derive attributes; @@ -2699,6 +2691,7 @@ impl CodeGenerator for CompInfo { } let mut method_names = Default::default(); + let discovered_id = DiscoveredItemId::new(item.id().as_usize()); if ctx.options().codegen_config.methods() { for method in self.methods() { assert_ne!(method.kind(), MethodKind::Constructor); @@ -3020,7 +3013,6 @@ impl Method { // First of all, output the actual function. let function_item = ctx.resolve_item(self.signature()); - let id = DiscoveredItemId::new(function_item.id().as_usize()); if !function_item.process_before_codegen(ctx, result) { return; } @@ -3067,14 +3059,11 @@ impl Method { method_names.insert(name.clone()); - ctx.options().for_each_callback(|cb| { - cb.new_item_found( - id, - DiscoveredItem::Method { - parent: parent_id, - final_name: name.clone(), - }, - ); + utils::call_discovered_item_callback(ctx, function_item, || { + DiscoveredItem::Method { + parent: parent_id, + final_name: name.clone(), + } }); let mut function_name = function_item.canonical_name(ctx); @@ -3804,13 +3793,10 @@ impl CodeGenerator for Enum { let repr = repr.to_rust_ty_or_opaque(ctx, item); let has_typedef = ctx.is_enum_typedef_combo(item.id()); - ctx.options().for_each_callback(|cb| { - cb.new_item_found( - DiscoveredItemId::new(item.id().as_usize()), - DiscoveredItem::Enum { - final_name: name.to_string(), - }, - ); + utils::call_discovered_item_callback(ctx, item, || { + DiscoveredItem::Enum { + final_name: name.to_string(), + } }); let mut builder = EnumBuilder::new( @@ -4595,7 +4581,6 @@ impl CodeGenerator for Function { ) -> Self::Return { debug!("::codegen: item = {item:?}"); debug_assert!(item.is_enabled_for_codegen(ctx)); - let id = DiscoveredItemId::new(item.id().as_usize()); let is_internal = matches!(self.linkage(), Linkage::Internal); @@ -4712,13 +4697,10 @@ impl CodeGenerator for Function { if times_seen > 0 { write!(&mut canonical_name, "{times_seen}").unwrap(); } - ctx.options().for_each_callback(|cb| { - cb.new_item_found( - id, - DiscoveredItem::Function { - final_name: canonical_name.to_string(), - }, - ); + utils::call_discovered_item_callback(ctx, item, || { + DiscoveredItem::Function { + final_name: canonical_name.to_string(), + } }); let link_name_attr = self.link_name().or_else(|| { @@ -5259,6 +5241,7 @@ pub(crate) mod utils { use super::helpers::BITFIELD_UNIT; use super::serialize::CSerialize; use super::{error, CodegenError, CodegenResult, ToRustTyOrOpaque}; + use crate::callbacks::DiscoveredItemId; use crate::ir::context::BindgenContext; use crate::ir::context::TypeId; use crate::ir::function::{Abi, ClangAbi, FunctionSig}; @@ -5988,4 +5971,17 @@ pub(crate) mod utils { true } + + pub(super) fn call_discovered_item_callback( + ctx: &BindgenContext, + item: &Item, + discovered_item_creator: impl Fn() -> crate::callbacks::DiscoveredItem, + ) { + ctx.options().for_each_callback(|cb| { + cb.new_item_found( + DiscoveredItemId::new(item.id().as_usize()), + discovered_item_creator(), + ); + }); + } } From 65ad89a5b47135fc21df5f060baa1e318004485d Mon Sep 17 00:00:00 2001 From: Adrian Taylor Date: Wed, 19 Feb 2025 16:54:05 +0000 Subject: [PATCH 2/3] Include source location in callback. This adds more information to ParseCallbacks which indicates the location in the original source code at which a given item was found. This has proven to be useful in downstream code generators in providing diagnostics to explain why a given item can't be represented in Rust. (There are lots of reasons why this might not be the case - autocxx has around 100 which can be found here - https://github.com/google/autocxx/blob/d85eac76c9b3089d0d86249e857ff0e8c36b988f/engine/src/conversion/convert_error.rs#L39 - but irrespective of the specific reasons, it's useful to be able to point to the original location when emitting diagnostics). Should we make this a new callback or include this information within the existing callback? Pros of making it a new callback: * No compatibility breakage. Pros of including it in this existing callback: * No need to specify and test a policy about whether such callbacks always happen together, or may arrive individually * Easier for recipients (including bindgen's own test suite) to keep track of the source code location received. * Because we add new items to the DiscoveryItem enum anyway, we seem to have accepted it's OK to break compatibility in this callback (for now at least). Therefore I'm adding it as a parameter to the existing callback. If it's deemed acceptable to break compatibility in this way, I will follow the same thought process for some other changes too. Part of https://github.com/google/autocxx/issues/124. --- .../item_discovery_callback/mod.rs | 284 +++++++++++++----- bindgen/callbacks.rs | 16 +- bindgen/codegen/mod.rs | 11 + 3 files changed, 236 insertions(+), 75 deletions(-) diff --git a/bindgen-tests/tests/parse_callbacks/item_discovery_callback/mod.rs b/bindgen-tests/tests/parse_callbacks/item_discovery_callback/mod.rs index 6a6d0149f5..8873ecede9 100644 --- a/bindgen-tests/tests/parse_callbacks/item_discovery_callback/mod.rs +++ b/bindgen-tests/tests/parse_callbacks/item_discovery_callback/mod.rs @@ -4,23 +4,57 @@ use std::rc::Rc; use regex::Regex; -use bindgen::callbacks::{DiscoveredItem, DiscoveredItemId, ParseCallbacks}; +use bindgen::callbacks::{ + DiscoveredItem, DiscoveredItemId, ParseCallbacks, SourceLocation, +}; use bindgen::Builder; #[derive(Debug, Default)] struct ItemDiscovery(Rc>); -pub type ItemCache = HashMap; +pub type ItemCache = HashMap; + +#[derive(Debug)] +pub struct DiscoveredInformation(DiscoveredItem, Option); impl ParseCallbacks for ItemDiscovery { - fn new_item_found(&self, _id: DiscoveredItemId, _item: DiscoveredItem) { - self.0.borrow_mut().insert(_id, _item); + fn new_item_found( + &self, + id: DiscoveredItemId, + item: DiscoveredItem, + source_location: Option<&SourceLocation>, + ) { + self.0 + .borrow_mut() + .insert(id, DiscoveredInformation(item, source_location.cloned())); } } +#[derive(Debug)] +pub struct ItemExpectations { + item: DiscoveredItem, + source_location: Option<(usize, usize, usize)>, +} + +impl ItemExpectations { + fn new( + item: DiscoveredItem, + line: usize, + col: usize, + byte_offset: usize, + ) -> Self { + Self { + item, + source_location: Some((line, col, byte_offset)), + } + } +} + +type ExpectationMap = HashMap; + fn test_item_discovery_callback( header: &str, - expected: &HashMap, + expected: &HashMap, ) { let discovery = ItemDiscovery::default(); let info = Rc::clone(&discovery.0); @@ -34,72 +68,117 @@ fn test_item_discovery_callback( .generate() .expect("TODO: panic message"); - compare_item_caches(&info.borrow(), expected); + compare_item_caches(&info.borrow(), expected, header); } #[test] fn test_item_discovery_callback_c() { - let expected = ItemCache::from([ + let expected = ExpectationMap::from([ ( DiscoveredItemId::new(10), - DiscoveredItem::Struct { - original_name: Some("NamedStruct".to_string()), - final_name: "NamedStruct".to_string(), - }, + ItemExpectations::new( + DiscoveredItem::Struct { + original_name: Some("NamedStruct".to_string()), + final_name: "NamedStruct".to_string(), + }, + 4, + 8, + 73, + ), ), ( DiscoveredItemId::new(11), - DiscoveredItem::Alias { - alias_name: "AliasOfNamedStruct".to_string(), - alias_for: DiscoveredItemId::new(10), - }, + ItemExpectations::new( + DiscoveredItem::Alias { + alias_name: "AliasOfNamedStruct".to_string(), + alias_for: DiscoveredItemId::new(10), + }, + 7, + 28, + 118, + ), ), ( DiscoveredItemId::new(20), - DiscoveredItem::Union { - original_name: Some("NamedUnion".to_string()), - final_name: "NamedUnion".to_string(), - }, + ItemExpectations::new( + DiscoveredItem::Union { + original_name: Some("NamedUnion".to_string()), + final_name: "NamedUnion".to_string(), + }, + 13, + 7, + 209, + ), ), ( DiscoveredItemId::new(21), - DiscoveredItem::Alias { - alias_name: "AliasOfNamedUnion".to_string(), - alias_for: DiscoveredItemId::new(20), - }, + ItemExpectations::new( + DiscoveredItem::Alias { + alias_name: "AliasOfNamedUnion".to_string(), + alias_for: DiscoveredItemId::new(20), + }, + 16, + 26, + 251, + ), ), ( DiscoveredItemId::new(27), - DiscoveredItem::Alias { - alias_name: "AliasOfNamedEnum".to_string(), - alias_for: DiscoveredItemId::new(24), - }, + ItemExpectations::new( + DiscoveredItem::Alias { + alias_name: "AliasOfNamedEnum".to_string(), + alias_for: DiscoveredItemId::new(24), + }, + 28, + 24, + 515, + ), ), ( DiscoveredItemId::new(24), - DiscoveredItem::Enum { - final_name: "NamedEnum".to_string(), - }, + ItemExpectations::new( + DiscoveredItem::Enum { + final_name: "NamedEnum".to_string(), + }, + 24, + 6, + 466, + ), ), ( DiscoveredItemId::new(30), - DiscoveredItem::Struct { - original_name: None, - final_name: "_bindgen_ty_*".to_string(), - }, + ItemExpectations::new( + DiscoveredItem::Struct { + original_name: None, + final_name: "_bindgen_ty_*".to_string(), + }, + 2, + 38, + 48, + ), ), ( DiscoveredItemId::new(40), - DiscoveredItem::Union { - original_name: None, - final_name: "_bindgen_ty_*".to_string(), - }, + ItemExpectations::new( + DiscoveredItem::Union { + original_name: None, + final_name: "_bindgen_ty_*".to_string(), + }, + 11, + 37, + 186, + ), ), ( DiscoveredItemId::new(41), - DiscoveredItem::Function { - final_name: "named_function".to_string(), - }, + ItemExpectations::new( + DiscoveredItem::Function { + final_name: "named_function".to_string(), + }, + 32, + 6, + 553, + ), ), ]); test_item_discovery_callback( @@ -108,40 +187,58 @@ fn test_item_discovery_callback_c() { #[test] fn test_item_discovery_callback_cpp() { - let expected = ItemCache::from([ + let expected = ExpectationMap::from([ ( DiscoveredItemId::new(1), - DiscoveredItem::Struct { - original_name: Some("SomeClass".to_string()), - final_name: "SomeClass".to_string(), - }, + ItemExpectations::new( + DiscoveredItem::Struct { + original_name: Some("SomeClass".to_string()), + final_name: "SomeClass".to_string(), + }, + 3, + 7, + 18, + ), ), ( DiscoveredItemId::new(2), - DiscoveredItem::Method { - final_name: "named_method".to_string(), - parent: DiscoveredItemId::new(1), - }, + ItemExpectations::new( + DiscoveredItem::Method { + final_name: "named_method".to_string(), + parent: DiscoveredItemId::new(1), + }, + 5, + 10, + 47, + ), ), ]); test_item_discovery_callback( "/tests/parse_callbacks/item_discovery_callback/header_item_discovery.hpp", &expected); } -pub fn compare_item_caches(generated: &ItemCache, expected: &ItemCache) { +fn compare_item_caches( + generated: &ItemCache, + expected: &ExpectationMap, + expected_filename: &str, +) { // We can't use a simple Eq::eq comparison because of two reasons: // - anonymous structs/unions will have a final name generated by bindgen which may change // if the header file or the bindgen logic is altered // - aliases have a DiscoveredItemId that we can't directly compare for the same instability reasons for expected_item in expected.values() { - let found = generated.iter().find(|(_generated_id, generated_item)| { - compare_item_info( - expected_item, - generated_item, - expected, - generated, - ) - }); + let found = + generated + .iter() + .find(|(_generated_id, generated_item)| { + compare_item_info( + expected_item, + generated_item, + expected, + generated, + expected_filename, + ) + }); assert!( found.is_some(), @@ -151,40 +248,72 @@ pub fn compare_item_caches(generated: &ItemCache, expected: &ItemCache) { } fn compare_item_info( - expected_item: &DiscoveredItem, - generated_item: &DiscoveredItem, - expected: &ItemCache, + expected_item: &ItemExpectations, + generated_item: &DiscoveredInformation, + expected: &ExpectationMap, generated: &ItemCache, + expected_filename: &str, ) -> bool { - if std::mem::discriminant(expected_item) != - std::mem::discriminant(generated_item) + if std::mem::discriminant(&expected_item.item) + != std::mem::discriminant(&generated_item.0) { return false; } - match generated_item { + let is_a_match = match generated_item.0 { DiscoveredItem::Struct { .. } => { - compare_struct_info(expected_item, generated_item) + compare_struct_info(&expected_item.item, &generated_item.0) } DiscoveredItem::Union { .. } => { - compare_union_info(expected_item, generated_item) + compare_union_info(&expected_item.item, &generated_item.0) } DiscoveredItem::Alias { .. } => compare_alias_info( - expected_item, - generated_item, + &expected_item.item, + &generated_item.0, expected, generated, + expected_filename, ), DiscoveredItem::Enum { .. } => { - compare_enum_info(expected_item, generated_item) + compare_enum_info(&expected_item.item, &generated_item.0) } DiscoveredItem::Function { .. } => { - compare_function_info(expected_item, generated_item) + compare_function_info(&expected_item.item, &generated_item.0) } DiscoveredItem::Method { .. } => { - compare_method_info(expected_item, generated_item) + compare_method_info(&expected_item.item, &generated_item.0) + } + }; + + if is_a_match { + // Compare source location + assert!( + generated_item.1.is_some() + == expected_item.source_location.is_some(), + "No source location provided when one was expected" + ); + if let Some(generated_location) = generated_item.1.as_ref() { + let expected_location = expected_item.source_location.unwrap(); + assert!( + generated_location + .file_name + .as_ref() + .expect("No filename provided") + .ends_with(expected_filename), + "Filename differed" + ); + assert_eq!( + ( + generated_location.line, + generated_location.col, + generated_location.byte_offset + ), + expected_location, + "Line/col/offsets differ" + ); } } + is_a_match } pub fn compare_names(expected_name: &str, generated_name: &str) -> bool { @@ -288,8 +417,9 @@ pub fn compare_enum_info( pub fn compare_alias_info( expected_item: &DiscoveredItem, generated_item: &DiscoveredItem, - expected: &ItemCache, + expected: &ExpectationMap, generated: &ItemCache, + expected_filename: &str, ) -> bool { let DiscoveredItem::Alias { alias_name: expected_alias_name, @@ -319,7 +449,13 @@ pub fn compare_alias_info( return false; }; - compare_item_info(expected_aliased, generated_aliased, expected, generated) + compare_item_info( + expected_aliased, + generated_aliased, + expected, + generated, + expected_filename, + ) } pub fn compare_function_info( diff --git a/bindgen/callbacks.rs b/bindgen/callbacks.rs index 8ad06b3375..ea53f0fbb0 100644 --- a/bindgen/callbacks.rs +++ b/bindgen/callbacks.rs @@ -169,7 +169,7 @@ pub trait ParseCallbacks: fmt::Debug { } /// This will get called everytime an item (currently struct, union, and alias) is found with some information about it - fn new_item_found(&self, _id: DiscoveredItemId, _item: DiscoveredItem) {} + fn new_item_found(&self, _id: DiscoveredItemId, _item: DiscoveredItem, _source_location: Option<&SourceLocation>) {} // TODO add callback for ResolvedTypeRef } @@ -315,3 +315,17 @@ pub struct FieldInfo<'a> { /// The name of the type of the field. pub field_type_name: Option<&'a str>, } + +/// Location in the source code. Roughly equivalent to the same type +/// within `clang_sys`. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct SourceLocation { + /// Line number. + pub line: usize, + /// Column number within line. + pub col: usize, + /// Byte offset within file. + pub byte_offset: usize, + /// Filename, if known. + pub file_name: Option, +} diff --git a/bindgen/codegen/mod.rs b/bindgen/codegen/mod.rs index 6416dfe4d2..41d77f4fd7 100644 --- a/bindgen/codegen/mod.rs +++ b/bindgen/codegen/mod.rs @@ -5977,10 +5977,21 @@ pub(crate) mod utils { item: &Item, discovered_item_creator: impl Fn() -> crate::callbacks::DiscoveredItem, ) { + let source_location = item.location().map(|clang_location| { + let (file, line, col, byte_offset) = clang_location.location(); + let file_name = file.name(); + crate::callbacks::SourceLocation { + line, + col, + byte_offset, + file_name, + } + }); ctx.options().for_each_callback(|cb| { cb.new_item_found( DiscoveredItemId::new(item.id().as_usize()), discovered_item_creator(), + source_location.as_ref(), ); }); } From 73c626c9f2c2cf98ecce1773a849b3d85b12344e Mon Sep 17 00:00:00 2001 From: Adrian Taylor Date: Thu, 20 Feb 2025 13:09:01 +0000 Subject: [PATCH 3/3] Clippy formatting fixes. --- .../item_discovery_callback/mod.rs | 29 +++++++++---------- bindgen/callbacks.rs | 8 ++++- 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/bindgen-tests/tests/parse_callbacks/item_discovery_callback/mod.rs b/bindgen-tests/tests/parse_callbacks/item_discovery_callback/mod.rs index 8873ecede9..a9a9523895 100644 --- a/bindgen-tests/tests/parse_callbacks/item_discovery_callback/mod.rs +++ b/bindgen-tests/tests/parse_callbacks/item_discovery_callback/mod.rs @@ -227,18 +227,15 @@ fn compare_item_caches( // if the header file or the bindgen logic is altered // - aliases have a DiscoveredItemId that we can't directly compare for the same instability reasons for expected_item in expected.values() { - let found = - generated - .iter() - .find(|(_generated_id, generated_item)| { - compare_item_info( - expected_item, - generated_item, - expected, - generated, - expected_filename, - ) - }); + let found = generated.iter().find(|(_generated_id, generated_item)| { + compare_item_info( + expected_item, + generated_item, + expected, + generated, + expected_filename, + ) + }); assert!( found.is_some(), @@ -254,8 +251,8 @@ fn compare_item_info( generated: &ItemCache, expected_filename: &str, ) -> bool { - if std::mem::discriminant(&expected_item.item) - != std::mem::discriminant(&generated_item.0) + if std::mem::discriminant(&expected_item.item) != + std::mem::discriminant(&generated_item.0) { return false; } @@ -288,8 +285,8 @@ fn compare_item_info( if is_a_match { // Compare source location assert!( - generated_item.1.is_some() - == expected_item.source_location.is_some(), + generated_item.1.is_some() == + expected_item.source_location.is_some(), "No source location provided when one was expected" ); if let Some(generated_location) = generated_item.1.as_ref() { diff --git a/bindgen/callbacks.rs b/bindgen/callbacks.rs index ea53f0fbb0..c8ac9a5e15 100644 --- a/bindgen/callbacks.rs +++ b/bindgen/callbacks.rs @@ -169,7 +169,13 @@ pub trait ParseCallbacks: fmt::Debug { } /// This will get called everytime an item (currently struct, union, and alias) is found with some information about it - fn new_item_found(&self, _id: DiscoveredItemId, _item: DiscoveredItem, _source_location: Option<&SourceLocation>) {} + fn new_item_found( + &self, + _id: DiscoveredItemId, + _item: DiscoveredItem, + _source_location: Option<&SourceLocation>, + ) { + } // TODO add callback for ResolvedTypeRef }