From 187336525f397bb5356f539a8f783644a44f79db Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Wed, 2 Jul 2025 19:32:03 +0300 Subject: [PATCH 1/3] builder: more robust `format_args!` "decompiling". --- .../src/builder/builder_methods.rs | 102 ++++++++++-------- 1 file changed, 55 insertions(+), 47 deletions(-) diff --git a/crates/rustc_codegen_spirv/src/builder/builder_methods.rs b/crates/rustc_codegen_spirv/src/builder/builder_methods.rs index 84d6f17f13..eab6fac90f 100644 --- a/crates/rustc_codegen_spirv/src/builder/builder_methods.rs +++ b/crates/rustc_codegen_spirv/src/builder/builder_methods.rs @@ -3399,15 +3399,18 @@ impl<'a, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'tcx> { .map(|operand| operand.id_ref_any()) .collect::>>()?; + let const_as_u32 = |id| match self.builder.lookup_const_by_id(id)? { + SpirvConst::Scalar(x) => u32::try_from(x).ok(), + _ => None, + }; + // Decode the instruction into one of our `Inst`s. Some( match (inst.class.opcode, inst.result_id, &id_operands[..]) { (Op::Bitcast, Some(r), &[x]) => Inst::Bitcast(r, x), (Op::InBoundsAccessChain, Some(r), &[p, i]) => { - if let Some(SpirvConst::Scalar(i)) = - self.builder.lookup_const_by_id(i) - { - Inst::InBoundsAccessChain(r, p, i as u32) + if let Some(i) = const_as_u32(i) { + Inst::InBoundsAccessChain(r, p, i) } else { Inst::Unsupported(inst.class.opcode) } @@ -3494,47 +3497,65 @@ impl<'a, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'tcx> { if rt_args_count > 0 { let rt_args_array_ptr_id = rt_args_slice_ptr_id.unwrap(); - // Each runtime argument has 4 instructions to call one of + // Each runtime argument has A instructions to call one of // the `fmt::rt::Argument::new_*` functions (and temporarily - // store its result), and 4 instructions to copy it into - // the appropriate slot in the array. The groups of 4 and 4 - // instructions, for all runtime args, are each separate. - let copies_to_rt_args_array = - try_rev_take(rt_args_count * 4).ok_or_else(|| { + // store its result), and B instructions to copy it into + // the appropriate slot in the array. The groups of A and B + // instructions, for all runtime args, are each separate, + // so the B×N later instructions are all processed first, + // before moving (backwards) to the A×N earlier instructions. + + let rev_copies_to_rt_args_array_src_ptrs: SmallVec<[_; 4]> = + (0..rt_args_count).rev().map(|rt_arg_idx| { + let copy_to_rt_args_array_insts = try_rev_take(4).ok_or_else(|| { FormatArgsNotRecognized( - "[fmt::rt::Argument; N] copies: ran out of instructions".into(), + "[fmt::rt::Argument; N] copy: ran out of instructions".into(), ) })?; - let copies_to_rt_args_array = copies_to_rt_args_array.chunks(4); - let rt_arg_new_calls = try_rev_take(rt_args_count * 4).ok_or_else(|| { - FormatArgsNotRecognized( - "fmt::rt::Argument::new calls: ran out of instructions".into(), - ) - })?; - let rt_arg_new_calls = rt_arg_new_calls.chunks(4); + match copy_to_rt_args_array_insts[..] { + [ + Inst::InBoundsAccessChain(array_slot, array_base, array_idx), + Inst::InBoundsAccessChain(dst_field_ptr, dst_base_ptr, 0), + Inst::InBoundsAccessChain(src_field_ptr, src_base_ptr, 0), + Inst::CopyMemory(copy_dst, copy_src), + ] if array_base == rt_args_array_ptr_id + && array_idx as usize == rt_arg_idx + && dst_base_ptr == array_slot + && (copy_dst, copy_src) == (dst_field_ptr, src_field_ptr) => + { + Ok(src_base_ptr) + } + _ => { + Err(FormatArgsNotRecognized(format!( + "[fmt::rt::Argument; N] copy sequence ({copy_to_rt_args_array_insts:?})" + ))) + } + } + }).collect::>()?; - for (rt_arg_idx, (rt_arg_new_call_insts, copy_to_rt_args_array_insts)) in - rt_arg_new_calls.zip(copies_to_rt_args_array).enumerate() - { - let call_ret_slot_ptr = match rt_arg_new_call_insts[..] { + let rev_ref_arg_ids_with_ty_and_spec = (rev_copies_to_rt_args_array_src_ptrs + .into_iter()) + .map(|copy_to_rt_args_array_src_ptr| { + let rt_arg_new_call_insts = try_rev_take(4).ok_or_else(|| { + FormatArgsNotRecognized( + "fmt::rt::Argument::new call: ran out of instructions".into(), + ) + })?; + match rt_arg_new_call_insts[..] { [ Inst::Call(call_ret_id, callee_id, ref call_args), Inst::InBoundsAccessChain(tmp_slot_field_ptr, tmp_slot_ptr, 0), Inst::CompositeExtract(field, wrapper_newtype, 0), Inst::Store(st_dst_ptr, st_val), ] if wrapper_newtype == call_ret_id + && tmp_slot_ptr == copy_to_rt_args_array_src_ptr && (st_dst_ptr, st_val) == (tmp_slot_field_ptr, field) => { self.fmt_rt_arg_new_fn_ids_to_ty_and_spec .borrow() .get(&callee_id) .and_then(|&(ty, spec)| match call_args[..] { - [x] => { - decoded_format_args - .ref_arg_ids_with_ty_and_spec - .push((x, ty, spec)); - Some(tmp_slot_ptr) - } + [x] => Some((x, ty, spec)), _ => None, }) } @@ -3544,26 +3565,13 @@ impl<'a, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'tcx> { FormatArgsNotRecognized(format!( "fmt::rt::Argument::new call sequence ({rt_arg_new_call_insts:?})" )) - })?; + }) + }) + .collect::>()?; - match copy_to_rt_args_array_insts[..] { - [ - Inst::InBoundsAccessChain(array_slot, array_base, array_idx), - Inst::InBoundsAccessChain(dst_field_ptr, dst_base_ptr, 0), - Inst::InBoundsAccessChain(src_field_ptr, src_base_ptr, 0), - Inst::CopyMemory(copy_dst, copy_src), - ] if array_base == rt_args_array_ptr_id - && array_idx as usize == rt_arg_idx - && dst_base_ptr == array_slot - && src_base_ptr == call_ret_slot_ptr - && (copy_dst, copy_src) == (dst_field_ptr, src_field_ptr) => {} - _ => { - return Err(FormatArgsNotRecognized(format!( - "[fmt::rt::Argument; N] copies sequence ({copy_to_rt_args_array_insts:?})" - ))); - } - } - } + decoded_format_args.ref_arg_ids_with_ty_and_spec = + rev_ref_arg_ids_with_ty_and_spec; + decoded_format_args.ref_arg_ids_with_ty_and_spec.reverse(); } // If the `pieces: &[&str]` slice needs a bitcast, it'll be here. From 8a4f37afcb712f70d56037f7d9d35941032bdee5 Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Tue, 29 Oct 2024 13:21:31 +0200 Subject: [PATCH 2/3] builder: force `format_args!`-related warnings to be always shown. --- .../src/builder/builder_methods.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/crates/rustc_codegen_spirv/src/builder/builder_methods.rs b/crates/rustc_codegen_spirv/src/builder/builder_methods.rs index eab6fac90f..da3be309a5 100644 --- a/crates/rustc_codegen_spirv/src/builder/builder_methods.rs +++ b/crates/rustc_codegen_spirv/src/builder/builder_methods.rs @@ -3664,7 +3664,16 @@ impl<'a, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'tcx> { Err(FormatArgsNotRecognized(step)) => { if let Some(current_span) = self.current_span { - let mut warn = self.tcx.dcx().struct_span_warn( + // HACK(eddyb) Cargo silences warnings in dependencies. + let force_warn = |span, msg| -> rustc_errors::Diag<'_, ()> { + rustc_errors::Diag::new( + self.tcx.dcx(), + rustc_errors::Level::ForceWarning(None), + msg, + ) + .with_span(span) + }; + let mut warn = force_warn( current_span, "failed to find and remove `format_args!` construction for this `panic!`", ); From b951fa8f43552f1717a23fa7d9865ec120f7c31c Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Mon, 7 Jul 2025 11:10:05 +0300 Subject: [PATCH 3/3] builder: handle `fmt::Arguments::new_v1_formatted` (for complex `format_args!`). --- .../src/builder/builder_methods.rs | 248 ++++++++++++++---- .../src/codegen_cx/declare.rs | 6 + 2 files changed, 204 insertions(+), 50 deletions(-) diff --git a/crates/rustc_codegen_spirv/src/builder/builder_methods.rs b/crates/rustc_codegen_spirv/src/builder/builder_methods.rs index da3be309a5..5521aa973c 100644 --- a/crates/rustc_codegen_spirv/src/builder/builder_methods.rs +++ b/crates/rustc_codegen_spirv/src/builder/builder_methods.rs @@ -7,7 +7,7 @@ use crate::builder_spirv::{BuilderCursor, SpirvConst, SpirvValue, SpirvValueExt, use crate::codegen_cx::CodegenCx; use crate::custom_insts::{CustomInst, CustomOp}; use crate::spirv_type::SpirvType; -use itertools::Itertools; +use itertools::{Either, Itertools}; use rspirv::dr::{InsertPoint, Instruction, Operand}; use rspirv::spirv::{Capability, MemoryModel, MemorySemantics, Op, Scope, StorageClass, Word}; use rustc_apfloat::{Float, Round, Status, ieee}; @@ -3230,6 +3230,16 @@ impl<'a, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'tcx> { /// * `&a` with `typeof a` and ' ', /// * `&b` with `typeof b` and 'x' ref_arg_ids_with_ty_and_spec: SmallVec<[(Word, Ty<'tcx>, char); 2]>, + + /// If `fmt::Arguments::new_v1_formatted` was used, this holds + /// the length of the `&[fmt::rt::Placeholder]` slice, which + /// currently cannot be directly supported, and therefore even + /// if all of `ref_arg_ids_with_ty_and_spec` are printable, + /// a much jankier fallback still has to be used, as it it were: + /// + /// `format!("a{{0}}b{{1}}c\n with {{…}} from: {}, {}", x, y)` + /// (w/ `const_pieces = ["a", "b", "c"]` & `ref_args = [&x, &y]`). + has_unknown_fmt_placeholder_to_args_mapping: Option, } struct FormatArgsNotRecognized(String); @@ -3455,6 +3465,105 @@ impl<'a, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'tcx> { }; match call_args[..] { + // `::new_v1_formatted` + // + // HACK(eddyb) this isn't fully supported, + // as that would require digging into unstable + // internals of `core::fmt::rt::Placeholder`s, + // but the whole call still needs to be removed, + // and both const str pieces and runtime args + // can still be printed (even if in jankier way). + [ + pieces_slice_ptr_id, + pieces_len_id, + rt_args_slice_ptr_id, + rt_args_len_id, + _fmt_placeholders_slice_ptr_id, + fmt_placeholders_len_id, + ] if (pieces_len, rt_args_count) == (!0, !0) => { + let [pieces_len, rt_args_len, fmt_placeholders_len] = match [ + pieces_len_id, + rt_args_len_id, + fmt_placeholders_len_id, + ] + .map(const_u32_as_usize) + { + [Some(a), Some(b), Some(c)] => [a, b, c], + _ => { + return Err(FormatArgsNotRecognized( + "fmt::Arguments::new_v1_formatted \ + with dynamic lengths" + .into(), + )); + } + }; + + // FIXME(eddyb) simplify the logic below after + // https://github.com/rust-lang/rust/pull/139131 + // (~1.88) as it makes `&[rt::Placeholder]` + // constant (via promotion to 'static). + + // HACK(eddyb) this accounts for all of these: + // - `rt::Placeholder` copies into array: 2 insts each + // - `rt::UnsafeArg::new()` call: 1 inst + // - runtime args array->slice ptr cast: 1 inst + // - placeholders array->slice ptr cast: 1 inst + let extra_insts = try_rev_take(3 + fmt_placeholders_len * 2).ok_or_else(|| { + FormatArgsNotRecognized( + "fmt::Arguments::new_v1_formatted call: ran out of instructions".into(), + ) + })?; + let rt_args_slice_ptr_id = match extra_insts[..] { + [.., Inst::Bitcast(out_id, in_id), Inst::Bitcast(..)] + if out_id == rt_args_slice_ptr_id => + { + in_id + } + _ => { + let mut insts = extra_insts; + insts.extend(fmt_args_new_call_insts); + return Err(FormatArgsNotRecognized(format!( + "fmt::Arguments::new_v1_formatted call sequence ({insts:?})", + ))); + } + }; + + // HACK(eddyb) even worse, each call made to + // `rt::Placeholder::new(...)` takes anywhere + // between 16 and 20 instructions each, due + // to `enum`s represented as scalar pairs. + for _ in 0..fmt_placeholders_len { + try_rev_take(16).and_then(|insts| { + let scalar_pairs_with_used_2nd_field = insts + .iter() + .take_while(|inst| { + !matches!(inst, Inst::Load(..)) + }) + .filter(|inst| { + matches!(inst, Inst::InBoundsAccessChain(.., 1)) + }) + .count(); + try_rev_take(scalar_pairs_with_used_2nd_field * 2)?; + Some(()) + }) + .ok_or_else(|| { + FormatArgsNotRecognized( + "fmt::rt::Placeholder::new call: ran out of instructions" + .into(), + ) + })?; + } + + decoded_format_args + .has_unknown_fmt_placeholder_to_args_mapping = + Some(fmt_placeholders_len); + + ( + (pieces_slice_ptr_id, pieces_len), + (Some(rt_args_slice_ptr_id), rt_args_len), + ) + } + // `::new_v1` [pieces_slice_ptr_id, rt_args_slice_ptr_id] => ( (pieces_slice_ptr_id, pieces_len), @@ -3608,58 +3717,97 @@ impl<'a, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'tcx> { let mut debug_printf_args = SmallVec::<[_; 2]>::new(); let message = match try_decode_and_remove_format_args() { Ok(DecodedFormatArgs { - const_pieces, + const_pieces: None, .. + }) => "".into(), + + Ok(DecodedFormatArgs { + const_pieces: Some(const_pieces), ref_arg_ids_with_ty_and_spec, + has_unknown_fmt_placeholder_to_args_mapping, }) => { - match const_pieces { - Some(const_pieces) => { - const_pieces - .into_iter() - .map(|s| Cow::Owned(s.replace('%', "%%"))) - .interleave(ref_arg_ids_with_ty_and_spec.iter().map( - |&(ref_id, ty, spec)| { - use rustc_target::abi::{ - Float::*, Integer::*, Primitive::*, - }; - - let layout = self.layout_of(ty); - - let scalar = match layout.backend_repr { - BackendRepr::Scalar(scalar) => Some(scalar.primitive()), - _ => None, - }; - let debug_printf_fmt = match (spec, scalar) { - // FIXME(eddyb) support more of these, - // potentially recursing to print ADTs. - (' ' | '?', Some(Int(I32, false))) => "%u", - ('x', Some(Int(I32, false))) => "%x", - (' ' | '?', Some(Int(I32, true))) => "%i", - (' ' | '?', Some(Float(F32))) => "%f", - - _ => "", - }; - - if debug_printf_fmt.is_empty() { - return Cow::Owned( - format!("{{/* unprintable {ty} */:{spec}}}") - .replace('%', "%%"), - ); - } + let args = ref_arg_ids_with_ty_and_spec + .iter() + .map(|&(ref_id, ty, spec)| { + use rustc_target::abi::{Float::*, Integer::*, Primitive::*}; - let spirv_type = layout.spirv_type(self.span(), self); - debug_printf_args.push( - self.emit() - .load(spirv_type, None, ref_id, None, []) - .unwrap() - .with_type(spirv_type), - ); - Cow::Borrowed(debug_printf_fmt) - }, - )) - .collect::() - } - None => "".into(), - } + let layout = self.layout_of(ty); + + let scalar = match layout.backend_repr { + BackendRepr::Scalar(scalar) => Some(scalar.primitive()), + _ => None, + }; + let debug_printf_fmt = match (spec, scalar) { + // FIXME(eddyb) support more of these, + // potentially recursing to print ADTs. + (' ' | '?', Some(Int(I32, false))) => "%u", + ('x', Some(Int(I32, false))) => "%x", + (' ' | '?', Some(Int(I32, true))) => "%i", + (' ' | '?', Some(Float(F32))) => "%f", + + _ => "", + }; + + if debug_printf_fmt.is_empty() { + return Cow::Owned( + format!("{{/* unprintable {ty} */:{spec}}}").replace('%', "%%"), + ); + } + + let spirv_type = layout.spirv_type(self.span(), self); + debug_printf_args.push( + self.emit() + .load(spirv_type, None, ref_id, None, []) + .unwrap() + .with_type(spirv_type), + ); + Cow::Borrowed(debug_printf_fmt) + }); + + // HACK(eddyb) due to `fmt::Arguments::new_v1_formatted`, + // we can't always assume that all the formatting arguments + // are used 1:1 as placeholders (i.e. between `const_pieces`). + let (placeholder_count, placeholders_are_args) = + match has_unknown_fmt_placeholder_to_args_mapping { + Some(count) => (count, false), + None => (args.len(), true), + }; + + // HACK(eddyb) extra sanity check to avoid visual mishaps. + let valid_placeholder_count = placeholder_count + .clamp(const_pieces.len().saturating_sub(1), const_pieces.len()); + let placeholders_are_args = + placeholders_are_args && placeholder_count == valid_placeholder_count; + + // FIXME(eddyb) stop using `itertools`'s `intersperse`, + // when it gets stabilized on `Iterator` instead. + #[allow(unstable_name_collisions)] + let (placeholders, suffix) = if placeholders_are_args { + (Either::Left(args), None) + } else { + // See also `has_unknown_fmt_placeholder_to_args_mapping` + // comment (which has an example for 3 pieces and 2 args). + // + // FIXME(eddyb) this could definitely be improved, but + // so far this only really gets hit in esoteric `core` + // internals (UB checks and `char::encode_utf{8,16}`). + ( + Either::Right( + (0..valid_placeholder_count).map(|i| format!("{{{i}}}").into()), + ), + Some( + ["\n with {…} from: ".into()] + .into_iter() + .chain(args.intersperse(", ".into())), + ), + ) + }; + + const_pieces + .into_iter() + .map(|s| Cow::Owned(s.replace('%', "%%"))) + .interleave(placeholders) + .chain(suffix.into_iter().flatten()) + .collect::() } Err(FormatArgsNotRecognized(step)) => { diff --git a/crates/rustc_codegen_spirv/src/codegen_cx/declare.rs b/crates/rustc_codegen_spirv/src/codegen_cx/declare.rs index 9ce6f8fc78..a01bcadaff 100644 --- a/crates/rustc_codegen_spirv/src/codegen_cx/declare.rs +++ b/crates/rustc_codegen_spirv/src/codegen_cx/declare.rs @@ -229,6 +229,12 @@ impl<'tcx> CodegenCx<'tcx> { (pieces_len.parse().unwrap(), rt_args_len.parse().unwrap()), ); } + if demangled_symbol_name == "::new_v1_formatted" { + // HACK(eddyb) `!0` used as a placeholder value to indicate "dynamic". + self.fmt_args_new_fn_ids + .borrow_mut() + .insert(fn_id, (!0, !0)); + } // HACK(eddyb) there is no good way to identify these definitions // (e.g. no `#[lang = "..."]` attribute), but this works well enough.