diff --git a/Cargo.lock b/Cargo.lock index 6a288eb..434ceda 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,32 +2,38 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "dot-writer" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2f7a508d3f95b7cb559acf2231c7efad02fe04061d3165b12513c2dbcc77af0" + [[package]] name = "once_cell" -version = "1.19.0" +version = "1.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" [[package]] name = "pin-project-lite" -version = "0.2.14" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" [[package]] name = "proc-macro2" -version = "1.0.85" +version = "1.0.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22244ce15aa966053a896d1accb3a6e68469b97c7f33f284b99f0d576879fc23" +checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.36" +version = "1.0.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" dependencies = [ "proc-macro2", ] @@ -36,14 +42,15 @@ dependencies = [ name = "smir_pretty" version = "0.1.0" dependencies = [ + "dot-writer", "tracing", ] [[package]] name = "syn" -version = "2.0.87" +version = "2.0.96" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d" +checksum = 
"d5d0adab1ae378d7f53bdebc67a39f1f151407ef230f0ce2883572f5d8985c80" dependencies = [ "proc-macro2", "quote", @@ -52,9 +59,9 @@ dependencies = [ [[package]] name = "tracing" -version = "0.1.40" +version = "0.1.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" dependencies = [ "pin-project-lite", "tracing-attributes", @@ -63,9 +70,9 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.27" +version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" dependencies = [ "proc-macro2", "quote", @@ -74,15 +81,15 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.32" +version = "0.1.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c" dependencies = [ "once_cell", ] [[package]] name = "unicode-ident" -version = "1.0.12" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" +checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" diff --git a/Cargo.toml b/Cargo.toml index ec09317..721edc2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,7 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +dot-writer = "0.1.4" tracing = "0.1" # serde = { version = "=1.0.202", features = ["derive"] } # serde_cbor = "0.11" diff --git a/README.md b/README.md index a3f06f7..c9e7853 100644 --- a/README.md +++ b/README.md 
@@ -39,6 +39,14 @@ To generate stable MIR output without building a binary, you can invoke the tool ./run.sh -Z no-codegen ``` +There is experimental support for rendering the Stable-MIR items and their basic blocks as a +call graph in graphviz' dot format. + +To produce a dot file `*.smir.dot` (instead of `*.smir.json`), one can invoke the driver with +_first_ argument `--dot`. When using `--json` as the first argument, the `*.smir.json` file +will be written. Any other strings given as first argument will be passed to the compiler +(like all subsequent arguments). + There are a few environment variables that can be set to control the tools output: 1. `LINK_ITEMS` - add entries to the link-time `functions` map for each monomorphic item in the crate; diff --git a/src/lib.rs b/src/lib.rs index f17c342..86ca158 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,6 @@ #![feature(rustc_private)] pub mod driver; +pub mod mk_graph; pub mod printer; pub use driver::stable_mir_driver; pub use printer::*; diff --git a/src/main.rs b/src/main.rs index d1c9700..231bec1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,8 +4,23 @@ pub mod driver; pub mod printer; use driver::stable_mir_driver; use printer::emit_smir; +use smir_pretty::mk_graph::emit_dotfile; fn main() { - let args: Vec<_> = env::args().collect(); - stable_mir_driver(&args, emit_smir) + let mut args: Vec<String> = env::args().collect(); + // only the first argument selects the output format; a recognised flag is removed before the rest is handed to the driver + match args.get(1) { + None => + stable_mir_driver(&args, emit_smir), // backward compatibility + Some(arg) if arg == "--json" => { + args.remove(1); + stable_mir_driver(&args, emit_smir) + } + Some(arg) if arg == "--dot" => { + args.remove(1); + stable_mir_driver(&args, emit_dotfile) + } + Some(_other) => + stable_mir_driver(&args, emit_smir), // backward compatibility + } } diff --git a/src/mk_graph.rs b/src/mk_graph.rs new file mode 100644 index 0000000..0411450 --- /dev/null +++ b/src/mk_graph.rs @@ -0,0 +1,331 @@ +use std::{collections::{HashMap, HashSet}, fs::File, 
hash::{DefaultHasher, Hash, Hasher}, io::{self, Write}}; + +use dot_writer::{Attributes, Color, DotWriter, Scope, Style}; + +extern crate rustc_middle; +use rustc_middle::ty::TyCtxt; + +extern crate stable_mir; +use rustc_session::config::{OutFileName, OutputType}; + +extern crate rustc_session; +use stable_mir::ty::Ty; +use stable_mir::mir::{ + BasicBlock, + ConstOperand, + Operand, + Place, + Statement, + TerminatorKind, + UnwindAction, +}; + +use crate::{printer::{FnSymType, SmirJson, collect_smir}, MonoItemKind}; + +/// Entry point to write the dot file: renders the collected Stable MIR with `to_dot_file` and writes it to stdout or to the MIR output path with extension `smir.dot`. Panics if the file cannot be created or written. +pub fn emit_dotfile(tcx: TyCtxt<'_>) { + + let smir_dot = collect_smir(tcx).to_dot_file(); + + match tcx.output_filenames(()).path(OutputType::Mir) { + OutFileName::Stdout => { + write!(io::stdout(), "{}", smir_dot).expect("Failed to write smir.dot"); + } + OutFileName::Real(path) => { + let dot_path = path.with_extension("smir.dot"); + let file = File::create(&dot_path) + .unwrap_or_else(|e| panic!("Failed to create {}: {e}", dot_path.display())); + let mut b = io::BufWriter::new(file); + write!(b, "{}", smir_dot).and_then(|_| b.flush()).expect("Failed to write smir.dot"); + } + } +} + +impl SmirJson<'_> { + /// Consumes the collected data and returns Graphviz dot source: one cluster per function item, one node per basic block, edges according to each block's terminator, plus call edges between clusters. + pub fn to_dot_file(self) -> String { + let mut bytes = Vec::new(); + + { + let mut writer = DotWriter::from(&mut bytes); + + writer.set_pretty_print(true); + + let mut graph = writer.digraph(); + graph.set_label(&self.name[..]); + + let func_map: HashMap<Ty, String> = + self.functions + .into_iter() + .map(|(k,v)| (k.0, function_string(v))) + .collect(); + + let item_names: HashSet<String> = + self.items + .iter() + .map(|i| i.symbol_name.clone()) + .collect(); + + // first create all nodes for functions not in the items list + for f in func_map.values() { + if ! 
item_names.contains(f) { + graph + .node_named(block_name(f, 0)) + .set_label(&name_lines(f)) + .set_color(Color::Red); + } + } + + for item in self.items { + match item.mono_item_kind { + MonoItemKind::MonoItemFn{ name, body, id: _} => { + let mut c = graph.cluster(); + c.set_label(&name_lines(&name)); + if is_unqualified(&name) { + c.set_style(Style::Filled); + c.set_color(Color::LightGrey); + } + + // Cannot define local functions that capture env. variables. Instead we define _closures_. + let process_block = |cluster:&mut Scope<'_,'_>, node_id: usize, b: &BasicBlock | { + let name = &item.symbol_name; + let this_block = block_name(name, node_id); + let mut n = cluster.node_named(&this_block); + // TODO: render statements and terminator as text label (with line breaks) + // switch on terminator kind, add inner and out-edges according to terminator + use TerminatorKind::*; + match &b.terminator.kind { + + Goto{target} => { + n.set_label("Goto"); + drop(n); // so we can borrow `cluster` again below + cluster.edge(&this_block, block_name(name, *target)); + }, + SwitchInt{discr:_, targets} => { + n.set_label("SwitchInt"); + drop(n); // so we can borrow `cluster` again below + for (d,t) in targets.clone().branches() { + cluster + .edge(&this_block, block_name(name, t)) + .attributes() + .set_label(&format!("{d}")); + }; + cluster + .edge(&this_block, block_name(name, targets.otherwise())) + .attributes() + .set_label("other"); + }, + Resume{} => { + n.set_label("Resume"); + }, + Abort{} => { + n.set_label("Abort"); + }, + Return{} => { + n.set_label("Return"); + }, + Unreachable{} => { + n.set_label("Unreachable"); + }, + TerminatorKind::Drop{place, target, unwind} => { + n.set_label(&format!("Drop {}", show_place(place))); + drop(n); + if let UnwindAction::Cleanup(t) = unwind { + cluster + .edge(&this_block, block_name(name, *t)) + .attributes() + .set_label("Cleanup"); + } + cluster + .edge(&this_block, block_name(name, *target)); + }, + Call{func: _, args: _, 
destination, target, unwind} => { + n.set_label("Call()"); + drop(n); + if let UnwindAction::Cleanup(t) = unwind { + cluster + .edge(&this_block, block_name(name, *t)) + .attributes() + .set_label("Cleanup"); + } + if let Some(t) = target { + let dest = show_place(destination); + cluster + .edge(&this_block, block_name(name, *t)) + .attributes() + .set_label(&dest); + } + + // The call edge has to be drawn outside the cluster, outside this function (cluster borrows &mut graph)! + // Code for that is therefore separated into its own second function below. + }, + Assert{target, ..} => { + n.set_label("Assert"); + drop(n); + cluster + .edge(&this_block, block_name(name, *target)); + }, + InlineAsm{destination, unwind,..} => { + n.set_label("Inline ASM"); + drop(n); + if let Some(t) = destination { + cluster + .edge(&this_block, block_name(name, *t)); + } + if let UnwindAction::Cleanup(t) = unwind { + cluster + .edge(&this_block, block_name(name, *t)) + .attributes() + .set_label("Cleanup"); + } + } + } + }; + + let process_blocks = |cluster:&mut Scope<'_,'_>, offset, blocks: &Vec<BasicBlock>| { + let mut n:usize = offset; + for b in blocks { + process_block(cluster, n, b); + n += 1; + } + }; + + match &body.len() { + 0 => { + c.node_auto().set_label(""); + }, + 1 => { + process_blocks(&mut c, 0, &body[0].blocks); + } + _more => { + let mut curr: usize = 0; + for b in &body { + let mut cc = c.cluster(); + process_blocks(&mut cc, curr, &b.blocks); + curr += b.blocks.len(); + } + } + } + drop(c); // so we can borrow graph again + + // call edges have to be added _outside_ the cluster of blocks for one function + // because they go between different clusters. Due to a scope/borrow issue, we have + // to make a 2nd pass over the bodies of the item. 
+ let add_call_edges = | graph: &mut Scope<'_,'_>, offset: usize, bs: &Vec<BasicBlock> | { + for (i, b) in bs.iter().enumerate() { + let this_block = block_name(&item.symbol_name, offset + i); + + match &b.terminator.kind { + TerminatorKind::Call{func, args, ..} => { + let e = match func { + Operand::Constant(ConstOperand{const_, ..}) => { + if let Some(callee) = func_map.get(&const_.ty()) { + // callee node/body will be added when its body is added, missing ones added before + graph + .edge(&this_block, block_name(callee, 0)) + } else { + let unknown = format!("{}", const_.ty()); + // pathological case, could panic! instead. + // all unknown callees will be collapsed into one `unknown` node + graph + .edge(&this_block, unknown) + } + }, + Operand::Copy(place) => { + graph.edge(&this_block, format!("{}: {}", &this_block, show_place(place))) + }, + Operand::Move(place) => { + graph.edge(&this_block, format!("{}: {}", &this_block, show_place(place))) + }, + }; + let arg_str = args.into_iter().map(show_op).collect::<Vec<_>>().join(","); + e.attributes().set_label(&arg_str); + + }, + _other => { + // nothing to do + }, + } + } + }; + + match &body.len() { + 0 => {}, + 1 => { + add_call_edges(&mut graph, 0, &body[0].blocks); + } + _more => { + let mut curr: usize = 0; + for b in &body { + add_call_edges(&mut graph, curr, &b.blocks); + curr += b.blocks.len(); + } + } + } + + } + MonoItemKind::MonoItemGlobalAsm { asm } => { + let mut n = graph.node_named(short_name(&asm)); + n.set_label(&asm.lines().collect::<String>()[..]); + } + MonoItemKind::MonoItemStatic { name, id: _, allocation: _ } => { + let mut n = graph.node_named(short_name(&name)); + n.set_label(&name[..]); + } + } + + } + + } + + String::from_utf8(bytes).expect("Error converting dot file") + } + +} +/// Renders a call argument for an edge label: constants as `const :: <type>`, copied and moved places via `show_place`. +fn show_op(op: &Operand) -> String { + match op { + Operand::Constant(ConstOperand{const_, ..}) => format!("const :: {}", const_.ty()), + Operand::Copy(place) => show_place(place), + Operand::Move(place) => show_place(place), + } +} + 
+fn show_place(p: &Place) -> String { // `_<local>`, followed by `(...)` when the place carries projections + format!("_{}{}", p.local, if !p.projection.is_empty() { "(...)"} else {""}) +} +/// True when `name` contains no `::` path separator. +fn is_unqualified(name: &str) -> bool { + ! name.contains("::") +} +/// Display name of a called function; no-op and intrinsic symbols get a `NoOp: ` / `Intr: ` prefix. +fn function_string(f: FnSymType) -> String { + match f { + FnSymType::NormalSym(name) => name, + FnSymType::NoOpSym(name) => format!("NoOp: {name}"), + FnSymType::IntrinsicSym(name) => format!("Intr: {name}"), + } +} +/// Splits a name after every space and every `::` and joins the pieces with a literal backslash-n, the line-break escape of dot labels. +fn name_lines(name: &str) -> String { + name + .split_inclusive(" ") + .flat_map(|s| s.split_inclusive("::")) + .map(|s| s.to_string()) + .collect::<Vec<_>>() + .join("\\n") +} + +/// Consistent node identifier for a named item: `X` followed by the hex `DefaultHasher` hash of the name. +fn short_name(function_name: &str) -> String { + let mut h = DefaultHasher::new(); + function_name.hash(&mut h); + format!("X{:x}", h.finish()) +} + +/// Consistent identifier for basic block number `id` of the named function: `<short_name>_<id>`. +fn block_name(function_name: &str, id: usize) -> String { + // reuse `short_name` so the two naming schemes cannot drift apart + let prefix = short_name(function_name); + format!("{prefix}_{id}") +} diff --git a/src/printer.rs b/src/printer.rs index 3207266..0db98ff 100644 --- a/src/printer.rs +++ b/src/printer.rs @@ -1,3 +1,4 @@ +use std::io::Write; use std::{collections::HashMap,fs::File,io,iter::Iterator,vec::Vec,str,}; extern crate rustc_middle; extern crate rustc_monomorphize; @@ -15,12 +16,12 @@ use rustc_middle::ty::{TyCtxt, Ty, TyKind, EarlyBinder, FnSig, GenericArgs, Type use rustc_session::config::{OutFileName, OutputType}; use rustc_span::{def_id::{DefId, LOCAL_CRATE}, symbol}; use rustc_smir::rustc_internal; -use stable_mir::{ +use stable_mir::{ CrateItem, CrateDef, ItemKind, - mir::{Body,LocalDecl,Terminator,TerminatorKind,Rvalue,visit::MirVisitor}, - ty::{Allocation,ForeignItemKind}, + mir::{Body,LocalDecl,Terminator,TerminatorKind,Rvalue,alloc::AllocId,visit::MirVisitor}, + ty::{Allocation,ConstDef,ForeignItemKind}, mir::mono::{MonoItem,Instance,InstanceKind} }; use serde::{Serialize, Serializer}; @@ -225,7 +226,7 @@ fn hash(obj: T) -> u64 { // 
========================================================= #[derive(Serialize, Clone)] -enum MonoItemKind { +pub enum MonoItemKind { MonoItemFn { name: String, id: stable_mir::DefId, @@ -241,11 +242,11 @@ enum MonoItemKind { }, } #[derive(Serialize, Clone)] -struct Item { +pub struct Item { #[serde(skip)] mono_item: MonoItem, - symbol_name: String, - mono_item_kind: MonoItemKind, + pub symbol_name: String, + pub mono_item_kind: MonoItemKind, details: Option, } @@ -299,7 +300,7 @@ fn mk_item(tcx: TyCtxt<'_>, item: MonoItem, sym_name: String) -> Item { // ========================== #[derive(Debug, Clone, Serialize, PartialEq, Eq)] -enum FnSymType { +pub enum FnSymType { NoOpSym(String), IntrinsicSym(String), NormalSym(String), @@ -329,7 +330,7 @@ fn fn_inst_sym<'tcx>(tcx: TyCtxt<'tcx>, ty: Option, inst: Op } #[derive(Clone, Debug, Eq, Hash, PartialEq)] -struct LinkMapKey<'tcx>(stable_mir::ty::Ty, Option>); +pub struct LinkMapKey<'tcx>(pub stable_mir::ty::Ty, Option>); impl Serialize for LinkMapKey<'_> { fn serialize(&self, serializer: S) -> Result @@ -492,7 +493,7 @@ fn collect_ty(val_collector: &mut InternedValueCollector, val: stable_mir::ty::T Some(val.layout()) } }; - + let maybe_layout_shape = if let Some(Ok(layout)) = maybe_layout { Some(layout.shape()) } else { @@ -716,15 +717,36 @@ fn collect_items(tcx: TyCtxt<'_>) -> HashMap { }).collect::>() } +/// the serialised data structure as a whole +#[derive(Serialize)] +pub struct SmirJson<'t> { + pub name: String, + pub crate_id: u64, + pub allocs: Vec<(AllocId,AllocInfo)>, + pub functions: Vec<(LinkMapKey<'t>, FnSymType)>, + pub uneval_consts: Vec<(ConstDef, String)>, + pub items: Vec, + pub debug: Option> +} + +#[derive(Serialize)] +pub struct SmirJsonDebugInfo<'t> { + fn_sources: Vec<(LinkMapKey<'t>,ItemSource)>, + types: TyMap, + foreign_modules: Vec<(String, Vec)> +} + // Serialization Entrypoint // ======================== -fn emit_smir_internal(tcx: TyCtxt<'_>, writer: &mut dyn io::Write) { +pub fn 
collect_smir(tcx: TyCtxt<'_>) -> SmirJson { let local_crate = stable_mir::local_crate(); let items = collect_items(tcx); let items_clone = items.clone(); - let (unevaluated_consts, mut items) = collect_unevaluated_constant_items(tcx, items); - let (calls_map, visited_allocs, visited_tys) = collect_interned_values(tcx, items.iter().map(|i| &i.mono_item).collect::>()); + let (unevaluated_consts, mut items) = + collect_unevaluated_constant_items(tcx, items); + let (calls_map, visited_allocs, visited_tys) = + collect_interned_values(tcx, items.iter().map(|i| &i.mono_item).collect::>()); // FIXME: We dump extra static items here --- this should be handled better for (_, alloc) in visited_allocs.iter() { @@ -738,39 +760,48 @@ fn emit_smir_internal(tcx: TyCtxt<'_>, writer: &mut dyn io::Write) { } } - let called_functions = calls_map.iter().map(|(k,(_,name))| (k,name)).collect::>(); - let allocs = visited_allocs.iter().collect::>(); - let crate_id = tcx.stable_crate_id(LOCAL_CRATE).as_u64(); - let json_items = serde_json::to_value(&items).expect("serde_json mono items to value failed"); - write!(writer, "{{\"name\": {}, \"crate_id\": {}, \"allocs\": {}, \"functions\": {}, \"uneval_consts\": {}, \"items\": {}", - serde_json::to_string(&local_crate.name).expect("serde_json string to json failed"), - serde_json::to_string(&crate_id).expect("serde_json number to json failed"), - serde_json::to_string(&allocs).expect("serde_json global allocs to json failed"), - serde_json::to_string(&called_functions).expect("serde_json functions to json failed"), - serde_json::to_string(&unevaluated_consts).expect("serde_json unevaluated consts to json failed"), - serde_json::to_string(&json_items).expect("serde_json mono items to json failed"), - ).expect("Failed to write JSON to file"); - if debug_enabled() { - let fn_sources = calls_map.iter().map(|(k,(source,_))| (k,source)).collect::>(); - write!(writer, ",\"fn_sources\": {}, \"types\": {}, \"foreign_modules\": {}}}", - 
serde_json::to_string(&fn_sources).expect("serde_json functions failed"), - serde_json::to_string(&visited_tys).expect("serde_json tys failed"), - serde_json::to_string(&get_foreign_module_details()).expect("foreign_module serialization failed"), - ).expect("Failed to write JSON to file"); - } else { - write!(writer, "}}").expect("Failed to write JSON to file"); + let debug: Option = + if debug_enabled() { + let fn_sources = + calls_map.clone().into_iter().map(|(k,(source,_))| (k,source)).collect::>(); + Some(SmirJsonDebugInfo { fn_sources, types: visited_tys, foreign_modules: get_foreign_module_details()}) + } else { + None + }; + + let called_functions = + calls_map.into_iter().map(|(k,(_,name))| (k,name)).collect::>(); + let allocs = + visited_allocs.into_iter().collect::>(); + let crate_id = + tcx.stable_crate_id(LOCAL_CRATE).as_u64(); + + SmirJson { + name: local_crate.name, + crate_id: crate_id, + allocs, + functions: called_functions, + uneval_consts: unevaluated_consts.into_iter().collect(), + items, + debug } + } pub fn emit_smir(tcx: TyCtxt<'_>) { + // serialise to a string before the output is opened, so a serialisation failure happens before any file is created + let smir_json = serde_json::to_string(&collect_smir(tcx)).expect("serde_json failed to write result"); + match tcx.output_filenames(()).path(OutputType::Mir) { OutFileName::Stdout => { - let mut f = io::stdout(); - emit_smir_internal(tcx, &mut f); + write!(&io::stdout(), "{}", smir_json).expect("Failed to write smir.json"); } OutFileName::Real(path) => { - let mut f = io::BufWriter::new(File::create(&path.with_extension("smir.json")).expect("Failed to create SMIR output file")); - emit_smir_internal(tcx, &mut f); + let json_path = path.with_extension("smir.json"); + let file = File::create(&json_path) + .unwrap_or_else(|e| panic!("Failed to create {}: {e}", json_path.display())); + let mut b = io::BufWriter::new(file); + write!(b, "{}", smir_json).and_then(|_| b.flush()).expect("Failed to write smir.json"); } } }