Skip to content

Commit e526781

Browse files
authored
analyze: add NON_NULL rewrites (#1095)
This branch implements rewriting of nullable pointers (those lacking `PermissionSet::NON_NULL`) to `Option`. This includes the following kinds of rewrites: * Type annotations: `Option<&mut T>` instead of `&mut T` * Casts between nullable and non-nullable via `p.unwrap()` and `Some(p)`. For most permissions, we implement only one direction because the other is invalid/nonsensical, but here we implement both in order to support "unsound" rewrites involving overridden `NON_NULL` flags (as in #1088). * Borrows: when casting `p: Option<&mut T>`, we use `p.as_deref_mut().unwrap()` instead of `p.unwrap()` to avoid consuming the original `p`. (In the code, this is called a "downgrade", since it allows borrowing `Option<Box<T>>` as `Option<&T>` and similar.) * Pointer projections on nullable pointers. Where `NON_NULL` pointers would use `&p[0]`, nullable ones use `p.map(|ptr| &ptr[0])`. Internally, this is represented similar to `Some(&p.unwrap()[0])`, but it's handled specially by `rewrite::expr::convert` to produce a `map` call instead, which passes through `None` without a panic. * `unwrap()` calls on derefs. `*p` is rewritten to `*p.unwrap()`, or to `*p.as_deref().unwrap()` if a downgrade/borrow is necessary to avoid moving `p`. * `ptr::null()` and `0 as *const _` to `None`, and `p.is_null()` to `p.is_none()`. The new `non_null_rewrites.rs` test case passes, and the rewritten code compiles.
2 parents 9ad9d35 + 73652be commit e526781

File tree

12 files changed

+1067
-285
lines changed

12 files changed

+1067
-285
lines changed

c2rust-analyze/src/dataflow/type_check.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use crate::pointee_type::PointeeTypes;
55
use crate::pointer_id::PointerTable;
66
use crate::recent_writes::RecentWrites;
77
use crate::util::{
8-
describe_rvalue, is_null_const, is_transmutable_ptr_cast, ty_callee, Callee, RvalueDesc,
8+
self, describe_rvalue, is_transmutable_ptr_cast, ty_callee, Callee, RvalueDesc,
99
UnknownDefCallee,
1010
};
1111
use assert_matches::assert_matches;
@@ -120,7 +120,7 @@ impl<'tcx> TypeChecker<'tcx, '_> {
120120
CastKind::PointerFromExposedAddress => {
121121
// We support only one case here, which is the case of null pointers
122122
// constructed via casts such as `0 as *const T`
123-
if !op.constant().copied().map(is_null_const).unwrap_or(false) {
123+
if !util::is_null_const_operand(op) {
124124
panic!("Creating non-null pointers from exposed addresses not supported");
125125
}
126126
// The target type of the cast must not have `NON_NULL` permission.

c2rust-analyze/src/rewrite/apply.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -411,6 +411,13 @@ impl<S: Sink> Emitter<'_, S> {
411411
self.emit(rw, 0)
412412
}
413413

414+
Rewrite::Closure1(ref name, ref rw) => {
415+
self.emit_str("|")?;
416+
self.emit_str(name)?;
417+
self.emit_str("| ")?;
418+
self.emit(rw, 0)
419+
}
420+
414421
Rewrite::TyPtr(ref rw, mutbl) => {
415422
match mutbl {
416423
Mutability::Not => self.emit_str("*const ")?,

c2rust-analyze/src/rewrite/expr/convert.rs

Lines changed: 328 additions & 148 deletions
Large diffs are not rendered by default.

c2rust-analyze/src/rewrite/expr/mir_op.rs

Lines changed: 254 additions & 23 deletions
Large diffs are not rendered by default.

c2rust-analyze/src/rewrite/expr/unlower.rs

Lines changed: 225 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -114,15 +114,10 @@ impl<'a, 'tcx> UnlowerVisitor<'a, 'tcx> {
114114
}
115115

116116
fn operand_desc(&self, op: &Operand<'tcx>) -> MirOriginDesc {
117-
match *op {
118-
mir::Operand::Copy(pl) | mir::Operand::Move(pl) => {
119-
if is_temp_var(self.mir, pl.as_ref()) {
120-
MirOriginDesc::LoadFromTemp
121-
} else {
122-
MirOriginDesc::Expr
123-
}
124-
}
125-
mir::Operand::Constant(..) => MirOriginDesc::Expr,
117+
if is_temp_var_operand(self.mir, op) {
118+
MirOriginDesc::LoadFromTemp
119+
} else {
120+
MirOriginDesc::Expr
126121
}
127122
}
128123

@@ -215,20 +210,16 @@ impl<'a, 'tcx> UnlowerVisitor<'a, 'tcx> {
215210
match ex.kind {
216211
hir::ExprKind::Assign(pl, rv, _span) => {
217212
// For `Assign`, we expect the assignment to be the whole thing.
218-
let (loc, _mir_pl, mir_rv) = match self.get_sole_assign(&locs) {
213+
let (loc, mir_pl, mir_rv) = match self.get_sole_assign(&locs) {
219214
Some(x) => x,
220215
None => {
221216
warn("expected exactly one StatementKind::Assign");
222217
return;
223218
}
224219
};
225-
let desc = match mir_rv {
226-
mir::Rvalue::Use(op) => self.operand_desc(op),
227-
_ => MirOriginDesc::Expr,
228-
};
229220
self.record(loc, &[], ex);
230-
self.record(loc, &[SubLoc::Dest], pl);
231-
self.record_desc(loc, &[SubLoc::Rvalue], rv, desc);
221+
self.visit_expr_place(pl, loc, vec![SubLoc::Dest], mir_pl, &[]);
222+
self.visit_expr_rvalue(rv, loc, vec![SubLoc::Rvalue], mir_rv, &[]);
232223
}
233224

234225
hir::ExprKind::Call(_, args) | hir::ExprKind::MethodCall(_, args, _) => {
@@ -275,8 +266,9 @@ impl<'a, 'tcx> UnlowerVisitor<'a, 'tcx> {
275266

276267
self.record(loc, &[SubLoc::Rvalue], ex);
277268
for (i, (arg, mir_arg)) in args.iter().zip(mir_args).enumerate() {
278-
self.record_operand(loc, &[SubLoc::Rvalue, SubLoc::CallArg(i)], arg, mir_arg);
279-
self.visit_expr_operand(arg, mir_arg, &[]);
269+
let sub_loc = vec![SubLoc::Rvalue, SubLoc::CallArg(i)];
270+
self.record_operand(loc, &sub_loc, arg, mir_arg);
271+
self.visit_expr_operand(arg, loc, sub_loc, mir_arg, &[]);
280272
}
281273

282274
if extra_locs.len() > 0 {
@@ -306,8 +298,7 @@ impl<'a, 'tcx> UnlowerVisitor<'a, 'tcx> {
306298
}
307299
};
308300
self.record_desc(cursor.loc, &[], ex, MirOriginDesc::StoreIntoLocal);
309-
self.peel_adjustments(ex, &mut cursor);
310-
self.record_desc(cursor.loc, &cursor.sub_loc, ex, MirOriginDesc::Expr);
301+
self.walk_expr(ex, &mut cursor);
311302
self.finish_visit_expr_cursor(ex, cursor);
312303
}
313304
}
@@ -436,14 +427,124 @@ impl<'a, 'tcx> UnlowerVisitor<'a, 'tcx> {
436427
}
437428
}
438429

430+
fn visit_expr_rvalue(
431+
&mut self,
432+
ex: &'tcx hir::Expr<'tcx>,
433+
loc: Location,
434+
sub_loc: Vec<SubLoc>,
435+
rv: &'a mir::Rvalue<'tcx>,
436+
extra_locs: &[Location],
437+
) {
438+
let _g = panic_detail::set_current_span(ex.span);
439+
440+
// TODO: We do this check early to ensure that the `LoadFromTemp` is emitted with subloc
441+
// `[Rvalue]` rather than `[Rvalue, RvalueOperand(0)]`, since `mir_op` emits rewrites with
442+
// just `[Rvalue]`. For `Rvalue::Use`, the rvalue and its operand are basically
443+
// synonymous, so ideally we would accept both sublocs as well. We should probably add an
444+
// aliasing system or some kind of cleverness in `distribute` to make both versions work,
445+
// or else modify the definition of `SubLoc` so there's only one way to express this
446+
// location.
447+
if is_temp_var_rvalue(self.mir, rv) {
448+
self.record_desc(loc, &sub_loc, ex, MirOriginDesc::LoadFromTemp);
449+
return;
450+
}
451+
452+
let mut cursor = VisitExprCursor::new(
453+
self.mir,
454+
ExprMir::Rvalue(rv),
455+
extra_locs,
456+
loc,
457+
sub_loc.to_owned(),
458+
);
459+
self.walk_expr(ex, &mut cursor);
460+
461+
self.finish_visit_expr_cursor(ex, cursor);
462+
}
463+
439464
fn visit_expr_operand(
440465
&mut self,
441466
ex: &'tcx hir::Expr<'tcx>,
442-
_rv: &'a mir::Operand<'tcx>,
443-
_extra_locs: &[Location],
467+
loc: Location,
468+
sub_loc: Vec<SubLoc>,
469+
op: &'a mir::Operand<'tcx>,
470+
extra_locs: &[Location],
471+
) {
472+
let _g = panic_detail::set_current_span(ex.span);
473+
474+
if is_temp_var_operand(self.mir, op) {
475+
self.record_desc(loc, &sub_loc, ex, MirOriginDesc::LoadFromTemp);
476+
return;
477+
}
478+
479+
let mut cursor = VisitExprCursor::new(
480+
self.mir,
481+
ExprMir::Operand(op),
482+
extra_locs,
483+
loc,
484+
sub_loc.to_owned(),
485+
);
486+
self.walk_expr(ex, &mut cursor);
487+
488+
self.finish_visit_expr_cursor(ex, cursor);
489+
}
490+
491+
fn visit_expr_place(
492+
&mut self,
493+
ex: &'tcx hir::Expr<'tcx>,
494+
loc: Location,
495+
sub_loc: Vec<SubLoc>,
496+
pl: mir::Place<'tcx>,
497+
extra_locs: &[Location],
444498
) {
445499
let _g = panic_detail::set_current_span(ex.span);
446-
// TODO: handle adjustments, overloaded operators, etc
500+
501+
let mut cursor = VisitExprCursor::new(
502+
self.mir,
503+
ExprMir::Place(pl.as_ref()),
504+
extra_locs,
505+
loc,
506+
sub_loc.to_owned(),
507+
);
508+
self.walk_expr(ex, &mut cursor);
509+
510+
self.finish_visit_expr_cursor(ex, cursor);
511+
}
512+
513+
fn walk_expr(&mut self, ex: &'tcx hir::Expr<'tcx>, cursor: &mut VisitExprCursor<'a, '_, 'tcx>) {
514+
let mut ex = ex;
515+
loop {
516+
self.peel_adjustments(ex, cursor);
517+
if cursor.is_temp_var() {
518+
self.record_desc(cursor.loc, &cursor.sub_loc, ex, MirOriginDesc::LoadFromTemp);
519+
break;
520+
} else {
521+
self.record_desc(cursor.loc, &cursor.sub_loc, ex, MirOriginDesc::Expr);
522+
}
523+
524+
match ex.kind {
525+
hir::ExprKind::Unary(hir::UnOp::Deref, ptr_ex) => {
526+
if let Some(()) = cursor.peel_deref() {
527+
ex = ptr_ex;
528+
continue;
529+
}
530+
}
531+
hir::ExprKind::Field(base_ex, _field_ident) => {
532+
if let Some(()) = cursor.peel_field() {
533+
ex = base_ex;
534+
continue;
535+
}
536+
}
537+
hir::ExprKind::Index(arr_ex, _idx_ex) => {
538+
if let Some(()) = cursor.peel_index() {
539+
ex = arr_ex;
540+
continue;
541+
}
542+
}
543+
_ => {}
544+
}
545+
// Keep looping only in cases that we specifically recognize.
546+
break;
547+
}
447548
}
448549
}
449550

@@ -526,6 +627,16 @@ impl<'a, 'b, 'tcx> VisitExprCursor<'a, 'b, 'tcx> {
526627
}
527628
}
528629

630+
/// Check whether the current MIR is a temporary.
631+
pub fn is_temp_var(&self) -> bool {
632+
match self.cur {
633+
ExprMir::Rvalue(rv) => is_temp_var_rvalue(self.mir, rv),
634+
ExprMir::Operand(op) => is_temp_var_operand(self.mir, op),
635+
ExprMir::Place(pl) => is_temp_var(self.mir, pl),
636+
ExprMir::Call(_) => false,
637+
}
638+
}
639+
529640
/// If the current MIR is a temporary, and the previous `Location` is an assignment to
530641
/// that temporary, peel it off, leaving the temporary's initializer as the current
531642
/// `Rvalue`. This also adds `LoadFromTemp` and `StoreIntoLocal` entries in `self.temp_info`
@@ -673,12 +784,37 @@ impl<'a, 'b, 'tcx> VisitExprCursor<'a, 'b, 'tcx> {
673784
if let Some((&outer_proj, remaining_proj)) = pl.projection.split_last() {
674785
if matches!(outer_proj, mir::PlaceElem::Deref) {
675786
pl.projection = remaining_proj;
676-
// Number of derefs, not counting the outermost one we just peeled off.
677-
let num_inner_derefs = remaining_proj
678-
.iter()
679-
.filter(|p| matches!(p, mir::PlaceElem::Deref))
680-
.count();
681-
self.sub_loc.push(SubLoc::PlacePointer(num_inner_derefs));
787+
self.sub_loc.push(SubLoc::PlaceDerefPointer);
788+
return Some(());
789+
}
790+
}
791+
self.peel_temp()?;
792+
}
793+
}
794+
795+
/// If the current MIR is a `Place` ending with a `Field` projection, peel off the `Field`.
796+
pub fn peel_field(&mut self) -> Option<()> {
797+
loop {
798+
let pl = self.require_place_mut()?;
799+
if let Some((&outer_proj, remaining_proj)) = pl.projection.split_last() {
800+
if matches!(outer_proj, mir::PlaceElem::Field(_idx, _ty)) {
801+
pl.projection = remaining_proj;
802+
self.sub_loc.push(SubLoc::PlaceFieldBase);
803+
return Some(());
804+
}
805+
}
806+
self.peel_temp()?;
807+
}
808+
}
809+
810+
/// If the current MIR is a `Place` ending with an `Index` projection, peel off the `Index`.
811+
pub fn peel_index(&mut self) -> Option<()> {
812+
loop {
813+
let pl = self.require_place_mut()?;
814+
if let Some((&outer_proj, remaining_proj)) = pl.projection.split_last() {
815+
if matches!(outer_proj, mir::PlaceElem::Index(_)) {
816+
pl.projection = remaining_proj;
817+
self.sub_loc.push(SubLoc::PlaceIndexArray);
682818
return Some(());
683819
}
684820
}
@@ -712,25 +848,77 @@ fn is_temp_var(mir: &Body, pl: mir::PlaceRef) -> bool {
712848
pl.projection.len() == 0 && mir.local_kind(pl.local) == mir::LocalKind::Temp
713849
}
714850

851+
fn is_temp_var_operand(mir: &Body, op: &mir::Operand) -> bool {
852+
match get_operand_place(op) {
853+
Some(pl) => is_temp_var(mir, pl.as_ref()),
854+
None => false,
855+
}
856+
}
857+
858+
fn is_temp_var_rvalue(mir: &Body, rv: &mir::Rvalue) -> bool {
859+
match get_rvalue_operand(rv) {
860+
Some(op) => is_temp_var_operand(mir, op),
861+
None => false,
862+
}
863+
}
864+
865+
fn get_rvalue_operand<'a, 'tcx>(rv: &'a mir::Rvalue<'tcx>) -> Option<&'a mir::Operand<'tcx>> {
866+
match *rv {
867+
mir::Rvalue::Use(ref op) => Some(op),
868+
_ => None,
869+
}
870+
}
871+
872+
fn get_operand_place<'tcx>(op: &mir::Operand<'tcx>) -> Option<mir::Place<'tcx>> {
873+
match *op {
874+
mir::Operand::Copy(pl) | mir::Operand::Move(pl) => Some(pl),
875+
_ => None,
876+
}
877+
}
878+
879+
/// Indicate whether a given MIR statement should be considered when building the unlowering map.
880+
fn filter_stmt(stmt: &mir::Statement) -> bool {
881+
match stmt.kind {
882+
// Ignore `AscribeUserType` annotations. These appear in the middle of some expressions.
883+
// It's easier to ignore them all at this level rather than try to handle them in all the
884+
// places they might appear.
885+
mir::StatementKind::AscribeUserType(..) => false,
886+
_ => true,
887+
}
888+
}
889+
890+
/// Indicate whether a given MIR terminator should be considered when building the unlowering map.
891+
fn filter_term(term: &mir::Terminator) -> bool {
892+
match term.kind {
893+
_ => true,
894+
}
895+
}
896+
715897
fn build_span_index(mir: &Body<'_>) -> SpanIndex<Location> {
716-
eprintln!("building span index for {:?}:", mir.source);
898+
debug!("building span index for {:?}:", mir.source);
717899
let mut span_index_items = Vec::new();
718900
for (bb, bb_data) in mir.basic_blocks().iter_enumerated() {
719901
for (i, stmt) in bb_data.statements.iter().enumerate() {
902+
if !filter_stmt(stmt) {
903+
continue;
904+
}
720905
let loc = Location {
721906
block: bb,
722907
statement_index: i,
723908
};
724-
eprintln!(" {:?}: {:?}", loc, stmt.source_info.span);
909+
debug!(" {:?}: {:?}", loc, stmt.source_info.span);
725910
span_index_items.push((stmt.source_info.span, loc));
726911
}
727912

728-
let loc = Location {
729-
block: bb,
730-
statement_index: bb_data.statements.len(),
731-
};
732-
eprintln!(" {:?}: {:?}", loc, bb_data.terminator().source_info.span);
733-
span_index_items.push((bb_data.terminator().source_info.span, loc));
913+
let term = bb_data.terminator();
914+
if filter_term(term) {
915+
let loc = Location {
916+
block: bb,
917+
statement_index: bb_data.statements.len(),
918+
};
919+
debug!(" {:?}: {:?}", loc, term.source_info.span);
920+
span_index_items.push((term.source_info.span, loc));
921+
}
734922
}
735923

736924
SpanIndex::new(span_index_items)

c2rust-analyze/src/rewrite/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,9 @@ pub enum Rewrite<S = Span> {
9898
/// Single-variable `let` binding. This has the same scoping issues as multi-variable `Let`;
9999
/// because of this, `Let` should generally be used instead of multiple `Let1`s.
100100
Let1(String, Box<Rewrite>),
101+
/// Single-argument closure. As with `Let` and `Let1`, the body must be carefully constructed
102+
/// to avoid potential shadowing.
103+
Closure1(String, Box<Rewrite>),
101104

102105
// Type builders
103106
/// Emit a complete pretty-printed type, discarding the original annotation.
@@ -196,6 +199,7 @@ impl Rewrite {
196199
Let(new_vars)
197200
}
198201
Let1(ref name, ref rw) => Let1(String::clone(name), try_subst(rw)?),
202+
Closure1(ref name, ref rw) => Closure1(String::clone(name), try_subst(rw)?),
199203

200204
Print(ref s) => Print(String::clone(s)),
201205
TyPtr(ref rw, mutbl) => TyPtr(try_subst(rw)?, mutbl),

0 commit comments

Comments
 (0)