diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp
index 8c13663deba..e22e7b223be 100644
--- a/src/passes/OptimizeInstructions.cpp
+++ b/src/passes/OptimizeInstructions.cpp
@@ -837,6 +837,9 @@ struct OptimizeInstructions
       if (auto* ret = combineAnd(curr)) {
         return replaceCurrent(ret);
       }
+      if (auto* ret = optimizeAndNoOverlappingBits(curr)) {
+        return replaceCurrent(ret);
+      }
     }
     // for or, we can potentially combine
     if (curr->op == OrInt32) {
@@ -850,6 +853,12 @@ struct OptimizeInstructions
         return replaceCurrent(ret);
       }
     }
+    if (curr->op == AndInt64) {
+      if (auto* ret = optimizeAndNoOverlappingBits(curr)) {
+        return replaceCurrent(ret);
+      }
+    }
+
     // relation/comparisons allow for math optimizations
     if (curr->isRelational()) {
       if (auto* ret = optimizeRelational(curr)) {
@@ -3549,6 +3558,39 @@ struct OptimizeInstructions
     return nullptr;
   }
 
+  // Bitwise AND of a value with bits in [0, n) and a constant with no bits in
+  // [0, n) always yields 0. Replace with zero.
+  //
+  // Returns the replacement built by getDroppedChildrenAndAppend, or nullptr
+  // when the pattern does not match and |curr| must be left alone.
+  Expression* optimizeAndNoOverlappingBits(Binary* curr) {
+    assert(curr->op == AndInt32 || curr->op == AndInt64);
+
+    auto* left = curr->left;
+
+    // Only a constant right operand is handled. Test for it before analyzing
+    // the bits of the left operand, so non-matching ANDs bail out at once.
+    auto* c = curr->right->dynCast<Const>();
+    if (!c || !left->type.isNumber()) {
+      return nullptr;
+    }
+
+    // If every bit of the type may be set on the left, we know nothing useful
+    // and cannot optimize. For the i32/i64 operands asserted above this also
+    // keeps the shift amount below strictly under 64.
+    auto leftMaxBits = Bits::getMaxBits(left, this);
+    if (leftMaxBits >= left->type.getByteSize() * 8) {
+      return nullptr;
+    }
+
+    uint64_t maskLeft = (1ULL << leftMaxBits) - 1;
+    uint64_t constantValue = c->value.getInteger();
+    if ((constantValue & maskLeft) != 0) {
+      return nullptr;
+    }
+    return getDroppedChildrenAndAppend(
+      curr, LiteralUtils::makeZero(left->type, *getModule()));
+  }
+
   // We can combine `or` operations, e.g.
   //   (x > y) | (x == y) ==> x >= y
   //   (x != 0) | (y != 0) ==> (x | y) != 0
diff --git a/test/lit/passes/optimize-instructions-mvp.wast b/test/lit/passes/optimize-instructions-mvp.wast
index 9f505a7586f..77fc0121a28 100644
--- a/test/lit/passes/optimize-instructions-mvp.wast
+++ b/test/lit/passes/optimize-instructions-mvp.wast
@@ -1442,9 +1442,12 @@
   )
   ;; CHECK:      (func $canonicalize-consts-vars (param $x i32) (param $y i32)
   ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (i32.const 0)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
   ;; CHECK-NEXT:   (i32.and
-  ;; CHECK-NEXT:    (i32.const 1)
   ;; CHECK-NEXT:    (i32.const 2)
+  ;; CHECK-NEXT:    (i32.const 1)
   ;; CHECK-NEXT:   )
   ;; CHECK-NEXT:  )
   ;; CHECK-NEXT:  (drop
@@ -1482,6 +1485,7 @@
   ;; CHECK-NEXT: )
   (func $canonicalize-consts-vars (param $x i32) (param $y i32)
     (drop (i32.and (i32.const 1) (i32.const 2)))
+    (drop (i32.and (i32.const 2) (i32.const 1)))
     (drop (i32.and (local.get $x) (i32.const 3)))
     (drop (i32.and (i32.const 4) (local.get $x)))
     (drop (i32.and (local.get $x) (local.get $y)))
@@ -2865,18 +2869,18 @@
       (i32.const 24)
     )
   )
-  ;; CHECK:      (func $sext-24-and-127-128 (result i32)
+  ;; CHECK:      (func $sext-24-and-127-unknown (param $x i32) (result i32)
   ;; CHECK-NEXT:  (i32.and
+  ;; CHECK-NEXT:   (local.get $x)
   ;; CHECK-NEXT:   (i32.const 127)
-  ;; CHECK-NEXT:   (i32.const 128)
   ;; CHECK-NEXT:  )
   ;; CHECK-NEXT: )
-  (func $sext-24-and-127-128 (result i32)
+  (func $sext-24-and-127-unknown (param $x i32) (result i32)
     (i32.shr_s
       (i32.shl
         (i32.and ;; takes the min, here it is ok
           (i32.const 127)
-          (i32.const 128)
+          (local.get $x)
         )
         (i32.const 24)
       )
@@ -6972,7 +6976,7 @@
       )
     )
   )
-  ;; CHECK:      (func $de-morgan-2 (param $x i32) (param $y i32)
+  ;; CHECK:      (func $de-morgan-2 (param $x i32) (param $y i32) (param $z i64)
   ;; CHECK-NEXT:  (drop
   ;; CHECK-NEXT:   (i32.eqz
   ;; CHECK-NEXT:    (i32.or
@@ -7022,7 +7026,9 @@
   ;; CHECK-NEXT:    (i32.eqz
   ;; CHECK-NEXT:     (local.get $x)
   ;; CHECK-NEXT:    )
-  ;; CHECK-NEXT:    (i32.const 2)
+  ;; CHECK-NEXT:    (i32.wrap_i64
+  ;; CHECK-NEXT:     (local.get $z)
+  ;; CHECK-NEXT:    )
   ;; CHECK-NEXT:   )
   ;; CHECK-NEXT:  )
   ;; CHECK-NEXT:  (drop
@@ -7031,7 +7037,7 @@
   ;; CHECK-NEXT:   )
   ;; CHECK-NEXT:  )
   ;; CHECK-NEXT: )
-  (func $de-morgan-2 (param $x i32) (param $y i32)
+  (func $de-morgan-2 (param $x i32) (param $y i32) (param $z i64)
     (drop
       (i32.and (i32.eqz (local.get $x)) (i32.eqz (local.get $y)))
     )
@@ -7048,7 +7054,7 @@
       (i32.and (local.get $x) (i32.eqz (local.get $y)))
     )
     (drop
-      (i32.and (i32.eqz (local.get $x)) (i32.wrap_i64 (i64.const 2)))
+      (i32.and (i32.eqz (local.get $x)) (i32.wrap_i64 (local.get $z)))
     )
     (drop
       (i32.and (i32.wrap_i64 (i64.const 1)) (i32.eqz (local.get $y)))
@@ -17773,4 +17779,190 @@
       (i32.const 1)
     )
   )
+  ;; CHECK:      (func $and-op-no-overlapping-bits-corner-case (param $0 i32) (param $1 i64)
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (i32.const 0)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (i64.const 0)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (i64.const 0)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (i32.const 0)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (i64.const 0)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $and-op-no-overlapping-bits-corner-case (param $0 i32) (param $1 i64)
+    ;; optimizeAndNoOverlappingBits simplifies AND operations where
+    ;; - the left value covers bits in [0, n)
+    ;; - the right operand is a constant with no bits in [0, n)
+    ;; Result is simplified to zero.
+    ;; No bit overlaps, so we optimize.
+    (drop
+      (i32.and
+        (i32.const 1)
+        (i32.const 2)
+      )
+    )
+    (drop
+      (i64.and
+        (i64.const 1)
+        (i64.const 2)
+      )
+    )
+    (drop
+      (i64.and
+        (i64.const 0x7fffffff)
+        (i64.const 0x80000000)
+      )
+    )
+    ;; We know something (but not constant) about the bits
+    ;; on the left, so we can optimize.
+    (drop
+      (i32.and
+        (i32.and
+          (local.get $0)
+          (i32.const 0xff)
+        )
+        (i32.const 0xff00)
+      )
+    )
+    (drop
+      (i64.and
+        (i64.and
+          (local.get $1)
+          (i64.const 0xff)
+        )
+        (i64.const 0xff00)
+      )
+    )
+  )
+  ;; CHECK:      (func $and-op-overlapping-bits-corner-case
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (i32.and
+  ;; CHECK-NEXT:    (i32.const 2147483647)
+  ;; CHECK-NEXT:    (i32.const -2147483647)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (i64.and
+  ;; CHECK-NEXT:    (i64.const 2147483647)
+  ;; CHECK-NEXT:    (i64.const 2147483649)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $and-op-overlapping-bits-corner-case
+    ;; One bit overlaps, so we cannot optimize.
+    (drop
+      (i32.and
+        (i32.const 0x7fffffff)
+        (i32.const 0x80000001)
+      )
+    )
+    (drop
+      (i64.and
+        (i64.const 0x7fffffff)
+        (i64.const 0x80000001)
+      )
+    )
+  )
+  ;; CHECK:      (func $and-op-no-overlapping-skipped
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (i32.and
+  ;; CHECK-NEXT:    (i32.const 2)
+  ;; CHECK-NEXT:    (i32.const 1)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (i64.and
+  ;; CHECK-NEXT:    (i64.const 2)
+  ;; CHECK-NEXT:    (i64.const 1)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (i64.and
+  ;; CHECK-NEXT:    (i64.const 2147483648)
+  ;; CHECK-NEXT:    (i64.const 2147483647)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $and-op-no-overlapping-skipped
+    ;; Both-constant cases which do not meet the condition (mask of left has no
+    ;; overlap with right) are left for Precompute.
+    (drop
+      (i32.and
+        (i32.const 2)
+        (i32.const 1)
+      )
+    )
+    (drop
+      (i64.and
+        (i64.const 2)
+        (i64.const 1)
+      )
+    )
+    (drop
+      (i64.and
+        (i64.const 0x80000000)
+        (i64.const 0x7fffffff)
+      )
+    )
+  )
+  ;; CHECK:      (func $and-op-unknown-useful (param $0 i32) (param $1 i64)
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (i32.and
+  ;; CHECK-NEXT:    (i32.const -2147483648)
+  ;; CHECK-NEXT:    (i32.const 2147483647)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (i64.and
+  ;; CHECK-NEXT:    (i64.const -9223372036854775808)
+  ;; CHECK-NEXT:    (i64.const 9223372036854775807)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (i32.and
+  ;; CHECK-NEXT:    (local.get $0)
+  ;; CHECK-NEXT:    (i32.const 1)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (i64.and
+  ;; CHECK-NEXT:    (local.get $1)
+  ;; CHECK-NEXT:    (i64.const 1)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $and-op-unknown-useful (param $0 i32) (param $1 i64)
+    ;; We know nothing useful about the bits on the left, so we cannot optimize.
+    (drop
+      (i32.and
+        (i32.const 0x80000000)
+        (i32.const 0x7fffffff)
+      )
+    )
+    (drop
+      (i64.and
+        (i64.const 0x8000000000000000)
+        (i64.const 0x7fffffffffffffff)
+      )
+    )
+    (drop
+      (i32.and
+        (local.get $0)
+        (i32.const 1)
+      )
+    )
+    (drop
+      (i64.and
+        (local.get $1)
+        (i64.const 1)
+      )
+    )
+  )
 )