Skip to content

Commit e37726b

Browse files
committed
[analyzer] Implemented RangeSet::Factory::castTo function to perform promotions, truncations and conversions.
Summary: Handle casts for ranges working similarly to APSIntType::apply function but for the whole range set. Support promotions, truncations and conversions. Example: promotion: char [0, 42] -> short [0, 42] -> int [0, 42] -> llong [0, 42] truncation: llong [4295033088, 4295033130] -> int [65792, 65834] -> short [256, 298] -> char [0, 42] conversion: char [-42, 42] -> uint [0, 42]U[4294967254, 4294967295] -> short[-42, 42] Differential Revision: https://reviews.llvm.org/D103094
1 parent 25c218b commit e37726b

File tree

4 files changed

+734
-15
lines changed

4 files changed

+734
-15
lines changed

clang/include/clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@ class APSIntType {
2121
bool IsUnsigned;
2222

2323
public:
24-
APSIntType(uint32_t Width, bool Unsigned)
25-
: BitWidth(Width), IsUnsigned(Unsigned) {}
24+
constexpr APSIntType(uint32_t Width, bool Unsigned)
25+
: BitWidth(Width), IsUnsigned(Unsigned) {}
2626

2727
/* implicit */ APSIntType(const llvm::APSInt &Value)
2828
: BitWidth(Value.getBitWidth()), IsUnsigned(Value.isUnsigned()) {}

clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,29 @@ class RangeSet {
237237
/// Complexity: O(N)
238238
/// where N = size(What)
239239
RangeSet negate(RangeSet What);
240+
/// Performs promotions, truncations and conversions of the given set.
241+
///
242+
/// This function is optimized for each of the six cast cases:
243+
/// - noop
244+
/// - conversion
245+
/// - truncation
246+
/// - truncation-conversion
247+
/// - promotion
248+
/// - promotion-conversion
249+
///
250+
/// NOTE: This function is NOT self-inverse for truncations, because of
251+
/// the higher bits loss:
252+
/// - castTo(castTo(OrigRangeOfInt, char), int) != OrigRangeOfInt.
253+
/// - castTo(castTo(OrigRangeOfChar, int), char) == OrigRangeOfChar.
254+
/// But it is self-inverse for all other casts.
255+
///
256+
/// Complexity:
257+
/// - Noop O(1);
258+
/// - Truncation O(N^2);
259+
/// - All other cases O(N);
260+
/// where N = size(What)
261+
RangeSet castTo(RangeSet What, APSIntType Ty);
262+
RangeSet castTo(RangeSet What, QualType T);
240263

241264
/// Return associated value factory.
242265
BasicValueFactory &getValueFactory() const { return ValueFactory; }
@@ -252,6 +275,22 @@ class RangeSet {
252275
/// containers are persistent (created via BasicValueFactory::getValue).
253276
ContainerType unite(const ContainerType &LHS, const ContainerType &RHS);
254277

278+
/// This is a helper function for the `castTo` method. It is not meant to be used
279+
/// separately.
280+
/// Performs a truncation case of a cast operation.
281+
ContainerType truncateTo(RangeSet What, APSIntType Ty);
282+
283+
/// This is a helper function for the `castTo` method. It is not meant to be used
284+
/// separately.
285+
/// Performs a conversion case and a promotion-conversion case for signeds
286+
/// of a cast operation.
287+
ContainerType convertTo(RangeSet What, APSIntType Ty);
288+
289+
/// This is a helper function for the `castTo` method. It is not meant to be used
290+
/// separately.
291+
/// Performs a promotion for unsigneds only.
292+
ContainerType promoteTo(RangeSet What, APSIntType Ty);
293+
255294
// Many operations include producing new APSInt values and that's why
256295
// we need this factory.
257296
BasicValueFactory &ValueFactory;
@@ -303,6 +342,10 @@ class RangeSet {
303342
/// Complexity: O(1)
304343
const llvm::APSInt &getMaxValue() const;
305344

345+
bool isUnsigned() const;
346+
uint32_t getBitWidth() const;
347+
APSIntType getAPSIntType() const;
348+
306349
/// Test whether the given point is contained by any of the ranges.
307350
///
308351
/// Complexity: O(logN)

clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp

Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,21 @@ const llvm::APSInt &RangeSet::getMaxValue() const {
353353
return std::prev(end())->To();
354354
}
355355

356+
// Reports the signedness of the set, taken from the lower bound of its first
// range. The set must be non-empty (asserted).
bool clang::ento::RangeSet::isUnsigned() const {
357+
assert(!isEmpty());
358+
return begin()->From().isUnsigned();
359+
}
360+
361+
// Reports the bit width of the set, taken from the lower bound of its first
// range. The set must be non-empty (asserted).
uint32_t clang::ento::RangeSet::getBitWidth() const {
362+
assert(!isEmpty());
363+
return begin()->From().getBitWidth();
364+
}
365+
366+
// Builds an APSIntType (bit width + signedness) from the lower bound of the
// first range. The set must be non-empty (asserted).
APSIntType clang::ento::RangeSet::getAPSIntType() const {
367+
assert(!isEmpty());
368+
return APSIntType(begin()->From());
369+
}
370+
356371
bool RangeSet::containsImpl(llvm::APSInt &Point) const {
357372
if (isEmpty() || !pin(Point))
358373
return false;
@@ -655,6 +670,181 @@ RangeSet RangeSet::Factory::negate(RangeSet What) {
655670
return makePersistent(std::move(Result));
656671
}
657672

673+
// Convert the range set to the given integral type using truncation,
674+
// promotion or sign conversion. This works similarly to APSIntType::apply,
// but for a whole range set instead of a single value.
675+
RangeSet RangeSet::Factory::castTo(RangeSet What, APSIntType Ty) {
676+
// The set is empty or the cast is a no-op (cast to the same type).
677+
if (What.isEmpty() || What.getAPSIntType() == Ty)
678+
return What;
679+
680+
const bool IsConversion = What.isUnsigned() != Ty.isUnsigned();
681+
const bool IsTruncation = What.getBitWidth() > Ty.getBitWidth();
682+
const bool IsPromotion = What.getBitWidth() < Ty.getBitWidth();
683+
684+
if (IsTruncation)
685+
return makePersistent(truncateTo(What, Ty));
686+
687+
// Here we handle 2 cases:
688+
// - IsConversion && !IsPromotion.
689+
// The sign changes while the bit width stays the same: char -> uchar,
690+
// uint -> int. Negatives are converted to positives, and positives that
691+
// are out of range are converted to negatives. convertTo does that.
692+
// - IsConversion && IsPromotion && !What.isUnsigned().
693+
// The sign changes from signed to unsigned with a higher bit width:
694+
// char -> uint, int -> uint64. Negatives still have to be converted to
695+
// positives, so convertTo is used here as well.
696+
// In contrast, no such conversion is needed when converting unsigned to
697+
// signed with a higher bit width, because all values of the unsigned type
698+
// are valid for such a signed type.
699+
if (IsConversion && (!IsPromotion || !What.isUnsigned()))
700+
return makePersistent(convertTo(What, Ty));
701+
702+
assert(IsPromotion && "Only promotion operation from unsigneds left.");
703+
return makePersistent(promoteTo(What, Ty));
704+
}
705+
706+
// Overload taking a QualType: asserts that T is an integral or enumeration
// type, maps it to an APSIntType through the value factory and forwards to the
// APSIntType overload.
RangeSet RangeSet::Factory::castTo(RangeSet What, QualType T) {
707+
assert(T->isIntegralOrEnumerationType() && "T shall be an integral type.");
708+
return castTo(What, ValueFactory.getAPSIntType(T));
709+
}
710+
711+
// Truncation case of a cast: casts the bounds of every range of What to the
// narrower type Ty and unites the results into a single container. The
// returned container is not made persistent here.
RangeSet::ContainerType RangeSet::Factory::truncateTo(RangeSet What,
712+
APSIntType Ty) {
713+
using llvm::APInt;
714+
using llvm::APSInt;
715+
ContainerType Result;
716+
ContainerType Dummy;
717+
// CastRangeSize is the number of all possible values of the cast type.
718+
// Example: `char` has 256 values; `short` has 65536 values.
719+
// In fact we use `number of values` - 1, because
720+
// we can't keep the `number of values of UINT64` inside uint64_t.
721+
// E.g. 256 is the number of all possible values of `char` and we can't keep
722+
// it inside `char`.
723+
// That is fine: the decremented value is enough for correct calculations.
724+
uint64_t CastRangeSize = APInt::getMaxValue(Ty.getBitWidth()).getZExtValue();
725+
for (const Range &R : What) {
726+
// Get bounds of the given range.
727+
APSInt FromInt = R.From();
728+
APSInt ToInt = R.To();
729+
// CurrentRangeSize is the number of all possible values of the current
730+
// range minus one.
731+
uint64_t CurrentRangeSize = (ToInt - FromInt).getZExtValue();
732+
// This is an optimization for the specific case when this Range covers
733+
// the whole range of the target type: the result is the full range of Ty.
734+
Dummy.clear();
735+
if (CurrentRangeSize >= CastRangeSize) {
736+
Dummy.emplace_back(ValueFactory.getMinValue(Ty),
737+
ValueFactory.getMaxValue(Ty));
738+
Result = std::move(Dummy);
739+
break;
740+
}
741+
// Cast the bounds.
742+
Ty.apply(FromInt);
743+
Ty.apply(ToInt);
744+
const APSInt &PersistentFrom = ValueFactory.getValue(FromInt);
745+
const APSInt &PersistentTo = ValueFactory.getValue(ToInt);
// If the cast bounds are inverted, the range wrapped around the target
// type: split it into [Min, To] and [From, Max].
746+
if (FromInt > ToInt) {
747+
Dummy.emplace_back(ValueFactory.getMinValue(Ty), PersistentTo);
748+
Dummy.emplace_back(PersistentFrom, ValueFactory.getMaxValue(Ty));
749+
} else
750+
Dummy.emplace_back(PersistentFrom, PersistentTo);
751+
// Every range retrieved after truncation potentially has garbage values.
752+
// So we have to unite each new range with the previous ones.
753+
Result = unite(Result, Dummy);
754+
}
755+
756+
return Result;
757+
}
758+
759+
// Divide the conversion into two phases (presented as loops here).
760+
// The first phase (loop) works while the cast values go in ascending order.
761+
// E.g. char{1,3,5,127} -> uint{1,3,5,127}
762+
// Interrupt the first phase and go to the second one when the cast values
763+
// start going in descending order. That means that we crossed over the middle
764+
// of the type value set (aka 0 for signeds and MAX/2+1 for unsigneds).
765+
// For instance:
766+
// 1: uchar{1,3,5,128,255} -> char{1,3,5,-128,-1}
767+
// Here we put {1,3,5} to one array and {-128, -1} to another
768+
// 2: char{-128,-127,-1,0,1,2} -> uchar{128,129,255,0,1,2}
769+
// Here we put {128,129,255} to one array and {0,1,2} to another.
770+
// After that we unite both arrays.
771+
// NOTE: We don't just concatenate the arrays, because they may have
772+
// adjacent ranges, e.g.:
773+
// 1: char(-128, 127) -> uchar -> arr1(128, 255), arr2(0, 127) ->
774+
// unite -> uchar(0, 255)
775+
// 2: uchar(0, 1)U(254, 255) -> char -> arr1(0, 1), arr2(-2, -1) ->
776+
// unite -> char(-2, 1)
777+
RangeSet::ContainerType RangeSet::Factory::convertTo(RangeSet What,
778+
APSIntType Ty) {
779+
using llvm::APInt;
780+
using llvm::APSInt;
781+
using Bounds = std::pair<const APSInt &, const APSInt &>;
782+
ContainerType AscendArray;
783+
ContainerType DescendArray;
// Casts both bounds of a range to Ty and returns references to the values
// obtained from the value factory.
784+
auto CastRange = [Ty, &VF = ValueFactory](const Range &R) -> Bounds {
785+
// Get bounds of the given range.
786+
APSInt FromInt = R.From();
787+
APSInt ToInt = R.To();
788+
// Cast the bounds.
789+
Ty.apply(FromInt);
790+
Ty.apply(ToInt);
791+
return {VF.getValue(FromInt), VF.getValue(ToInt)};
792+
};
793+
// Phase 1. Fill the first array.
794+
APSInt LastConvertedInt = Ty.getMinValue();
795+
const auto *It = What.begin();
796+
const auto *E = What.end();
797+
while (It != E) {
798+
Bounds NewBounds = CastRange(*(It++));
799+
// If values stop going in ascending order, go to the second phase (loop).
800+
if (NewBounds.first < LastConvertedInt) {
801+
DescendArray.emplace_back(NewBounds.first, NewBounds.second);
802+
break;
803+
}
804+
// If the range contains a midpoint, then split the range.
805+
// E.g. char(-5, 5) -> uchar(251, 5)
806+
// Here we shall add a range (251, 255) to the first array and (0, 5) to the
807+
// second one.
808+
if (NewBounds.first > NewBounds.second) {
809+
DescendArray.emplace_back(ValueFactory.getMinValue(Ty), NewBounds.second);
810+
AscendArray.emplace_back(NewBounds.first, ValueFactory.getMaxValue(Ty));
811+
} else
812+
// Values are going in ascending order.
813+
AscendArray.emplace_back(NewBounds.first, NewBounds.second);
814+
LastConvertedInt = NewBounds.first;
815+
}
816+
// Phase 2. Fill the second array with all the remaining ranges.
817+
while (It != E) {
818+
Bounds NewBounds = CastRange(*(It++));
819+
DescendArray.emplace_back(NewBounds.first, NewBounds.second);
820+
}
821+
// Unite both arrays.
822+
return unite(AscendArray, DescendArray);
823+
}
824+
825+
/// Handles the remaining case: promotion from an unsigned type to a wider
/// signed or unsigned type. Casts the bounds of each range to Ty and appends
/// the ranges in their original order.
826+
RangeSet::ContainerType RangeSet::Factory::promoteTo(RangeSet What,
827+
APSIntType Ty) {
828+
ContainerType Result;
829+
// The result has exactly as many ranges as the input, so reserve up front.
830+
Result.reserve(What.size());
831+
832+
// Each unsigned value fits every larger type without any changes,
833+
// whether the larger type is signed or unsigned. So just promote and push
834+
// back each range one by one.
835+
for (const Range &R : What) {
836+
// Get bounds of the given range.
837+
llvm::APSInt FromInt = R.From();
838+
llvm::APSInt ToInt = R.To();
839+
// Cast the bounds.
840+
Ty.apply(FromInt);
841+
Ty.apply(ToInt);
842+
Result.emplace_back(ValueFactory.getValue(FromInt),
843+
ValueFactory.getValue(ToInt));
844+
}
845+
return Result;
846+
}
847+
658848
RangeSet RangeSet::Factory::deletePoint(RangeSet From,
659849
const llvm::APSInt &Point) {
660850
if (!From.contains(Point))

0 commit comments

Comments
 (0)