From 8fb477e899611028f5bc11501cda9694c852a763 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20K=C3=A9ri?= Date: Wed, 2 Jul 2025 11:09:58 +0200 Subject: [PATCH] [clang][analyzer] Add checker 'unix.cstring.MissingTerminatingZero' --- clang/docs/analyzer/checkers.rst | 52 +++ .../clang/StaticAnalyzer/Checkers/Checkers.td | 28 ++ .../StaticAnalyzer/Checkers/CMakeLists.txt | 1 + .../MissingTerminatingZeroChecker.cpp | 295 ++++++++++++++++++ clang/test/Analysis/analyzer-config.c | 2 + .../test/Analysis/analyzer-enabled-checkers.c | 1 + .../cstring-missingterminatingzero-ignore.c | 27 ++ .../Analysis/cstring-missingterminatingzero.c | 91 ++++++ ...c-library-functions-arg-enabled-checkers.c | 1 + 9 files changed, 498 insertions(+) create mode 100644 clang/lib/StaticAnalyzer/Checkers/MissingTerminatingZeroChecker.cpp create mode 100644 clang/test/Analysis/cstring-missingterminatingzero-ignore.c create mode 100644 clang/test/Analysis/cstring-missingterminatingzero.c diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst index 26c5028e04955..16265a9742f16 100644 --- a/clang/docs/analyzer/checkers.rst +++ b/clang/docs/analyzer/checkers.rst @@ -2098,6 +2098,58 @@ Check the size argument passed into C string functions for common erroneous patt // warn: potential buffer overflow } +.. _unix-cstring-MissingTerminatingZero: + +unix.cstring.MissingTerminatingZero (C) +""""""""""""""""""""""""""""""""""""""" +Check for string arguments passed to C library functions where the terminating +zero is missing. + +The checker can only follow initializations with constant values and assignment +of constant values to string elements. + +.. code-block:: c + + int test1() { + char buf[4] = {1, 2, 3, 4}; + return strlen(buf); // warn + } + + int test2() { + char buf[] = "abcd"; + buf[4] = 'e'; + return strlen(buf); // warn + } + + int test3() { + char buf[4]; + buf[3] = 100; + return strlen(buf + 3); // warn + } + +**Options** + +By default the checker assumes that any parameter of type ``const char *`` to a +global C system function should be a null-terminated string. Additionally there +is a list of exceptions which are identified by the function name and parameter +index. This list is called "ignore list" and contains these default values: +(``stpncpy``, 1), (``strncat``, 1), (``strncmp``, 0), (``strncmp``, 1), +(``strncpy``, 1), (``strndup``, 0), (``strnlen``, 0) +These functions are ignored because they have a length parameter and can work +with non-null terminated strings. The list can be changed by the following +options: + +* ``OmitDefaultIgnoreFunctions`` (boolean). If true, the default ignore list is + cleared. (Independently of ``IgnoreFunctionArgs`` contains values or not.) + +* ``IgnoreFunctionArgs`` (string). Can be used to add functions to the ignore + list. It should contain entries in form of " " + separated by comma. These values are added to the ignore list. For example + ``strlen 0, strcpy 0, strcpy 1`` adds ``strlen`` and ``strcpy`` (both + parameters) to the ignore list. A function name can be used at most 2 times + (with different parameter values). Default value of the option is an empty + string. + .. _unix-cstring-NotNullTerminated: unix.cstring.NotNullTerminated (C) diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td index 2a96df80d1001..31a67c2992edf 100644 --- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td +++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td @@ -480,6 +480,34 @@ def CStringSyntaxChecker : Checker<"BadSizeArg">, Dependencies<[CStringModeling]>, Documentation; +def MissingTerminatingZeroChecker + : Checker<"MissingTerminatingZero">, + HelpText< + "Check for string arguments passed to C library functions where the " + "terminating zero is missing">, + CheckerOptions< + [CmdLineOption< + Boolean, "OmitDefaultIgnoreFunctions", + "The checker checks by default all 'const char *' arguments " + "of system library C functions, except for a built-in list " + "of functions. If this parameter is set to true, this ignore " + "list is not used, but functions can be still specified by " + "the 'IgnoreFunctionArgs' option.", + "false", Released>, + CmdLineOption< + String, "IgnoreFunctionArgs", + "Specifies a list of functions to ignore by the checker. This " + "list should contain comma-separated values in the format " + "' ' where is the function name and " + " is the zero-based index of the parameter (with " + "'const char *' type). The same function can be specified " + "with different parameters at most 2 times. These functions " + "are added to the default ignore list, unless " + "'OmitDefaultIgnoreFunctions' is true.", + "", Released>, +]>, + Documentation; + } // end "unix.cstring" let ParentPackage = CStringAlpha in { diff --git a/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt b/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt index 22dd3f0374849..d4960431402c2 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt +++ b/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt @@ -64,6 +64,7 @@ add_clang_library(clangStaticAnalyzerCheckers MallocChecker.cpp MallocSizeofChecker.cpp MismatchedIteratorChecker.cpp + MissingTerminatingZeroChecker.cpp MmapWriteExecChecker.cpp MIGChecker.cpp MoveChecker.cpp diff --git a/clang/lib/StaticAnalyzer/Checkers/MissingTerminatingZeroChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MissingTerminatingZeroChecker.cpp new file mode 100644 index 0000000000000..292f0c809ac1a --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/MissingTerminatingZeroChecker.cpp @@ -0,0 +1,295 @@ +//=== MissingTerminatingZeroChecker.cpp -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Check for string arguments passed to C library functions where the +// terminating zero is missing. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h" +#include "llvm/ADT/BitVector.h" +#include + +using namespace clang; +using namespace ento; + +namespace { + +struct StringData { + const MemRegion *StrRegion; + int64_t StrLength; + unsigned int Offset; + const llvm::BitVector *NonNullData; +}; + +class MissingTerminatingZeroChecker + : public Checker { +public: + void checkBind(SVal L, SVal V, const Stmt *S, CheckerContext &C) const; + void checkPreCall(const CallEvent &Call, CheckerContext &C) const; + + void initOptions(bool NoDefaultIgnore, StringRef IgnoreList); + +private: + const BugType BT{this, "Missing terminating zero"}; + + using IgnoreEntry = std::pair; + /// Functions (identified by name only) to ignore. + /// The entry stores a parameter index, or -1. + llvm::StringMap FunctionsToIgnore = { + {"stpncpy", {1, -1}}, {"strncat", {1, -1}}, {"strncmp", {0, 1}}, + {"strncpy", {1, -1}}, {"strndup", {0, -1}}, {"strnlen", {0, -1}}, + }; + + bool checkArg(unsigned int ArgI, CheckerContext &C, + const CallEvent &Call) const; + bool getStringData(StringData &DataOut, ProgramStateRef State, + SValBuilder &SVB, const MemRegion *StrReg) const; + ProgramStateRef setStringData(ProgramStateRef State, Loc L, + const llvm::BitVector &NonNullData) const; + void reportBug(ExplodedNode *N, const Expr *E, CheckerContext &C, + const char Msg[]) const; +}; + +} // namespace + +namespace llvm { +template <> struct FoldingSetTrait { + static inline void Profile(llvm::BitVector X, FoldingSetNodeID &ID) { + ID.AddInteger(X.size()); + for (unsigned int I = 0; I < X.size(); ++I) + ID.AddBoolean(X[I]); + } +}; +} // end namespace llvm + +// Contains for a "string" (character array) region if elements are known to be +// non-zero. The bit vector is indexed by the array element index and is true +// if the element is known to be non-zero. Size of the vector does not +// correspond to the extent of the memory region (can be smaller), the missing +// elements are considered to be false. +// (A value of 'false' means that the string element is zero or unknown.) +REGISTER_MAP_WITH_PROGRAMSTATE(NonNullnessData, const MemRegion *, + llvm::BitVector) + +void MissingTerminatingZeroChecker::checkBind(SVal L, SVal V, const Stmt *S, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + const MemRegion *MR = L.getAsRegion(); + + if (const ElementRegion *ER = dyn_cast_or_null(MR)) { + // Assign value to the index of an array. + // Check for applicable array type. + QualType ElemType = ER->getValueType().getCanonicalType(); + if (!ElemType->isCharType()) + return; + if (C.getASTContext().getTypeSizeInChars(ElemType).getQuantity() != 1) + return; + + RegionRawOffset ROffset = ER->getAsArrayOffset(); + unsigned int Index = ROffset.getOffset().getQuantity(); + + // If the checker has data about the value to bind, use this information. + // Otherwise try to get it from the analyzer. + auto GetKnownToBeNonNull = [this, State, &C](SVal V) -> ConditionTruthVal { + StringData ExistingSData; + if (getStringData(ExistingSData, State, C.getSValBuilder(), + V.getAsRegion()) && + ExistingSData.Offset < ExistingSData.NonNullData->size()) { + return ExistingSData.NonNullData->test(ExistingSData.Offset); + } else { + return State->isNonNull(V); + } + }; + ConditionTruthVal VKnownToBeNonNull = GetKnownToBeNonNull(V); + + if (const llvm::BitVector *NNData = + State->get(ROffset.getRegion())) { + // Update existing data. + unsigned int NewSize = + Index < NNData->size() ? NNData->size() : Index + 1; + // Only extend the vector with 'true' value. + if (NewSize > NNData->size() && !VKnownToBeNonNull.isConstrainedTrue()) + return; + llvm::BitVector NNData1(NewSize); + NNData1 |= *NNData; + NNData1[Index] = VKnownToBeNonNull.isConstrainedTrue(); + State = State->set(ROffset.getRegion(), NNData1); + } else { + // Only add new data if 'true' is found. + if (!VKnownToBeNonNull.isConstrainedTrue()) + return; + llvm::BitVector NNData1(Index + 1); + NNData1[Index] = true; + State = State->set(ROffset.getRegion(), NNData1); + } + } else if (const TypedValueRegion *TR = + dyn_cast_or_null(MR)) { + // Initialize a region with compound value from list or string literal. + QualType Type = TR->getValueType().getCanonicalType(); + if (!Type->isArrayType()) + return; + if (!Type->castAsArrayTypeUnsafe()->getElementType()->isCharType()) + return; + + if (auto CVal = V.getAs()) { + llvm::BitVector NNData; + for (auto Val = CVal->begin(); Val != CVal->end(); ++Val) + NNData.push_back(State->isNonNull(*Val).isConstrainedTrue()); + State = State->set(MR, NNData); + } else if (auto MRV = V.getAs()) { + if (auto *StrReg = MRV->stripCasts()->getAs()) { + StringRef Str = StrReg->getStringLiteral()->getString(); + size_t StrL = Str.size(); + llvm::BitVector NNData(StrL + 1, true); + for (unsigned int I = 0; I < StrL; ++I) + if (Str[I] == 0) + NNData.reset(I); + NNData.reset(StrL); + State = State->set(MR, NNData); + } + } + } + C.addTransition(State); +} + +void MissingTerminatingZeroChecker::checkPreCall(const CallEvent &Call, + CheckerContext &C) const { + if (!Call.isInSystemHeader() || !Call.isGlobalCFunction()) + return; + + auto Ignore = [this](StringRef Name) -> IgnoreEntry { + auto FIgnore = FunctionsToIgnore.find(Name); + if (FIgnore != FunctionsToIgnore.end()) + return FIgnore->getValue(); + return IgnoreEntry{-1, -1}; + }(cast(Call.getDecl())->getNameAsString()); + + for (const auto &[ArgI, ParmD] : enumerate(Call.parameters())) { + QualType ArgTy = ParmD->getType(); + if (!ArgTy->isPointerType()) + continue; + QualType ArgPointeeTy = ArgTy->getPointeeType(); + if (!ArgPointeeTy->isCharType() || !ArgPointeeTy.isConstQualified()) + continue; + if (static_cast(ArgI) == Ignore.first || + static_cast(ArgI) == Ignore.second) + continue; + + if (checkArg(ArgI, C, Call)) + return; + } +} + +bool MissingTerminatingZeroChecker::checkArg(unsigned int ArgI, + CheckerContext &C, + const CallEvent &Call) const { + SVal StrArgVal = Call.getArgSVal(ArgI); + + StringData SData; + if (!getStringData(SData, C.getState(), C.getSValBuilder(), + StrArgVal.getAsRegion())) + return false; + + // Check if all elements in the string are known to be non-zero. + // The stored bitvector can have a smaller size. + if (SData.NonNullData->size() < SData.StrLength) + return false; + for (int64_t I = SData.Offset; I < SData.StrLength; ++I) + if (!SData.NonNullData->test(I)) + return false; + + reportBug(C.getPredecessor(), Call.getArgExpr(ArgI), C, + "String contains no terminating zero; At this place a " + "null-terminated string is expected"); + return true; +} + +bool MissingTerminatingZeroChecker::getStringData( + StringData &DataOut, ProgramStateRef State, SValBuilder &SVB, + const MemRegion *StrReg) const { + if (!StrReg) + return false; + + unsigned int Offset = 0; + if (const auto *ElemReg = dyn_cast_or_null(StrReg)) { + RegionRawOffset ROffset = ElemReg->getAsArrayOffset(); + StrReg = ROffset.getRegion(); + if (!StrReg) + return false; + Offset = ROffset.getOffset().getQuantity(); + } + + const llvm::BitVector *NNData = State->get(StrReg); + if (!NNData) + return false; + + DefinedOrUnknownSVal Extent = getDynamicExtent(State, StrReg, SVB); + if (Extent.isUnknown()) + return false; + const llvm::APSInt *KnownExtent = SVB.getKnownValue(State, Extent); + if (!KnownExtent) + return false; + + DataOut.StrRegion = StrReg; + DataOut.StrLength = KnownExtent->getExtValue(); + DataOut.Offset = Offset; + DataOut.NonNullData = NNData; + + return true; +} + +void MissingTerminatingZeroChecker::reportBug(ExplodedNode *N, const Expr *E, + CheckerContext &C, + const char Msg[]) const { + auto R = std::make_unique(BT, Msg, N); + bugreporter::trackExpressionValue(N, E, *R); + C.emitReport(std::move(R)); +} + +void MissingTerminatingZeroChecker::initOptions(bool NoDefaultIgnore, + StringRef IgnoreList) { + if (NoDefaultIgnore) + FunctionsToIgnore.clear(); + std::istringstream IgnoreInput{std::string(IgnoreList)}; + std::array I; + while (IgnoreInput.getline(&I[0], 100, ';')) { + std::istringstream FunctionInput{std::string(&I[0])}; + std::string FName; + int FArg; + if (FunctionInput >> FName >> FArg) { + IgnoreEntry &E = + FunctionsToIgnore.insert({FName, {-1, -1}}).first->getValue(); + if (E.first == -1) + E.first = FArg; + else if (E.second == -1) + E.second = FArg; + } + } +} + +void ento::registerMissingTerminatingZeroChecker(CheckerManager &Mgr) { + auto *Checker = Mgr.registerChecker(); + bool NoDefaultIgnore = Mgr.getAnalyzerOptions().getCheckerBooleanOption( + Checker, "OmitDefaultIgnoreFunctions"); + StringRef IgnoreList = Mgr.getAnalyzerOptions().getCheckerStringOption( + Checker, "IgnoreFunctionArgs"); + Checker->initOptions(NoDefaultIgnore, IgnoreList); +} + +bool ento::shouldRegisterMissingTerminatingZeroChecker( + const CheckerManager &Mgr) { + // Check if the char type has size of 8 bits (to avoid indexing differences)? + return true; +} diff --git a/clang/test/Analysis/analyzer-config.c b/clang/test/Analysis/analyzer-config.c index 7936273415ad4..d477987e51642 100644 --- a/clang/test/Analysis/analyzer-config.c +++ b/clang/test/Analysis/analyzer-config.c @@ -137,6 +137,8 @@ // CHECK-NEXT: unix.StdCLibraryFunctions:DisplayLoadedSummaries = false // CHECK-NEXT: unix.StdCLibraryFunctions:ModelPOSIX = true // CHECK-NEXT: unix.Stream:Pedantic = false +// CHECK-NEXT: unix.cstring.MissingTerminatingZero:IgnoreFunctionArgs = "" +// CHECK-NEXT: unix.cstring.MissingTerminatingZero:OmitDefaultIgnoreFunctions = false // CHECK-NEXT: unroll-loops = false // CHECK-NEXT: verbose-report-filename = false // CHECK-NEXT: widen-loops = false diff --git a/clang/test/Analysis/analyzer-enabled-checkers.c b/clang/test/Analysis/analyzer-enabled-checkers.c index 66b9be9795f12..127bd3298c76c 100644 --- a/clang/test/Analysis/analyzer-enabled-checkers.c +++ b/clang/test/Analysis/analyzer-enabled-checkers.c @@ -57,6 +57,7 @@ // CHECK-NEXT: unix.StdCLibraryFunctions // CHECK-NEXT: unix.Vfork // CHECK-NEXT: unix.cstring.BadSizeArg +// CHECK-NEXT: unix.cstring.MissingTerminatingZero // CHECK-NEXT: unix.cstring.NotNullTerminated // CHECK-NEXT: unix.cstring.NullArg diff --git a/clang/test/Analysis/cstring-missingterminatingzero-ignore.c b/clang/test/Analysis/cstring-missingterminatingzero-ignore.c new file mode 100644 index 0000000000000..8b96bf81c55cd --- /dev/null +++ b/clang/test/Analysis/cstring-missingterminatingzero-ignore.c @@ -0,0 +1,27 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=unix.cstring.MissingTerminatingZero \ +// RUN: -analyzer-config unix.cstring.MissingTerminatingZero:IgnoreFunctionArgs='strlen 0;strcpy 1' -verify=all,ignore %s +// RUN: %clang_analyze_cc1 -analyzer-checker=unix.cstring.MissingTerminatingZero \ +// RUN: -analyzer-config unix.cstring.MissingTerminatingZero:IgnoreFunctionArgs='strlen 0;strcpy 1' \ +// RUN: -analyzer-config unix.cstring.MissingTerminatingZero:OmitDefaultIgnoreFunctions=true -verify=all,omitdefault %s + +#include "Inputs/system-header-simulator.h" + +size_t test1(int i) { + char buf[1] = {1}; + return strlen(buf); +} + +void test2(char *dst) { + char src[1] = {1}; + strcpy(dst, src); +} + +int test3() { + const char buf[1] = {1}; + return execl("path", buf, 4); // all-warning{{String contains no terminating zero}} +} + +void test4(char *dst) { + char src[3] = {1, 2, 3}; + strncpy(dst, src, 3); // omitdefault-warning{{String contains no terminating zero}} +} diff --git a/clang/test/Analysis/cstring-missingterminatingzero.c b/clang/test/Analysis/cstring-missingterminatingzero.c new file mode 100644 index 0000000000000..d0ae6cb19db87 --- /dev/null +++ b/clang/test/Analysis/cstring-missingterminatingzero.c @@ -0,0 +1,91 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=unix.cstring.MissingTerminatingZero -verify %s + +#include "Inputs/system-header-simulator.h" + +void clang_analyzer_eval(int); + +size_t test_init_compound(int i) { + char src1[6] = {1,2,3,4,5,6}; + char src2[6] = {1,2,3,0,5,6}; + switch (i) { + case 1: + return strlen(src1); // expected-warning{{String contains no terminating zero}} + case 2: + return strlen(src1 + 1); // expected-warning{{String contains no terminating zero}} + case 3: + return strlen(src2); + case 4: + return strlen(src2 + 4); // expected-warning{{String contains no terminating zero}} + case 5: + return strlen(src2 + 3); + } + src1[5] = 0; + return strlen(src1); +} + +typedef char CHAR; + +size_t test_init_literal(int i) { + CHAR src1[] = "abcdef"; + int l = strlen(src1); + src1[6] = '.'; + src1[3] = 0; + switch (i) { + case 1: + return strlen(src1); + case 2: + return strlen(src1 + 4); // expected-warning{{String contains no terminating zero}} + } + return l; +} + +size_t test_init_assign(int i, char a) { + char src[6]; + src[1] = '1'; + src[2] = '2'; + src[4] = '4'; + src[5] = '5'; + + switch (i) { + case 0: + return strlen(src); + case 1: + return strlen(src + 1); + case 2: + return strlen(src + 2); + case 3: + return strlen(src + 3); + case 4: + return strlen(src + 4); // expected-warning{{String contains no terminating zero}} + } + src[5] = a; + size_t l = strlen(src + 4); + src[5] = 0; + l += strlen(src + 4); + src[5] = '5'; + return l + strlen(src + 4); // expected-warning{{String contains no terminating zero}} +} + +size_t test_assign1() { + char str1[5] = {'0','1','2','3','4'}; + char str2[5]; + str2[0] = str1[0]; + str2[1] = str1[1]; + str2[4] = str1[4]; + size_t l = strlen(str2); + return l + strlen(str2 + 4); // expected-warning{{String contains no terminating zero}} +} + +size_t test_assign2() { + char str1[5] = {1,2,3,4,5}; + char str2[5]; + str2[0] = str1[0]; + str2[4] = str2[0]; + return strlen(str2 + 4); // expected-warning{{String contains no terminating zero}} +} + +void test_ignore(char *dst) { + char str1[5] = {1,2,3,4,5}; + strncpy(dst, str1, 5); + strcpy(dst, str1); // expected-warning{{String contains no terminating zero}} +} diff --git a/clang/test/Analysis/std-c-library-functions-arg-enabled-checkers.c b/clang/test/Analysis/std-c-library-functions-arg-enabled-checkers.c index 8c6078a49c231..958d4d9298a1b 100644 --- a/clang/test/Analysis/std-c-library-functions-arg-enabled-checkers.c +++ b/clang/test/Analysis/std-c-library-functions-arg-enabled-checkers.c @@ -65,6 +65,7 @@ // CHECK-NEXT: unix.StdCLibraryFunctions // CHECK-NEXT: unix.Vfork // CHECK-NEXT: unix.cstring.BadSizeArg +// CHECK-NEXT: unix.cstring.MissingTerminatingZero // CHECK-NEXT: unix.cstring.NotNullTerminated // CHECK-NEXT: unix.cstring.NullArg