//===--- NoRecursionCheck.cpp - clang-tidy --------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "NoRecursionCheck.h"
#include "clang/AST/ASTContext.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/Analysis/CallGraph.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/SCCIterator.h"

using namespace clang::ast_matchers;

namespace clang {
namespace tidy {
namespace misc {

namespace {

/// Much like SmallSet, with two differences:
/// 1. It can *only* be constructed from an ArrayRef<>. If the element count
///    is small, there is no copy and said storage *must* outlive us.
/// 2. It is immutable: the way it was constructed is the way it will stay.
template <typename T, unsigned SmallSize> class ImmutableSmallSet {
  ArrayRef<T> Vector;
  llvm::DenseSet<T> Set;

  static_assert(SmallSize <= 32, "N should be small");

  bool isSmall() const { return Set.empty(); }

public:
  using size_type = size_t;

  ImmutableSmallSet() = delete;
  ImmutableSmallSet(const ImmutableSmallSet &) = delete;
  ImmutableSmallSet(ImmutableSmallSet &&) = delete;
  T &operator=(const ImmutableSmallSet &) = delete;
  T &operator=(ImmutableSmallSet &&) = delete;

  // WARNING: Storage *must* outlive us if we decide that the size is small.
  ImmutableSmallSet(ArrayRef<T> Storage) {
    // Is the size small enough to just keep using the existing storage?
    if (Storage.size() <= SmallSize) {
      Vector = Storage;
      return;
    }

    // We've decided that it isn't performant to keep using the vector.
    // Let's migrate the data into Set.
    Set.reserve(Storage.size());
    Set.insert(Storage.begin(), Storage.end());
  }

  /// count - Return 1 if the element is in the set, 0 otherwise.
  size_type count(const T &V) const {
    if (isSmall()) {
      // Since the collection is small, just do a linear search.
      return llvm::find(Vector, V) == Vector.end() ? 0 : 1;
    }

    return Set.count(V);
  }
};
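
// A minimal usage sketch for the set above (illustrative only; the element
// type and sizes are hypothetical): with SmallSize == 4, a three-element
// ArrayRef stays in "small" mode, is searched linearly, and must outlive the
// set, while anything larger is copied into the DenseSet.
//
//   llvm::SmallVector<int, 8> Storage = {1, 2, 3};
//   ImmutableSmallSet<int, 4> Elts(Storage); // small mode: no copy is made
//   assert(Elts.count(2) == 1 && Elts.count(7) == 0);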

/// Much like SmallSetVector, but with one difference:
/// while the size is \p SmallSize or less, checking whether an element is
/// already in the set is done with a linear search over the vector; once the
/// size grows beyond \p SmallSize, we look in the set instead.
/// FIXME: upstream this into SetVector/SmallSetVector itself.
template <typename T, unsigned SmallSize> class SmartSmallSetVector {
public:
  using size_type = size_t;

private:
  SmallVector<T, SmallSize> Vector;
  llvm::DenseSet<T> Set;

  static_assert(SmallSize <= 32, "N should be small");

  // Are we still using Vector for uniqueness tracking?
  bool isSmall() const { return Set.empty(); }

  // Will one more entry cause Vector to switch away from small-size storage?
  bool entiretyOfVectorSmallSizeIsOccupied() const {
    assert(isSmall() && Vector.size() <= SmallSize &&
           "Shouldn't ask if we have already [should have] migrated into Set.");
    return Vector.size() == SmallSize;
  }

  void populateSet() {
    assert(Set.empty() && "Should not have already utilized the Set.");
    // Magical growth factor prediction: to how many elements do we expect to
    // sanely grow after switching away from small-size storage?
    const size_t NewMaxElts = 4 * Vector.size();
    Vector.reserve(NewMaxElts);
    Set.reserve(NewMaxElts);
    Set.insert(Vector.begin(), Vector.end());
  }

  /// count - Return 1 if the element is in the set, 0 otherwise.
  size_type count(const T &V) const {
    if (isSmall()) {
      // Since the collection is small, just do a linear search.
      return llvm::find(Vector, V) == Vector.end() ? 0 : 1;
    }
    // Look it up in the Set.
    return Set.count(V);
  }

  bool setInsert(const T &V) {
    if (count(V) != 0)
      return false; // Already exists.
    // Does not exist; we need to record it.
    if (isSmall()) { // Are we still using Vector for uniqueness tracking?
      // Will one more entry fit within the small-sized Vector?
      if (!entiretyOfVectorSmallSizeIsOccupied())
        return true; // We'll insert into the vector right afterwards anyway.
      // Time to switch to Set.
      populateSet();
    }
    // Set time!
    // Note that this must happen after populateSet() might have been called.
    bool SetInsertionSucceeded = Set.insert(V).second;
    (void)SetInsertionSucceeded;
    assert(SetInsertionSucceeded && "We did check that no such value existed");
    return true;
  }

public:
  /// Insert a new element into the SmartSmallSetVector.
  /// \returns true if the element was inserted into the SmartSmallSetVector.
  bool insert(const T &X) {
    bool Result = setInsert(X);
    if (Result)
      Vector.push_back(X);
    return Result;
  }

  /// Clear the SmartSmallSetVector and return the underlying vector.
  decltype(Vector) takeVector() {
    Set.clear();
    return std::move(Vector);
  }
};
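
// A minimal usage sketch for the set-vector above (illustrative only; the
// element type and values are hypothetical): insert() deduplicates, and
// takeVector() hands back the unique elements in insertion order.
//
//   SmartSmallSetVector<int, 4> V;
//   V.insert(1); // returns true, newly inserted
//   V.insert(2); // returns true, newly inserted
//   V.insert(1); // returns false, already present
//   llvm::SmallVector<int, 4> InOrder = V.takeVector(); // holds {1, 2}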

constexpr unsigned SmallCallStackSize = 16;
constexpr unsigned SmallSCCSize = 32;

using CallStackTy =
    llvm::SmallVector<CallGraphNode::CallRecord, SmallCallStackSize>;

// In the given SCC, find *some* call stack that will be cyclic.
// This will only find *one* such stack; it might not be the smallest one,
// and there may be other loops.
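//
// Illustrative example (hypothetical input, not from this file): for the
// two-function cycle
//
//   void c();
//   void b() { c(); }
//   void c() { b(); }
//
// the SCC is {b, c}; starting from whichever element happens to be first in
// the SCC (say it is b), the returned call stack would be {b, c, b}. The last
// node repeats an earlier one, and that repeated node marks where the actual
// cycle begins.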
CallStackTy PathfindSomeCycle(ArrayRef<CallGraphNode *> SCC) {
  // We'll need to be able to performantly look up whether some CallGraphNode
  // is in SCC or not, so cache all the SCC elements in a set.
  const ImmutableSmallSet<CallGraphNode *, SmallSCCSize> SCCElts(SCC);

  // Is node N part of the current SCC?
  auto NodeIsPartOfSCC = [&SCCElts](CallGraphNode *N) {
    return SCCElts.count(N) != 0;
  };

  // Track the call stack that will cause a cycle.
  SmartSmallSetVector<CallGraphNode::CallRecord, SmallCallStackSize>
      CallStackSet;

  // Arbitrarily take the first element of SCC as the entry point.
  CallGraphNode::CallRecord EntryNode(SCC.front(), /*CallExpr=*/nullptr);
  // Continue recursing into subsequent callees that are part of this SCC,
  // and are thus known to be part of the call graph loop, until a loop forms.
  CallGraphNode::CallRecord *Node = &EntryNode;
  while (true) {
    // Did we see this node before?
    if (!CallStackSet.insert(*Node))
      break; // Cycle completed! Note: we didn't insert the node into stack!
    // Else, perform depth-first traversal: out of all callees, pick the first
    // one that is part of this SCC. This is not guaranteed to yield the
    // shortest cycle.
    Node = llvm::find_if(Node->Callee->callees(), NodeIsPartOfSCC);
  }

  // Note that we failed to insert the last node, the one that completes the
  // cycle. But we really want to have it, so insert it manually into the
  // stack only.
  CallStackTy CallStack = CallStackSet.takeVector();
  CallStack.emplace_back(*Node);

  return CallStack;
}

} // namespace

void NoRecursionCheck::registerMatchers(MatchFinder *Finder) {
  Finder->addMatcher(translationUnitDecl().bind("TUDecl"), this);
}

void NoRecursionCheck::handleSCC(ArrayRef<CallGraphNode *> SCC) {
  assert(!SCC.empty() && "Empty SCC does not make sense.");

  // First of all, call out every strongly connected function.
  for (CallGraphNode *N : SCC) {
    Decl *D = N->getDecl();
    diag(D->getLocation(), "function %0 is within a recursive call chain")
        << cast<NamedDecl>(D);
  }

  // Now, SCC only tells us about strongly connected function declarations in
  // the call graph. It doesn't *really* tell us about the cycles they form.
  // And there may be more than one cycle in SCC.
  // So let's form a call stack that eventually exposes *some* cycle.
  const CallStackTy EventuallyCyclicCallStack = PathfindSomeCycle(SCC);
  assert(!EventuallyCyclicCallStack.empty() && "We should've found the cycle");

  // While the last node of the call stack does cause a loop, due to the way we
  // pathfind the cycle, the loop does not necessarily begin at the first node
  // of the call stack, so drop front nodes of the call stack until it does.
  const auto CyclicCallStack =
      ArrayRef<CallGraphNode::CallRecord>(EventuallyCyclicCallStack)
          .drop_until([LastNode = EventuallyCyclicCallStack.back()](
                          CallGraphNode::CallRecord FrontNode) {
            return FrontNode == LastNode;
          });
  assert(CyclicCallStack.size() >= 2 && "Cycle requires at least 2 frames");

  // Which function did we decide to be the entry point that leads to the
  // recursion?
  Decl *CycleEntryFn = CyclicCallStack.front().Callee->getDecl();
  // And now, for ease of understanding, let's print the call sequence that
  // forms the cycle in question.
  diag(CycleEntryFn->getLocation(),
       "example recursive call chain, starting from function %0",
       DiagnosticIDs::Note)
      << cast<NamedDecl>(CycleEntryFn);
  for (int CurFrame = 1, NumFrames = CyclicCallStack.size();
       CurFrame != NumFrames; ++CurFrame) {
    CallGraphNode::CallRecord PrevNode = CyclicCallStack[CurFrame - 1];
    CallGraphNode::CallRecord CurrNode = CyclicCallStack[CurFrame];

    Decl *PrevDecl = PrevNode.Callee->getDecl();
    Decl *CurrDecl = CurrNode.Callee->getDecl();

    diag(CurrNode.CallExpr->getBeginLoc(),
         "Frame #%0: function %1 calls function %2 here:", DiagnosticIDs::Note)
        << CurFrame << cast<NamedDecl>(PrevDecl) << cast<NamedDecl>(CurrDecl);
  }

  diag(CyclicCallStack.back().CallExpr->getBeginLoc(),
       "... which was the starting point of the recursive call chain; there "
       "may be other cycles",
       DiagnosticIDs::Note);
}
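
// For the two-function cycle sketched above (b() calling c() and c() calling
// b()), the output of handleSCC() would look roughly like this (illustrative;
// the wording is taken from the diagnostics emitted above):
//
//   warning: function 'b' is within a recursive call chain
//   warning: function 'c' is within a recursive call chain
//   note: example recursive call chain, starting from function 'b'
//   note: Frame #1: function 'b' calls function 'c' here:
//   note: Frame #2: function 'c' calls function 'b' here:
//   note: ... which was the starting point of the recursive call chain;
//         there may be other cycles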

void NoRecursionCheck::check(const MatchFinder::MatchResult &Result) {
  // Build call graph for the entire translation unit.
  const auto *TU = Result.Nodes.getNodeAs<TranslationUnitDecl>("TUDecl");
  CallGraph CG;
  CG.addToCallGraph(const_cast<TranslationUnitDecl *>(TU));

  // Look for cycles in the call graph by looking for strongly connected
  // components (SCCs).
  for (llvm::scc_iterator<CallGraph *> SCCI = llvm::scc_begin(&CG),
                                       SCCE = llvm::scc_end(&CG);
       SCCI != SCCE; ++SCCI) {
    if (!SCCI.hasLoop()) // We only care about cycles, not standalone nodes.
      continue;
    handleSCC(*SCCI);
  }
}
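
// Note (illustrative, not part of the logic above): even a directly
// self-recursive function such as
//
//   void foo() { foo(); }   // hypothetical input
//
// forms a single-node SCC whose node has an edge to itself, so hasLoop()
// is true for it and it gets diagnosed as well.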

} // namespace misc
} // namespace tidy
} // namespace clang