Skip to content

Commit d9a78c9

Browse files
committed
RuntimeCallsiteTrie
1 parent e04d739 commit d9a78c9

File tree

5 files changed

+286
-1
lines changed

5 files changed

+286
-1
lines changed

compiler-rt/lib/ctx_profile/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,13 @@ add_compiler_rt_component(ctx_profile)
22

33
set(CTX_PROFILE_SOURCES
44
CtxInstrProfiling.cpp
5+
RootAutoDetector.cpp
56
)
67

78
set(CTX_PROFILE_HEADERS
89
CtxInstrContextNode.h
910
CtxInstrProfiling.h
11+
RootAutoDetector.h
1012
)
1113

1214
include_directories(..)
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
//===- RootAutoDetector.cpp - detect contextual profiling roots -----------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "RootAutoDetector.h"
10+
#include "CtxInstrProfiling.h"
11+
12+
#include "sanitizer_common/sanitizer_common.h"
13+
#include "sanitizer_common/sanitizer_mutex.h"
14+
#include "sanitizer_common/sanitizer_placement_new.h"
15+
#include <assert.h>
16+
#include <dlfcn.h>
17+
#include <pthread.h>
18+
19+
using namespace __ctx_profile;
20+
21+
/// Map the runtime address of a callsite to the start address of the symbol
/// that dladdr reports for it.
///
/// Note: this requires --linkopt=-Wl,--export-dynamic.
///
/// \param CallsiteAddress runtime address to look up.
/// \returns the reported symbol address converted to uptr, or 0 when dladdr
/// reports failure.
uptr PerThreadCallsiteTrie::getFctStartAddr(uptr CallsiteAddress) const {
  Dl_info SymbolInfo;
  const void *const Address = reinterpret_cast<const void *>(CallsiteAddress);
  // dladdr returns 0 on failure; there is nothing to report in that case.
  if (dladdr(Address, &SymbolInfo) == 0)
    return 0;
  return reinterpret_cast<uptr>(SymbolInfo.dli_saddr);
}
28+
29+
void PerThreadCallsiteTrie::insertStack(const StackTrace &ST) {
30+
auto *Current = &TheTrie;
31+
// the stack is backwards - the first callsite is at the top.
32+
for (int I = ST.size - 1; I >= 0; --I) {
33+
auto ChildAddr = ST.trace[I];
34+
auto [Iter, _] = Current->Children.insert({ChildAddr, Trie(ChildAddr)});
35+
++Current->Count;
36+
Current = &Iter->second;
37+
}
38+
}
39+
40+
/// Walk the trie level by level and return the functions found at the first
/// level that holds more than one distinct function, each mapped to the sum
/// of the Count values of the nodes that resolved to it. The result is empty
/// when no level has more than one distinct function.
DenseMap<uptr, uint64_t> PerThreadCallsiteTrie::determineRoots() const {
  // The underlying assumption is a message pump design: the pump is, for all
  // practical purposes, an infinite loop that fetches data from a queue and
  // calls into the roots. The roots must return - the pump could not work
  // otherwise. Roots are therefore detected as the first place in the trie
  // (going down from its start) where a function calls 2 or more functions.
  //
  // The trie is made of callsites, not functions, and several child nodes may
  // belong to one and the same function.
  //
  // For example, writing function(callsite):
  // f1(csf1_1) -> f2(csf2_1) -> f3
  //            -> f2(csf2_2) -> f4
  //
  // is stored in the trie as:
  // csf1_1 -> csf2_1 -> f3
  //        -> csf2_2 -> f4
  //
  // Here we can tell that control returns to f2, but not whether it ever
  // returns to f1: f2 may well be the message pump.
  //
  // What we need is a function tree rather than a callsite tree. Instead of
  // building one, it is cheaper to count the distinct functions at each
  // depth. The first depth where that count exceeds 1 holds the potential
  // roots; everything above it is treated as a non-root.
  DenseMap<uptr, uint64_t> Roots;
  Set<const Trie *> Level;
  Level.insert({&start(), {}});

  while (!Level.empty()) {
    Set<const Trie *> NextLevel;
    DenseMap<uptr, uint64_t> FunctionsAtLevel;

    // Queue a child node for the next iteration of the outer loop.
    auto EnqueueChild = [&](auto &ChildKVP) {
      NextLevel.insert({&ChildKVP.second, {}});
      return true;
    };
    // Attribute the node's count to its function and queue its children.
    auto VisitNode = [&](auto &KVP) {
      const Trie *Node = KVP.first;
      auto FctStart = getFctStartAddr(Node->CallsiteAddress);
      FunctionsAtLevel[FctStart] += Node->Count;
      Node->Children.forEach(EnqueueChild);
      return true;
    };
    Level.forEach(VisitNode);

    if (FunctionsAtLevel.size() <= 1) {
      // Still a single function (or none) at this depth: go one level down.
      Level.swap(NextLevel);
      continue;
    }
    Roots.swap(FunctionsAtLevel);
    break;
  }
  return Roots;
}
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
/*===- RootAutoDetector.h - auto-detect roots for ctxprof ----------------===*\
2+
|*
3+
|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
|* See https://llvm.org/LICENSE.txt for license information.
5+
|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
|*
7+
\*===----------------------------------------------------------------------===*/
8+
9+
#ifndef CTX_PROFILE_ROOTAUTODETECTOR_H_
10+
#define CTX_PROFILE_ROOTAUTODETECTOR_H_
11+
12+
#include "sanitizer_common/sanitizer_dense_map.h"
13+
#include "sanitizer_common/sanitizer_internal_defs.h"
14+
#include "sanitizer_common/sanitizer_stacktrace.h"
15+
#include <pthread.h>
16+
#include <sanitizer/common_interface_defs.h>
17+
18+
using namespace __asan;
19+
using namespace __sanitizer;
20+
21+
namespace __ctx_profile {
22+
23+
template <typename T> using Set = DenseMap<T, bool>;
24+
25+
/// A trie. A node is the address of a callsite in a function activation. A
26+
/// child is a callsite in the activation made from the callsite corresponding
27+
/// to the parent.
28+
class Trie final {
29+
friend class PerThreadCallsiteTrie;
30+
const uptr CallsiteAddress;
31+
uint64_t Count = 0;
32+
DenseMap<uptr, Trie> Children;
33+
34+
public:
35+
uptr address() const { return CallsiteAddress; }
36+
uint64_t count() const { return Count; }
37+
const DenseMap<uptr, Trie> &children() const { return Children; }
38+
39+
Trie(uptr CallsiteAddress = 0) : CallsiteAddress(CallsiteAddress) {}
40+
};
41+
42+
/// Capture all the stack traces observed for a specific thread. The "for a
43+
/// specific thread" part is not enforced, but assumed in determineRoots.
44+
class PerThreadCallsiteTrie {
45+
Trie TheTrie;
46+
47+
protected:
48+
/// Return the runtime start address of the function that contains the call at
49+
/// the runtime address CallsiteAddress. May be overriden for easy testing.
50+
virtual uptr getFctStartAddr(uptr CallsiteAddress) const;
51+
52+
public:
53+
PerThreadCallsiteTrie(const PerThreadCallsiteTrie &) = delete;
54+
PerThreadCallsiteTrie(PerThreadCallsiteTrie &&) = default;
55+
PerThreadCallsiteTrie() = default;
56+
57+
virtual ~PerThreadCallsiteTrie() = default;
58+
59+
void insertStack(const StackTrace &ST);
60+
61+
/// Return the runtime address of root functions, as determined for this
62+
/// thread, together with the number of samples that included them.
63+
DenseMap<uptr, uint64_t> determineRoots() const;
64+
65+
const Trie &start() const { return TheTrie; }
66+
};
67+
} // namespace __ctx_profile
68+
#endif

compiler-rt/lib/ctx_profile/tests/CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,12 @@ append_list_if(COMPILER_RT_HAS_WVARIADIC_MACROS_FLAG -Wno-variadic-macros CTX_PR
2222
file(GLOB CTX_PROFILE_HEADERS ../*.h)
2323

2424
set(CTX_PROFILE_SOURCES
25-
../CtxInstrProfiling.cpp)
25+
../CtxInstrProfiling.cpp
26+
../RootAutoDetector.cpp)
2627

2728
set(CTX_PROFILE_UNITTESTS
2829
CtxInstrProfilingTest.cpp
30+
RootAutoDetectorTest.cpp
2931
driver.cpp)
3032

3133
include_directories(../../../include)
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
#include "../RootAutoDetector.h"
2+
#include "sanitizer_common/sanitizer_array_ref.h"
3+
#include "gtest/gtest.h"
4+
5+
using namespace __ctx_profile;
6+
7+
class MockCallsiteTree final : public PerThreadCallsiteTrie {
8+
// Return the first multiple of 100.
9+
uptr getFctStartAddr(uptr CallsiteAddress) const override {
10+
return (CallsiteAddress / 100) * 100;
11+
}
12+
};
13+
14+
class Marker {
15+
enum class Kind { End, Value, Split };
16+
const uptr Value;
17+
const Kind K;
18+
Marker(uptr V, Kind S) : Value(V), K(S) {}
19+
20+
public:
21+
Marker(uptr V) : Marker(V, Kind::Value) {}
22+
23+
static Marker split(uptr V) { return Marker(V, Kind::Split); }
24+
static Marker term() { return Marker(0, Kind::End); }
25+
26+
bool isSplit() const { return K == Kind::Split; }
27+
bool isTerm() const { return K == Kind::End; }
28+
bool isVal() const { return K == Kind::Value; }
29+
30+
bool operator==(const Marker &M) const {
31+
return Value == M.Value && K == M.K;
32+
}
33+
};
34+
35+
// Assert that the next expected marker in Preorder is M, then consume it.
// Preorder is taken by reference: on success it is advanced past the checked
// element; if an assertion fails the function returns without advancing.
void popAndCheck(ArrayRef<Marker> &Preorder, Marker M) {
  // Running out of expected markers is a failure in itself.
  ASSERT_FALSE(Preorder.empty());
  ASSERT_EQ(Preorder[0], M);

  Preorder = Preorder.drop_front();
}
40+
41+
void checkSameImpl(const Trie &T, ArrayRef<Marker> &Preorder) {
42+
popAndCheck(Preorder, T.address());
43+
44+
if (T.children().size() == 0) {
45+
popAndCheck(Preorder, Marker::term());
46+
return;
47+
}
48+
49+
if (T.children().size() > 1)
50+
popAndCheck(Preorder, Marker::split(T.children().size()));
51+
52+
T.children().forEach([&](const auto &KVP) {
53+
checkSameImpl(KVP.second, Preorder);
54+
return true;
55+
});
56+
}
57+
58+
void checkSame(const PerThreadCallsiteTrie &RCT, ArrayRef<Marker> Preorder) {
59+
checkSameImpl(RCT.start(), Preorder);
60+
ASSERT_TRUE(Preorder.empty());
61+
}
62+
63+
// Insert four stacks one after another and check the shape of the trie after
// each insertion. Stacks are listed deepest frame first, so {4, 3, 2, 1}
// produces the path 0 -> 1 -> 2 -> 3 -> 4 (0 being the start node).
TEST(PerThreadCallsiteTrieTest, Insert) {
  PerThreadCallsiteTrie R;

  // A first stack creates a single path.
  uptr Stack1[]{4, 3, 2, 1};
  R.insertStack(StackTrace(Stack1, 4));
  checkSame(R, ArrayRef<Marker>({0, 1, 2, 3, 4, Marker::term()}));

  // One more frame below 4 extends the same path.
  uptr Stack2[]{5, 4, 3, 2, 1};
  R.insertStack(StackTrace(Stack2, 5));
  checkSame(R, ArrayRef<Marker>({0, 1, 2, 3, 4, 5, Marker::term()}));

  // A different callee under 3 makes node 3 split in two.
  uptr Stack3[]{6, 3, 2, 1};
  R.insertStack(StackTrace(Stack3, 4));
  checkSame(R, ArrayRef<Marker>({0, 1, 2, 3, Marker::split(2), 4, 5,
                                 Marker::term(), 6, Marker::term()}));

  // A different callee under 2 adds a second split, higher up.
  uptr Stack4[]{7, 2, 1};
  R.insertStack(StackTrace(Stack4, 3));
  checkSame(R, ArrayRef<Marker>({0, 1, 2, Marker::split(2), 7, Marker::term(),
                                 3, Marker::split(2), 4, 5, Marker::term(), 6,
                                 Marker::term()}));
}
84+
85+
// Two stacks that share "function" 100 and then 200 (through the distinct
// callsites 202 and 203) before diverging into 300 and 400. The first level
// with two distinct functions is {300, 400}, so those are the roots.
TEST(PerThreadCallsiteTrieTest, DetectRoots) {
  MockCallsiteTree T;

  uptr Stack1[]{501, 302, 202, 102};
  T.insertStack(StackTrace(Stack1, 4));

  uptr Stack2[]{601, 402, 203, 102};
  T.insertStack(StackTrace(Stack2, 4));

  auto R = T.determineRoots();
  EXPECT_EQ(R.size(), 2U);
  EXPECT_TRUE(R.contains(300));
  EXPECT_TRUE(R.contains(400));
}
98+
99+
// With a single stack the trie is one straight path: no level ever holds more
// than one function, so no roots are reported.
TEST(PerThreadCallsiteTrieTest, DetectRootsNoBranches) {
  MockCallsiteTree T;

  uptr Stack1[]{501, 302, 202, 102};
  T.insertStack(StackTrace(Stack1, 4));

  auto R = T.determineRoots();
  EXPECT_EQ(R.size(), 0U);
}
108+
109+
// Same layout as DetectRoots, except that one of the diverging callsites
// resolves to function address 0. It must still be reported as a root
// candidate, next to 300.
TEST(PerThreadCallsiteTrieTest, DetectRootsUnknownFct) {
  MockCallsiteTree T;

  uptr Stack1[]{501, 302, 202, 102};
  T.insertStack(StackTrace(Stack1, 4));

  // The MockCallsiteTree resolver rounds addresses down to a multiple of 100,
  // so 40 - being below 100 - is mapped to 0.
  uptr Stack2[]{601, 40, 203, 102};
  T.insertStack(StackTrace(Stack2, 4));

  auto R = T.determineRoots();
  EXPECT_EQ(R.size(), 2U);
  EXPECT_TRUE(R.contains(300));
  EXPECT_TRUE(R.contains(0));
}

0 commit comments

Comments
 (0)