Skip to content

Commit fe91905

Browse files
committed
RuntimeCallsiteTrie
1 parent 68571f9 commit fe91905

File tree

5 files changed

+295
-1
lines changed

5 files changed

+295
-1
lines changed

compiler-rt/lib/ctx_profile/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,13 @@ add_compiler_rt_component(ctx_profile)
22

33
set(CTX_PROFILE_SOURCES
44
CtxInstrProfiling.cpp
5+
RootAutoDetector.cpp
56
)
67

78
set(CTX_PROFILE_HEADERS
89
CtxInstrContextNode.h
910
CtxInstrProfiling.h
11+
RootAutoDetector.h
1012
)
1113

1214
include_directories(..)
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
//===- RootAutoDetector.cpp - detect contextual profiling roots -----------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "RootAutoDetector.h"
10+
11+
#include "sanitizer_common/sanitizer_common.h"
12+
#include "sanitizer_common/sanitizer_placement_new.h"
13+
#include <assert.h>
14+
#include <dlfcn.h>
15+
#include <pthread.h>
16+
17+
using namespace __ctx_profile;
18+
// A set emulated with a DenseMap: only the keys matter. The bool mapped value
// is never read by the code in this file.
template <typename T> using Set = DenseMap<T, bool>;
19+
20+
/// Look up, via dladdr, the start address of the symbol that contains
/// CallsiteAddress. Returns 0 when dladdr reports that the address could not
/// be matched.
uptr PerThreadCallsiteTrie::getFctStartAddr(uptr CallsiteAddress) const {
  // this requires --linkopt=-Wl,--export-dynamic
  Dl_info SymbolInfo;
  const int Matched =
      dladdr(reinterpret_cast<const void *>(CallsiteAddress), &SymbolInfo);
  if (Matched == 0)
    return 0;
  return reinterpret_cast<uptr>(SymbolInfo.dli_saddr);
}
27+
28+
void PerThreadCallsiteTrie::insertStack(const StackTrace &ST) {
29+
auto *Current = &TheTrie;
30+
// the stack is backwards - the first callsite is at the top.
31+
for (int I = ST.size - 1; I >= 0; --I) {
32+
uptr ChildAddr = ST.trace[I];
33+
auto [Iter, _] = Current->Children.insert({ChildAddr, Trie(ChildAddr)});
34+
++Current->Count;
35+
Current = &Iter->second;
36+
}
37+
}
38+
39+
/// Walk the callsite trie one depth level at a time, starting at start(), and
/// return the first level whose callsites resolve (via getFctStartAddr) to
/// more than one distinct function start address. The result maps each such
/// start address to the sum of count() over that level's nodes resolving to
/// it. If no level has more than one distinct function, the result is empty.
DenseMap<uptr, uint64_t> PerThreadCallsiteTrie::determineRoots() const {
  // The working assumption is a message pump design: roots are the functions
  // the message pump calls. The pump is, for all practical purposes, an
  // infinite loop fetching data from a queue, and the root functions must
  // return or the pump would not work. Roots are therefore detected as the
  // first place in the trie (from the top) where a function calls 2 or more
  // functions.
  //
  // The trie's nodes are callsites, and different child nodes may correspond
  // to the same function.
  //
  // For example, writing function(callsite):
  //   f1(csf1_1) -> f2(csf2_1) -> f3
  //              -> f2(csf2_2) -> f4
  //
  // is stored in the trie as:
  //   csf1_1 -> csf2_1 -> f3
  //          -> csf2_2 -> f4
  //
  // Control flow is known to return to f2, but not whether it ever returns to
  // f1 - f2 could be the message pump.
  //
  // Rather than converting the callsite tree into a function tree, it is
  // cheaper to count the distinct functions at each depth. Once that count
  // exceeds 1 we have reached the potential roots, and everything above them
  // is treated as non-root.
  DenseMap<uptr, uint64_t> Roots;
  Set<const Trie *> Level;
  Level.insert({&start(), {}});

  while (!Level.empty()) {
    Set<const Trie *> Deeper;
    DenseMap<uptr, uint64_t> PerFunction;

    // Queue one child for the next level.
    auto Enqueue = [&](auto &ChildKVP) {
      Deeper.insert({&ChildKVP.second, true});
      return true;
    };

    // Credit a node's count to the function containing its callsite, then
    // queue all of its children.
    auto Visit = [&](auto &KVP) {
      const Trie *Node = KVP.first;
      const uptr FctStart = getFctStartAddr(Node->address());
      PerFunction[FctStart] += Node->count();
      Node->children().forEach(Enqueue);
      return true;
    };

    Level.forEach(Visit);

    if (PerFunction.size() > 1) {
      Roots.swap(PerFunction);
      break;
    }
    Level.swap(Deeper);
  }
  return Roots;
}
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
/*===- RootAutoDetector.h - auto-detect roots for ctxprof ----------------===*\
2+
|*
3+
|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
|* See https://llvm.org/LICENSE.txt for license information.
5+
|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
|*
7+
\*===----------------------------------------------------------------------===*/
8+
9+
#ifndef CTX_PROFILE_ROOTAUTODETECTOR_H_
10+
#define CTX_PROFILE_ROOTAUTODETECTOR_H_
11+
12+
#include "sanitizer_common/sanitizer_dense_map.h"
13+
#include "sanitizer_common/sanitizer_internal_defs.h"
14+
#include "sanitizer_common/sanitizer_stacktrace.h"
15+
#include <pthread.h>
16+
#include <sanitizer/common_interface_defs.h>
17+
18+
// NOTE(review): these using-directives sit at namespace scope in a header, so
// every file that includes it inherits them. Consider qualifying names (or
// moving the directives inside __ctx_profile) - confirm no includer relies on
// them first.
using namespace __asan;
19+
using namespace __sanitizer;
20+
21+
namespace __ctx_profile {
22+
23+
/// A trie. A node is the address of a callsite in a function activation. A
24+
/// child is a callsite in the activation made from the callsite corresponding
25+
/// to the parent.
26+
class Trie final {
27+
friend class PerThreadCallsiteTrie;
28+
const uptr CallsiteAddress;
29+
uint64_t Count = 0;
30+
DenseMap<uptr, Trie> Children;
31+
32+
public:
33+
uptr address() const { return CallsiteAddress; }
34+
uint64_t count() const { return Count; }
35+
const DenseMap<uptr, Trie> &children() const { return Children; }
36+
37+
Trie(uptr CallsiteAddress = 0) : CallsiteAddress(CallsiteAddress) {}
38+
};
39+
40+
/// Capture all the stack traces observed for a specific thread. The "for a
41+
/// specific thread" part is not enforced, but assumed in determineRoots.
42+
class PerThreadCallsiteTrie {
43+
Trie TheTrie;
44+
45+
protected:
46+
/// Return the runtime start address of the function that contains the call at
47+
/// the runtime address CallsiteAddress. May be overriden for easy testing.
48+
virtual uptr getFctStartAddr(uptr CallsiteAddress) const;
49+
50+
public:
51+
PerThreadCallsiteTrie(const PerThreadCallsiteTrie &) = delete;
52+
PerThreadCallsiteTrie(PerThreadCallsiteTrie &&) = default;
53+
PerThreadCallsiteTrie() = default;
54+
55+
virtual ~PerThreadCallsiteTrie() = default;
56+
57+
void insertStack(const StackTrace &ST);
58+
59+
/// Return the runtime address of root functions, as determined for this
60+
/// thread, together with the number of samples that included them.
61+
DenseMap<uptr, uint64_t> determineRoots() const;
62+
63+
const Trie &start() const { return TheTrie; }
64+
};
65+
} // namespace __ctx_profile
66+
#endif

compiler-rt/lib/ctx_profile/tests/CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,12 @@ append_list_if(COMPILER_RT_HAS_WVARIADIC_MACROS_FLAG -Wno-variadic-macros CTX_PR
2222
file(GLOB CTX_PROFILE_HEADERS ../*.h)
2323

2424
set(CTX_PROFILE_SOURCES
25-
../CtxInstrProfiling.cpp)
25+
../CtxInstrProfiling.cpp
26+
../RootAutoDetector.cpp)
2627

2728
set(CTX_PROFILE_UNITTESTS
2829
CtxInstrProfilingTest.cpp
30+
RootAutoDetectorTest.cpp
2931
driver.cpp)
3032

3133
include_directories(../../../include)
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
#include "../RootAutoDetector.h"
2+
#include "sanitizer_common/sanitizer_array_ref.h"
3+
#include "gmock/gmock.h"
4+
#include "gtest/gtest.h"
5+
6+
using namespace __ctx_profile;
7+
using ::testing::IsEmpty;
8+
using ::testing::Not;
9+
using ::testing::SizeIs;
10+
class MockCallsiteTree final : public PerThreadCallsiteTrie {
11+
// Return the first multiple of 100.
12+
uptr getFctStartAddr(uptr CallsiteAddress) const override {
13+
return (CallsiteAddress / 100) * 100;
14+
}
15+
};
16+
17+
// Utility for describing a preorder traversal. By default it captures a value -
18+
// the value of a node. Implicitly nodes are expected to have 1 child. If they
19+
// have none, we place a Marker::term and if they have more than one, we place a
20+
// Marker::split(nr_of_children)
21+
// For example, using lists: (1 (2 3) (4 (5 6)))
22+
// is a list of markers:
23+
// 1, split(2), 2, term, 3, term, 4, split(2), 5, term, 6, term
24+
class Marker {
25+
enum class Kind { End, Value, Split };
26+
const uptr Value;
27+
const Kind K;
28+
Marker(uptr V, Kind S) : Value(V), K(S) {}
29+
30+
public:
31+
Marker(uptr V) : Marker(V, Kind::Value) {}
32+
33+
static Marker split(uptr V) { return Marker(V, Kind::Split); }
34+
static Marker term() { return Marker(0, Kind::End); }
35+
36+
bool isSplit() const { return K == Kind::Split; }
37+
bool isTerm() const { return K == Kind::End; }
38+
bool isVal() const { return K == Kind::Value; }
39+
40+
bool operator==(const Marker &M) const {
41+
return Value == M.Value && K == M.K;
42+
}
43+
};
44+
45+
// Asserts that Preorder is non-empty and that its first element equals M, then
// removes that element from Preorder. The ASSERT_* macros return from this
// function on failure, so in that case Preorder is left unchanged; the caller
// is not aborted.
void popAndCheck(ArrayRef<Marker> &Preorder, Marker M) {
46+
ASSERT_THAT(Preorder, Not(IsEmpty()));
47+
ASSERT_EQ(Preorder[0], M);
48+
Preorder = Preorder.drop_front();
49+
}
50+
51+
void checkSameImpl(const Trie &T, ArrayRef<Marker> &Preorder) {
52+
popAndCheck(Preorder, T.address());
53+
54+
if (T.children().empty()) {
55+
popAndCheck(Preorder, Marker::term());
56+
return;
57+
}
58+
59+
if (T.children().size() > 1)
60+
popAndCheck(Preorder, Marker::split(T.children().size()));
61+
62+
T.children().forEach([&](const auto &KVP) {
63+
checkSameImpl(KVP.second, Preorder);
64+
return true;
65+
});
66+
}
67+
68+
void checkSame(const PerThreadCallsiteTrie &RCT, ArrayRef<Marker> Preorder) {
69+
checkSameImpl(RCT.start(), Preorder);
70+
ASSERT_THAT(Preorder, IsEmpty());
71+
}
72+
73+
// Inserts four stacks one after another and checks the trie shape and the
// start node's count after each insertion.
TEST(PerThreadCallsiteTrieTest, Insert) {
  PerThreadCallsiteTrie R;
  uptr Stack1[]{4, 3, 2, 1};
  R.insertStack(StackTrace(Stack1, 4));
  checkSame(R, ArrayRef<Marker>({0, 1, 2, 3, 4, Marker::term()}));
  // Use gtest assertions rather than the sanitizer CHECK_EQ macro so that a
  // mismatch is reported as a test failure.
  EXPECT_EQ(R.start().count(), 1u);

  // Extends the existing path by one deeper frame.
  uptr Stack2[]{5, 4, 3, 2, 1};
  R.insertStack(StackTrace(Stack2, 5));
  checkSame(R, ArrayRef<Marker>({0, 1, 2, 3, 4, 5, Marker::term()}));
  EXPECT_EQ(R.start().count(), 2u);

  // Branches off below node 3.
  uptr Stack3[]{6, 3, 2, 1};
  R.insertStack(StackTrace(Stack3, 4));
  checkSame(R, ArrayRef<Marker>({0, 1, 2, 3, Marker::split(2), 4, 5,
                                 Marker::term(), 6, Marker::term()}));
  EXPECT_EQ(R.start().count(), 3u);

  // Branches off below node 2.
  uptr Stack4[]{7, 2, 1};
  R.insertStack(StackTrace(Stack4, 3));
  checkSame(R, ArrayRef<Marker>({0, 1, 2, Marker::split(2), 7, Marker::term(),
                                 3, Marker::split(2), 4, 5, Marker::term(), 6,
                                 Marker::term()}));
  EXPECT_EQ(R.start().count(), 4u);
}
96+
97+
// Two stacks that share callsite 102 and then go through two different
// callsites (202, 203) of the same mock function 200. The first level with
// more than one distinct function is the next one: 302 -> 300 and 402 -> 400.
TEST(PerThreadCallsiteTrieTest, DetectRoots) {
  MockCallsiteTree T;

  uptr Stack1[]{501, 302, 202, 102};
  T.insertStack(StackTrace(Stack1, 4));

  uptr Stack2[]{601, 402, 203, 102};
  T.insertStack(StackTrace(Stack2, 4));

  auto R = T.determineRoots();
  EXPECT_THAT(R, SizeIs(2U));
  EXPECT_TRUE(R.contains(300));
  EXPECT_TRUE(R.contains(400));
}
110+
111+
// With a single stack every level of the trie holds exactly one node, so no
// level ever has more than one function and no roots are reported.
TEST(PerThreadCallsiteTrieTest, DetectRootsNoBranches) {
  MockCallsiteTree T;

  uptr Stack1[]{501, 302, 202, 102};
  T.insertStack(StackTrace(Stack1, 4));

  auto R = T.determineRoots();
  EXPECT_THAT(R, IsEmpty());
}
120+
121+
// Same shape as DetectRoots, but one callsite cannot be attributed to a
// function: it is reported under the start address 0.
TEST(PerThreadCallsiteTrieTest, DetectRootsUnknownFct) {
  MockCallsiteTree T;

  uptr Stack1[]{501, 302, 202, 102};
  T.insertStack(StackTrace(Stack1, 4));

  // The MockCallsiteTree address resolver rounds down to a multiple of 100, so
  // the callsite at 40 is mapped to 0.
  uptr Stack2[]{601, 40, 203, 102};
  T.insertStack(StackTrace(Stack2, 4));

  auto R = T.determineRoots();
  ASSERT_THAT(R, SizeIs(2U));
  EXPECT_TRUE(R.contains(300));
  EXPECT_TRUE(R.contains(0));
}

0 commit comments

Comments
 (0)