Skip to content

Commit f9ad249

Browse files
authored
[StableHash] Implement stable global name for the hash computation (#106156)
LLVM often extends global names by adding suffixes to distinguish unique identities. However, these suffixes are not always stable across different runs and build environments. To address this issue, I implemented `get_stable_name` to ignore such suffixes and obtain the original name. This approach is not new: PGO and BOLT already handle this issue similarly. Using the stable name obtained from `get_stable_name`, I implemented `stable_hash_name`, which uses the same underlying `xxh3_64bits` algorithm as before.
1 parent f2f78b2 commit f9ad249

File tree

4 files changed

+181
-11
lines changed

4 files changed

+181
-11
lines changed

llvm/include/llvm/ADT/StableHashing.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,23 @@ inline stable_hash stable_hash_combine(stable_hash A, stable_hash B,
5050
return stable_hash_combine(Hashes);
5151
}
5252

53+
// Removes suffixes introduced by LLVM from the name to enhance stability and
54+
// maintain closeness to the original name across different builds.
55+
// Returns the part of Name that precedes any `.llvm.` suffix and, within that,
// any `.__uniq.` suffix. Only the prefix halves (P1, P2) of each split are
// used; the suffix halves (S1, S2) are discarded.
// NOTE(review): relies on StringRef::rsplit splitting at the last occurrence
// of the separator and yielding the whole string as the first element when the
// separator is absent -- confirm against the StringRef documentation.
inline StringRef get_stable_name(StringRef Name) {
56+
// First cut at `.llvm.`; P1 is what precedes it.
auto [P1, S1] = Name.rsplit(".llvm.");
57+
// Then cut P1 at `.__uniq.`, so a name carrying both suffixes is fully stripped.
auto [P2, S2] = P1.rsplit(".__uniq.");
58+
return P2;
59+
}
60+
61+
// Generates a consistent hash value for a given input name across different
62+
// program executions and environments. This function first converts the input
63+
// name into a stable form using the `get_stable_name` function, and then
64+
// computes a hash of this stable name. For instance, `foo.llvm.1234` would have
65+
// the same hash as `foo.llvm.5678`.
66+
// Hashes the result of get_stable_name(Name) with xxh3_64bits rather than
// hashing Name directly, so two names that get_stable_name maps to the same
// string produce the same stable_hash.
inline stable_hash stable_hash_name(StringRef Name) {
67+
return xxh3_64bits(get_stable_name(Name));
68+
}
69+
5370
} // namespace llvm
5471

5572
#endif

llvm/lib/CodeGen/MachineStableHash.cpp

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -84,25 +84,33 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
8484
}
8585

8686
case MachineOperand::MO_MachineBasicBlock:
87-
StableHashBailingMachineBasicBlock++;
87+
++StableHashBailingMachineBasicBlock;
8888
return 0;
8989
case MachineOperand::MO_ConstantPoolIndex:
90-
StableHashBailingConstantPoolIndex++;
90+
++StableHashBailingConstantPoolIndex;
9191
return 0;
9292
case MachineOperand::MO_BlockAddress:
93-
StableHashBailingBlockAddress++;
93+
++StableHashBailingBlockAddress;
9494
return 0;
9595
case MachineOperand::MO_Metadata:
96-
StableHashBailingMetadataUnsupported++;
97-
return 0;
98-
case MachineOperand::MO_GlobalAddress:
99-
StableHashBailingGlobalAddress++;
96+
++StableHashBailingMetadataUnsupported;
10097
return 0;
98+
case MachineOperand::MO_GlobalAddress: {
99+
const GlobalValue *GV = MO.getGlobal();
100+
if (!GV->hasName()) {
101+
++StableHashBailingGlobalAddress;
102+
return 0;
103+
}
104+
auto Name = GV->getName();
105+
return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
106+
stable_hash_name(Name), MO.getOffset());
107+
}
108+
101109
case MachineOperand::MO_TargetIndex: {
102110
if (const char *Name = MO.getTargetIndexName())
103111
return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
104-
xxh3_64bits(Name), MO.getOffset());
105-
StableHashBailingTargetIndexNoName++;
112+
stable_hash_name(Name), MO.getOffset());
113+
++StableHashBailingTargetIndexNoName;
106114
return 0;
107115
}
108116

@@ -113,7 +121,8 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
113121

114122
case MachineOperand::MO_ExternalSymbol:
115123
return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
116-
MO.getOffset(), xxh3_64bits(MO.getSymbolName()));
124+
MO.getOffset(),
125+
stable_hash_name(MO.getSymbolName()));
117126

118127
case MachineOperand::MO_RegisterMask:
119128
case MachineOperand::MO_RegisterLiveOut: {
@@ -149,7 +158,7 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
149158
case MachineOperand::MO_MCSymbol: {
150159
auto SymbolName = MO.getMCSymbol()->getName();
151160
return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
152-
xxh3_64bits(SymbolName));
161+
stable_hash_name(SymbolName));
153162
}
154163
case MachineOperand::MO_CFIIndex:
155164
return stable_hash_combine(MO.getType(), MO.getTargetFlags(),

llvm/unittests/MIR/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ set(LLVM_LINK_COMPONENTS
1515

1616
add_llvm_unittest(MIRTests
1717
MachineMetadata.cpp
18+
MachineStableHashTest.cpp
1819
)
1920

2021
target_link_libraries(MIRTests PRIVATE LLVMTestingSupport)
Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
//===- MachineStableHashTest.cpp ------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "llvm/CodeGen/MachineStableHash.h"
10+
#include "llvm/CodeGen/MIRParser/MIRParser.h"
11+
#include "llvm/CodeGen/MachineFunction.h"
12+
#include "llvm/CodeGen/MachineModuleInfo.h"
13+
#include "llvm/FileCheck/FileCheck.h"
14+
#include "llvm/IR/Module.h"
15+
#include "llvm/MC/TargetRegistry.h"
16+
#include "llvm/Support/SourceMgr.h"
17+
#include "llvm/Support/TargetSelect.h"
18+
#include "llvm/Target/TargetMachine.h"
19+
#include "gtest/gtest.h"
20+
21+
using namespace llvm;
22+
23+
// Test fixture for the machine stable-hash tests. It owns an LLVMContext, a
// Module and a MIRParser, and provides helpers to create a target machine and
// to parse a MIR string into a Module plus its machine functions.
class MachineStableHashTest : public testing::Test {
24+
public:
25+
MachineStableHashTest() {}
26+
27+
protected:
28+
LLVMContext Context;
29+
// Module used by the tests; SetUp() installs a placeholder named "Dummy".
std::unique_ptr<Module> M;
30+
// Parser created by parseMIR(); stored as a member so it stays alive after
// parseMIR() returns.
std::unique_ptr<MIRParser> MIR;
31+
32+
// Initializes all target infos, targets and target MCs.
// NOTE(review): presumably needed so TargetRegistry::lookupTarget in
// createTargetMachine() can resolve the requested triple -- confirm.
static void SetUpTestCase() {
33+
InitializeAllTargetInfos();
34+
InitializeAllTargets();
35+
InitializeAllTargetMCs();
36+
}
37+
38+
void SetUp() override { M = std::make_unique<Module>("Dummy", Context); }
39+
40+
// Looks up the target for triple TT and creates a target machine for it with
// the given CPU and feature string and default-constructed TargetOptions.
// Returns nullptr when the lookup fails; the lookup's Error string is filled
// in by lookupTarget but is not reported here.
std::unique_ptr<LLVMTargetMachine>
41+
createTargetMachine(std::string TT, StringRef CPU, StringRef FS) {
42+
std::string Error;
43+
const Target *T = TargetRegistry::lookupTarget(TT, Error);
44+
if (!T)
45+
return nullptr;
46+
TargetOptions Options;
47+
// The result of T->createTargetMachine is downcast to LLVMTargetMachine and
// handed to a unique_ptr, which takes ownership of it.
return std::unique_ptr<LLVMTargetMachine>(
48+
static_cast<LLVMTargetMachine *>(T->createTargetMachine(
49+
TT, CPU, FS, Options, std::nullopt, std::nullopt)));
50+
}
51+
52+
// Parses MIRCode: creates the parser (kept in the MIR member), parses the IR
// module, applies TM's data layout to it, then parses the machine functions
// into MMI. Returns the parsed module, or nullptr if any step fails.
std::unique_ptr<Module> parseMIR(const TargetMachine &TM, StringRef MIRCode,
53+
MachineModuleInfo &MMI) {
54+
// NOTE(review): Diagnostic is declared but not used in this function.
SMDiagnostic Diagnostic;
55+
std::unique_ptr<MemoryBuffer> MBuffer = MemoryBuffer::getMemBuffer(MIRCode);
56+
MIR = createMIRParser(std::move(MBuffer), Context);
57+
if (!MIR)
58+
return nullptr;
59+
60+
std::unique_ptr<Module> Mod = MIR->parseIRModule();
61+
if (!Mod)
62+
return nullptr;
63+
64+
Mod->setDataLayout(TM.createDataLayout());
65+
66+
// A true result from parseMachineFunctions is treated as failure.
if (MIR->parseMachineFunctions(*Mod, MMI)) {
67+
// NOTE(review): this resets the fixture's M member, not the local Mod that
// was just parsed -- confirm this is intended.
M.reset();
68+
return nullptr;
69+
}
70+
71+
return Mod;
72+
}
73+
};
74+
75+
// Checks that the stable hash of a machine function ignores `.llvm.{number}`
// and `.__uniq.{number}` suffixes on a called global's name but does not
// ignore an unrelated suffix such as `.invalid.{number}`. The MIR below
// defines f1..f4, whose bodies differ only in the callee of the BL
// instruction: @goo, @goo.llvm.123, @goo.__uniq.456 and @goo.invalid.789.
TEST_F(MachineStableHashTest, StableGlobalName) {
76+
auto TM = createTargetMachine(("aarch64--"), "", "");
77+
// Skip the test when no target machine could be created for this triple.
if (!TM)
78+
GTEST_SKIP();
79+
StringRef MIRString = R"MIR(
80+
--- |
81+
define void @f1() { ret void }
82+
define void @f2() { ret void }
83+
define void @f3() { ret void }
84+
define void @f4() { ret void }
85+
declare void @goo()
86+
declare void @goo.llvm.123()
87+
declare void @goo.__uniq.456()
88+
declare void @goo.invalid.789()
89+
...
90+
---
91+
name: f1
92+
alignment: 16
93+
tracksRegLiveness: true
94+
frameInfo:
95+
maxAlignment: 16
96+
machineFunctionInfo: {}
97+
body: |
98+
bb.0:
99+
liveins: $lr
100+
BL @goo
101+
RET undef $lr
102+
103+
...
104+
---
105+
name: f2
106+
body: |
107+
bb.0:
108+
liveins: $lr
109+
BL @goo.llvm.123
110+
RET undef $lr
111+
...
112+
---
113+
name: f3
114+
body: |
115+
bb.0:
116+
liveins: $lr
117+
BL @goo.__uniq.456
118+
RET undef $lr
119+
...
120+
---
121+
name: f4
122+
body: |
123+
bb.0:
124+
liveins: $lr
125+
BL @goo.invalid.789
126+
RET undef $lr
127+
...
128+
)MIR";
129+
MachineModuleInfo MMI(TM.get());
130+
// Replace the fixture's placeholder module with the one parsed from the MIR.
M = parseMIR(*TM, MIRString, MMI);
131+
ASSERT_TRUE(M);
132+
auto *MF1 = MMI.getMachineFunction(*M->getFunction("f1"));
133+
auto *MF2 = MMI.getMachineFunction(*M->getFunction("f2"));
134+
auto *MF3 = MMI.getMachineFunction(*M->getFunction("f3"));
135+
auto *MF4 = MMI.getMachineFunction(*M->getFunction("f4"));
136+
137+
// f1 (calls @goo) is the baseline every other function is compared against.
EXPECT_EQ(stableHashValue(*MF1), stableHashValue(*MF2))
138+
<< "Expect the suffix, `.llvm.{number}` to be ignored.";
139+
EXPECT_EQ(stableHashValue(*MF1), stableHashValue(*MF3))
140+
<< "Expect the suffix, `.__uniq.{number}` to be ignored.";
141+
// Do not ignore `.invalid.{number}`.
142+
EXPECT_NE(stableHashValue(*MF1), stableHashValue(*MF4));
143+
}

0 commit comments

Comments
 (0)