Skip to content

Commit fa0cf3d

Browse files
authored
[llvm][aarch64] Fix Arm64EC name mangling algorithm (#115567)
Arm64EC uses a special name mangling mode that adds `$$h` between the symbol name and its type. In MSVC's name mangling, `@` is used to separate the name and type, but it is also used for other purposes, such as the separator between path components in a fully qualified name. The original algorithm was quite fragile and made assumptions that didn't hold true for all MSVC mangled symbols, so instead of trying to improve it we now use the demangler to indicate where the insertion point should be (i.e., to parse the fully-qualified name and return the current string offset). Also fixed `isArm64ECMangledFunctionName` to search for `@$$h`, since the `$$h` must always come after a `@`. Fixes #115231
1 parent ec066d3 commit fa0cf3d

File tree

6 files changed

+113
-12
lines changed

6 files changed

+113
-12
lines changed

llvm/include/llvm/Demangle/Demangle.h

+4
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#define LLVM_DEMANGLE_DEMANGLE_H
1111

1212
#include <cstddef>
13+
#include <optional>
1314
#include <string>
1415
#include <string_view>
1516

@@ -54,6 +55,9 @@ enum MSDemangleFlags {
5455
char *microsoftDemangle(std::string_view mangled_name, size_t *n_read,
5556
int *status, MSDemangleFlags Flags = MSDF_None);
5657

58+
std::optional<size_t>
59+
getArm64ECInsertionPointInMangledName(std::string_view MangledName);
60+
5761
// Demangles a Rust v0 mangled symbol.
5862
char *rustDemangle(std::string_view MangledName);
5963

llvm/include/llvm/Demangle/MicrosoftDemangle.h

+4
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#ifndef LLVM_DEMANGLE_MICROSOFTDEMANGLE_H
1010
#define LLVM_DEMANGLE_MICROSOFTDEMANGLE_H
1111

12+
#include "llvm/Demangle/Demangle.h"
1213
#include "llvm/Demangle/MicrosoftDemangleNodes.h"
1314

1415
#include <cassert>
@@ -141,6 +142,9 @@ enum class FunctionIdentifierCodeGroup { Basic, Under, DoubleUnder };
141142
// It has a set of functions to parse mangled symbols into Type instances.
142143
// It also has a set of functions to convert Type instances to strings.
143144
class Demangler {
145+
friend std::optional<size_t>
146+
llvm::getArm64ECInsertionPointInMangledName(std::string_view MangledName);
147+
144148
public:
145149
Demangler() = default;
146150
virtual ~Demangler() = default;

llvm/include/llvm/IR/Mangler.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ std::optional<std::string> getArm64ECDemangledFunctionName(StringRef Name);
6464
/// Check if an ARM64EC function name is mangled.
6565
bool inline isArm64ECMangledFunctionName(StringRef Name) {
6666
return Name[0] == '#' ||
67-
(Name[0] == '?' && Name.find("$$h") != StringRef::npos);
67+
(Name[0] == '?' && Name.find("@$$h") != StringRef::npos);
6868
}
6969

7070
} // End llvm namespace

llvm/lib/Demangle/MicrosoftDemangle.cpp

+19
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include <array>
2525
#include <cctype>
2626
#include <cstdio>
27+
#include <optional>
2728
#include <string_view>
2829
#include <tuple>
2930

@@ -2428,6 +2429,24 @@ void Demangler::dumpBackReferences() {
24282429
std::printf("\n");
24292430
}
24302431

2432+
std::optional<size_t>
2433+
llvm::getArm64ECInsertionPointInMangledName(std::string_view MangledName) {
2434+
std::string_view ProcessedName{MangledName};
2435+
2436+
// We only support this for MSVC-style C++ symbols.
2437+
if (!consumeFront(ProcessedName, '?'))
2438+
return std::nullopt;
2439+
2440+
// The insertion point is just after the name of the symbol, so parse that to
2441+
// remove it from the processed name.
2442+
Demangler D;
2443+
D.demangleFullyQualifiedSymbolName(ProcessedName);
2444+
if (D.Error)
2445+
return std::nullopt;
2446+
2447+
return MangledName.length() - ProcessedName.length();
2448+
}
2449+
24312450
char *llvm::microsoftDemangle(std::string_view MangledName, size_t *NMangled,
24322451
int *Status, MSDemangleFlags Flags) {
24332452
Demangler D;

llvm/lib/IR/Mangler.cpp

+8-11
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "llvm/ADT/SmallString.h"
1515
#include "llvm/ADT/StringExtras.h"
1616
#include "llvm/ADT/Twine.h"
17+
#include "llvm/Demangle/Demangle.h"
1718
#include "llvm/IR/DataLayout.h"
1819
#include "llvm/IR/DerivedTypes.h"
1920
#include "llvm/IR/Function.h"
@@ -299,21 +300,17 @@ std::optional<std::string> llvm::getArm64ECMangledFunctionName(StringRef Name) {
299300
return std::optional<std::string>(("#" + Name).str());
300301
}
301302

302-
// Insert the ARM64EC "$$h" tag after the mangled function name.
303+
// If the name contains $$h, then it is already mangled.
303304
if (Name.contains("$$h"))
304305
return std::nullopt;
305-
size_t InsertIdx = Name.find("@@");
306-
size_t ThreeAtSignsIdx = Name.find("@@@");
307-
if (InsertIdx != std::string::npos && InsertIdx != ThreeAtSignsIdx) {
308-
InsertIdx += 2;
309-
} else {
310-
InsertIdx = Name.find("@");
311-
if (InsertIdx != std::string::npos)
312-
InsertIdx++;
313-
}
306+
307+
// Ask the demangler where we should insert "$$h".
308+
auto InsertIdx = getArm64ECInsertionPointInMangledName(Name);
309+
if (!InsertIdx)
310+
return std::nullopt;
314311

315312
return std::optional<std::string>(
316-
(Name.substr(0, InsertIdx) + "$$h" + Name.substr(InsertIdx)).str());
313+
(Name.substr(0, *InsertIdx) + "$$h" + Name.substr(*InsertIdx)).str());
317314
}
318315

319316
std::optional<std::string>

llvm/unittests/IR/ManglerTest.cpp

+77
Original file line numberDiff line numberDiff line change
@@ -172,4 +172,81 @@ TEST(ManglerTest, GOFF) {
172172
"L#foo");
173173
}
174174

175+
// Verifies Arm64EC name handling on a set of already-mangled names: each name
// must be recognised as mangled, must not be mangled a second time, must
// demangle to a name that is not recognised as mangled, and must survive a
// demangle/mangle round trip unchanged.
TEST(ManglerTest, Arm64EC) {
  constexpr std::string_view Arm64ECNames[] = {
      // Basic C name.
      "#Foo",

      // Basic C++ name.
      "?foo@@$$hYAHXZ",

      // Regression test: https://github.com/llvm/llvm-project/issues/115231
      "?GetValue@?$Wrapper@UA@@@@$$hQEBAHXZ",

      // Symbols from:
      // ```
      // namespace A::B::C::D {
      // struct Base {
      //   virtual int f() { return 0; }
      // };
      // }
      // struct Derived : public A::B::C::D::Base {
      //   virtual int f() override { return 1; }
      // };
      // A::B::C::D::Base* MakeObj() { return new Derived(); }
      // ```
      // void * __cdecl operator new(unsigned __int64)
      "??2@$$hYAPEAX_K@Z",
      // public: virtual int __cdecl A::B::C::D::Base::f(void)
      "?f@Base@D@C@B@A@@$$hUEAAHXZ",
      // public: __cdecl A::B::C::D::Base::Base(void)
      "??0Base@D@C@B@A@@$$hQEAA@XZ",
      // public: virtual int __cdecl Derived::f(void)
      "?f@Derived@@$$hUEAAHXZ",
      // public: __cdecl Derived::Derived(void)
      "??0Derived@@$$hQEAA@XZ",
      // struct A::B::C::D::Base * __cdecl MakeObj(void)
      "?MakeObj@@$$hYAPEAUBase@D@C@B@A@@XZ",

      // Symbols from:
      // ```
      // template <typename T> struct WW { struct Z{}; };
      // template <typename X> struct Wrapper {
      //   int GetValue(typename WW<X>::Z) const;
      // };
      // struct A { };
      // template <typename X> int Wrapper<X>::GetValue(typename WW<X>::Z) const
      // { return 3; }
      // template class Wrapper<A>;
      // ```
      // public: int __cdecl Wrapper<struct A>::GetValue(struct WW<struct
      // A>::Z)const
      "?GetValue@?$Wrapper@UA@@@@$$hQEBAHUZ@?$WW@UA@@@@@Z",
  };

  for (const auto &Arm64ECName : Arm64ECNames) {
    // Check that this is a mangled name.
    EXPECT_TRUE(isArm64ECMangledFunctionName(Arm64ECName))
        << "Test case: " << Arm64ECName;
    // Refuse to mangle it again.
    EXPECT_FALSE(getArm64ECMangledFunctionName(Arm64ECName).has_value())
        << "Test case: " << Arm64ECName;

    // Demangle. This must be a fatal assertion: the checks below dereference
    // the optional, which is invalid when demangling failed.
    auto Arm64Name = getArm64ECDemangledFunctionName(Arm64ECName);
    ASSERT_TRUE(Arm64Name.has_value()) << "Test case: " << Arm64ECName;
    // Check that it is not mangled.
    EXPECT_FALSE(isArm64ECMangledFunctionName(*Arm64Name))
        << "Test case: " << Arm64ECName;
    // Refuse to demangle it again.
    EXPECT_FALSE(getArm64ECDemangledFunctionName(*Arm64Name).has_value())
        << "Test case: " << Arm64ECName;

    // Round-trip.
    auto RoundTripArm64ECName = getArm64ECMangledFunctionName(*Arm64Name);
    EXPECT_EQ(RoundTripArm64ECName, Arm64ECName)
        << "Test case: " << Arm64ECName;
  }
}
251+
175252
} // end anonymous namespace

0 commit comments

Comments
 (0)