Skip to content

Commit 8c45e80

Browse files
committed
[lld/mac] Port typo correction for undefined symbols from ELF port
Ports: - core feature: https://reviews.llvm.org/D67039 - case mismatch: https://reviews.llvm.org/D70506 - extern "C" suggestions: https://reviews.llvm.org/D69592, https://reviews.llvm.org/D69650 Does not port https://reviews.llvm.org/D71735 since I believe that that doesn't apply to lld/Mach-O. Differential Revision: https://reviews.llvm.org/D135038
1 parent d55dd57 commit 8c45e80

File tree

4 files changed

+264
-3
lines changed

4 files changed

+264
-3
lines changed

lld/MachO/SymbolTable.cpp

Lines changed: 145 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -392,8 +392,138 @@ void macho::reportPendingDuplicateSymbols() {
392392
}
393393
}
394394

395+
// Check whether the definition name def is a mangled function name that matches
396+
// the reference name ref.
397+
static bool canSuggestExternCForCXX(StringRef ref, StringRef def) {
398+
llvm::ItaniumPartialDemangler d;
399+
std::string name = def.str();
400+
if (d.partialDemangle(name.c_str()))
401+
return false;
402+
char *buf = d.getFunctionName(nullptr, nullptr);
403+
if (!buf)
404+
return false;
405+
bool ret = ref == buf;
406+
free(buf);
407+
return ret;
408+
}
409+
410+
// Suggest an alternative spelling of an "undefined symbol" diagnostic. Returns
411+
// the suggested symbol, which is either in the symbol table, or in the same
412+
// file of sym.
413+
static const Symbol *getAlternativeSpelling(const Undefined &sym,
414+
std::string &pre_hint,
415+
std::string &post_hint) {
416+
DenseMap<StringRef, const Symbol *> map;
417+
if (sym.getFile() && sym.getFile()->kind() == InputFile::ObjKind) {
418+
// Build a map of local defined symbols.
419+
for (const Symbol *s : sym.getFile()->symbols)
420+
if (auto *defined = dyn_cast<Defined>(s))
421+
if (!defined->isExternal())
422+
map.try_emplace(s->getName(), s);
423+
}
424+
425+
auto suggest = [&](StringRef newName) -> const Symbol * {
426+
// If defined locally.
427+
if (const Symbol *s = map.lookup(newName))
428+
return s;
429+
430+
// If in the symbol table and not undefined.
431+
if (const Symbol *s = symtab->find(newName))
432+
if (dyn_cast<Undefined>(s) == nullptr)
433+
return s;
434+
435+
return nullptr;
436+
};
437+
438+
// This loop enumerates all strings of Levenshtein distance 1 as typo
439+
// correction candidates and suggests the one that exists as a non-undefined
440+
// symbol.
441+
StringRef name = sym.getName();
442+
for (size_t i = 0, e = name.size(); i != e + 1; ++i) {
443+
// Insert a character before name[i].
444+
std::string newName = (name.substr(0, i) + "0" + name.substr(i)).str();
445+
for (char c = '0'; c <= 'z'; ++c) {
446+
newName[i] = c;
447+
if (const Symbol *s = suggest(newName))
448+
return s;
449+
}
450+
if (i == e)
451+
break;
452+
453+
// Substitute name[i].
454+
newName = std::string(name);
455+
for (char c = '0'; c <= 'z'; ++c) {
456+
newName[i] = c;
457+
if (const Symbol *s = suggest(newName))
458+
return s;
459+
}
460+
461+
// Transpose name[i] and name[i+1]. This is of edit distance 2 but it is
462+
// common.
463+
if (i + 1 < e) {
464+
newName[i] = name[i + 1];
465+
newName[i + 1] = name[i];
466+
if (const Symbol *s = suggest(newName))
467+
return s;
468+
}
469+
470+
// Delete name[i].
471+
newName = (name.substr(0, i) + name.substr(i + 1)).str();
472+
if (const Symbol *s = suggest(newName))
473+
return s;
474+
}
475+
476+
// Case mismatch, e.g. Foo vs FOO.
477+
for (auto &it : map)
478+
if (name.equals_insensitive(it.first))
479+
return it.second;
480+
for (Symbol *sym : symtab->getSymbols())
481+
if (dyn_cast<Undefined>(sym) == nullptr &&
482+
name.equals_insensitive(sym->getName()))
483+
return sym;
484+
485+
// The reference may be a mangled name while the definition is not. Suggest a
486+
// missing extern "C".
487+
if (name.startswith("__Z")) {
488+
std::string buf = name.str();
489+
llvm::ItaniumPartialDemangler d;
490+
if (!d.partialDemangle(buf.c_str()))
491+
if (char *buf = d.getFunctionName(nullptr, nullptr)) {
492+
const Symbol *s = suggest((Twine("_") + buf).str());
493+
free(buf);
494+
if (s) {
495+
pre_hint = ": extern \"C\" ";
496+
return s;
497+
}
498+
}
499+
} else {
500+
StringRef name_without_underscore = name;
501+
name_without_underscore.consume_front("_");
502+
const Symbol *s = nullptr;
503+
for (auto &it : map)
504+
if (canSuggestExternCForCXX(name_without_underscore, it.first)) {
505+
s = it.second;
506+
break;
507+
}
508+
if (!s)
509+
for (Symbol *sym : symtab->getSymbols())
510+
if (canSuggestExternCForCXX(name_without_underscore, sym->getName())) {
511+
s = sym;
512+
break;
513+
}
514+
if (s) {
515+
pre_hint = " to declare ";
516+
post_hint = " as extern \"C\"?";
517+
return s;
518+
}
519+
}
520+
521+
return nullptr;
522+
}
523+
395524
static void reportUndefinedSymbol(const Undefined &sym,
396-
const UndefinedDiag &locations) {
525+
const UndefinedDiag &locations,
526+
bool correctSpelling) {
397527
std::string message = "undefined symbol";
398528
if (config->archMultiple)
399529
message += (" for arch " + getArchitectureName(config->arch())).str();
@@ -426,6 +556,17 @@ static void reportUndefinedSymbol(const Undefined &sym,
426556
("\n>>> referenced " + Twine(totalReferences - i) + " more times")
427557
.str();
428558

559+
if (correctSpelling) {
560+
std::string pre_hint = ": ", post_hint;
561+
if (const Symbol *corrected =
562+
getAlternativeSpelling(sym, pre_hint, post_hint)) {
563+
message +=
564+
"\n>>> did you mean" + pre_hint + toString(*corrected) + post_hint;
565+
if (corrected->getFile())
566+
message += "\n>>> defined in: " + toString(corrected->getFile());
567+
}
568+
}
569+
429570
if (config->undefinedSymbolTreatment == UndefinedSymbolTreatment::error)
430571
error(message);
431572
else if (config->undefinedSymbolTreatment ==
@@ -436,8 +577,9 @@ static void reportUndefinedSymbol(const Undefined &sym,
436577
}
437578

438579
void macho::reportPendingUndefinedSymbols() {
439-
for (const auto &undef : undefs)
440-
reportUndefinedSymbol(*undef.first, undef.second);
580+
// Enable spell corrector for the first 2 diagnostics.
581+
for (const auto &[i, undef] : llvm::enumerate(undefs))
582+
reportUndefinedSymbol(*undef.first, undef.second, i < 2);
441583

442584
// This function is called multiple times during execution. Clear the printed
443585
// diagnostics to avoid printing the same things again the next time.
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
# REQUIRES: x86
2+
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos %s -o %t.o
3+
4+
## Insert a character.
5+
## The spell corrector is enabled for the first two "undefined symbol" diagnostics.
6+
# RUN: echo 'call bcde; call abcd; call abde' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o
7+
# RUN: not %lld %t.o %t1.o -o /dev/null 2>&1 | FileCheck --check-prefix=INSERT %s -DFILE=%t.o
8+
9+
## Symbols defined in a dylib can be suggested.
10+
# RUN: %lld %t.o -dylib -o %t.dylib
11+
# RUN: not %lld %t.dylib %t1.o -o /dev/null 2>&1 | FileCheck --check-prefix=INSERT %s -DFILE=%t.dylib
12+
13+
# INSERT: error: undefined symbol: abde
14+
# INSERT-NEXT: >>> referenced by {{.*}}
15+
# INSERT-NEXT: >>> did you mean: abcde
16+
# INSERT-NEXT: >>> defined in: [[FILE]]
17+
# INSERT: error: undefined symbol: abcd
18+
# INSERT-NEXT: >>> referenced by {{.*}}
19+
# INSERT-NEXT: >>> did you mean: abcde
20+
# INSERT-NEXT: >>> defined in: [[FILE]]
21+
# INSERT: error: undefined symbol: bcde
22+
# INSERT-NEXT: >>> referenced by {{.*}}
23+
# INSERT-NOT: >>>
24+
25+
## Substitute a character.
26+
# RUN: echo 'call bbcde; call abcdd' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o
27+
# RUN: not %lld %t.o %t1.o -o /dev/null 2>&1 | FileCheck --check-prefix=SUBST %s
28+
29+
# SUBST: error: undefined symbol: abcdd
30+
# SUBST-NEXT: >>> referenced by {{.*}}
31+
# SUBST-NEXT: >>> did you mean: abcde
32+
# SUBST: error: undefined symbol: bbcde
33+
# SUBST-NEXT: >>> referenced by {{.*}}
34+
# SUBST-NEXT: >>> did you mean: abcde
35+
36+
## Delete a character.
37+
# RUN: echo 'call aabcde; call abcdee' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o
38+
# RUN: not %lld %t.o %t1.o -o /dev/null 2>&1 | FileCheck --check-prefix=DELETE %s
39+
40+
# DELETE: error: undefined symbol: abcdee
41+
# DELETE-NEXT: >>> referenced by {{.*}}
42+
# DELETE-NEXT: >>> did you mean: abcde
43+
# DELETE: error: undefined symbol: aabcde
44+
# DELETE-NEXT: >>> referenced by {{.*}}
45+
# DELETE-NEXT: >>> did you mean: abcde
46+
47+
## Transpose.
48+
# RUN: echo 'call bacde' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o
49+
# RUN: not %lld %t.o %t1.o -o /dev/null 2>&1 | FileCheck --check-prefix=TRANSPOSE %s
50+
51+
# TRANSPOSE: error: undefined symbol: bacde
52+
# TRANSPOSE-NEXT: >>> referenced by {{.*}}
53+
# TRANSPOSE-NEXT: >>> did you mean: abcde
54+
55+
## Missing const qualifier.
56+
# RUN: echo 'call __Z3fooPi' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o
57+
# RUN: not %lld %t.o %t1.o -demangle -o /dev/null 2>&1 | FileCheck --check-prefix=CONST %s
58+
## Local defined symbols.
59+
# RUN: echo '__Z3fooPKi: call __Z3fooPi' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o
60+
# RUN: not %lld %t1.o -demangle -o /dev/null 2>&1 | FileCheck --check-prefix=CONST %s
61+
62+
# CONST: error: undefined symbol: foo(int*)
63+
# CONST-NEXT: >>> referenced by {{.*}}
64+
# CONST-NEXT: >>> did you mean: foo(int const*)
65+
66+
## Case mismatch.
67+
# RUN: echo 'call __Z3FOOPKi' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o
68+
# RUN: not %lld %t.o %t1.o -demangle -o /dev/null 2>&1 | FileCheck --check-prefix=CASE %s
69+
# RUN: echo '__Z3fooPKi: call __Z3FOOPKi' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o
70+
# RUN: not %lld %t1.o -demangle -o /dev/null 2>&1 | FileCheck --check-prefix=CASE %s
71+
72+
# CASE: error: undefined symbol: FOO(int const*)
73+
# CASE-NEXT: >>> referenced by {{.*}}
74+
# CASE-NEXT: >>> did you mean: foo(int const*)
75+
76+
.globl _main, abcde, __Z3fooPKi
77+
_main:
78+
abcde:
79+
__Z3fooPKi:
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# REQUIRES: x86
2+
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos %s -o %t.o
3+
4+
## The reference is mangled while the definition is not, suggest a missing
5+
## extern "C".
6+
# RUN: echo 'call __Z3fooi' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o
7+
# RUN: not %lld %t.o %t1.o -demangle -o /dev/null 2>&1 | FileCheck %s
8+
9+
# CHECK: error: undefined symbol: foo(int)
10+
# CHECK-NEXT: >>> referenced by {{.*}}
11+
# CHECK-NEXT: >>> did you mean: extern "C" _foo
12+
13+
## Don't suggest for nested names like F::foo() and foo::foo().
14+
# RUN: echo 'call __ZN1F3fooEv; call __ZN3fooC1Ev' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t2.o
15+
# RUN: not %lld %t.o %t2.o -o /dev/null 2>&1 | FileCheck /dev/null --implicit-check-not='did you mean'
16+
17+
.globl _start, _foo
18+
_start:
19+
_foo:
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# REQUIRES: x86
2+
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos %s -o %t.o
3+
4+
## The definition is mangled while the reference is not, suggest an arbitrary
5+
## C++ overload.
6+
# RUN: echo '.globl __Z3fooi; __Z3fooi:' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o
7+
# RUN: not %lld %t.o %t1.o -demangle -o /dev/null 2>&1 | FileCheck %s
8+
9+
## Check that we can suggest a local definition.
10+
# RUN: echo '__Z3fooi: call _foo' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t2.o
11+
# RUN: not %lld %t2.o -demangle -o /dev/null 2>&1 | FileCheck %s
12+
13+
# CHECK: error: undefined symbol: _foo
14+
# CHECK-NEXT: >>> referenced by {{.*}}
15+
# CHECK-NEXT: >>> did you mean to declare foo(int) as extern "C"?
16+
17+
## Don't suggest nested names whose base name is "foo", e.g. F::foo().
18+
# RUN: echo '.globl __ZN1F3fooEv; __ZN1F3fooEv:' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t3.o
19+
# RUN: not %lld %t.o %t3.o -o /dev/null 2>&1 | FileCheck /dev/null --implicit-check-not='did you mean'
20+
21+
call _foo

0 commit comments

Comments
 (0)