Skip to content

Commit 213dbdb

Browse files
committed
[lld-macho] Overhaul map file code
The previous map file code was modeled after LLD-ELF's implementation. However, ld64's map file differs quite a bit from LLD-ELF's. I've revamped our map file implementation so it is better able to emit ld64-style map files. Notable differences: * ld64 doesn't demangle symbols in map files, regardless of whether `-demangle` is passed. So we don't have to bother with `getSymbolStrings()`. * ld64 doesn't emit symbols in cstring sections; it emits just the literal values. Moreover, it emits these literal values regardless of whether they are labeled with a symbol. * ld64 emits map file entries for things that are not strictly symbols, such as unwind info, GOT entries, etc. That isn't handled in this diff, but this redesign makes them easy to implement. Additionally, the previous implementation sorted the symbols so as to emit them in address order. This was slow and unnecessary -- the symbols can already be traversed in address order by walking the list of OutputSections. This brings significant speedups. Here are the numbers from the chromium_framework_less_dwarf benchmark on my Mac Pro, with the `-map` argument added to the response file: base diff difference (95% CI) sys_time 2.922 ± 0.059 2.950 ± 0.085 [ -0.7% .. +2.5%] user_time 11.464 ± 0.191 8.290 ± 0.123 [ -28.7% .. -26.7%] wall_time 11.235 ± 0.175 9.184 ± 0.169 [ -19.3% .. -17.2%] samples 16 23 (It's worth noting that map files are written in parallel with the output binary, but they often took longer to write than the binary itself.) Finally, I did further cleanups to the map-file.s test -- there was no real need to have a custom-named section. There were also alt_entry symbol declarations that had no corresponding definition. Either way, neither custom-named sections nor alt_entry symbols trigger special code paths in our map file implementation. Reviewed By: #lld-macho, Roger Differential Revision: https://reviews.llvm.org/D137368
1 parent 70633a8 commit 213dbdb

File tree

2 files changed

+126
-95
lines changed

2 files changed

+126
-95
lines changed

lld/MachO/MapFile.cpp

Lines changed: 88 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,10 @@
66
//
77
//===----------------------------------------------------------------------===//
88
//
9-
// This file implements the -map option. It shows lists in order and
10-
// hierarchically the outputFile, arch, input files, output sections and
11-
// symbols:
9+
// This file implements the -map option, which maps address ranges to their
10+
// respective contents, plus the input file these contents were originally from.
11+
// The contents (typically symbols) are listed in address order. Dead-stripped
12+
// contents are included as well.
1213
//
1314
// # Path: test
1415
// # Arch: x86_64
@@ -28,15 +29,16 @@
2829
//===----------------------------------------------------------------------===//
2930

3031
#include "MapFile.h"
32+
#include "ConcatOutputSection.h"
3133
#include "Config.h"
3234
#include "InputFiles.h"
3335
#include "InputSection.h"
34-
#include "OutputSection.h"
3536
#include "OutputSegment.h"
3637
#include "Symbols.h"
3738
#include "SyntheticSections.h"
3839
#include "Target.h"
3940
#include "lld/Common/ErrorHandler.h"
41+
#include "llvm/ADT/DenseMap.h"
4042
#include "llvm/Support/Parallel.h"
4143
#include "llvm/Support/TimeProfiler.h"
4244

@@ -45,69 +47,75 @@ using namespace llvm::sys;
4547
using namespace lld;
4648
using namespace lld::macho;
4749

50+
struct CStringInfo {
51+
uint32_t fileIndex;
52+
StringRef str;
53+
};
54+
4855
struct MapInfo {
4956
SmallVector<InputFile *> files;
50-
SmallVector<Defined *> liveSymbols;
5157
SmallVector<Defined *> deadSymbols;
58+
DenseMap<const OutputSection *,
59+
SmallVector<std::pair<uint64_t /*addr*/, CStringInfo>>>
60+
liveCStringsForSection;
61+
SmallVector<CStringInfo> deadCStrings;
5262
};
5363

5464
static MapInfo gatherMapInfo() {
5565
MapInfo info;
5666
for (InputFile *file : inputFiles)
5767
if (isa<ObjFile>(file) || isa<BitcodeFile>(file)) {
58-
bool hasEmittedSymbol = false;
68+
uint32_t fileIndex = info.files.size() + 1;
69+
bool isReferencedFile = false;
70+
71+
// Gather the dead symbols. We don't have to bother with the live ones
72+
// because we will pick them up as we iterate over the OutputSections
73+
// later.
5974
for (Symbol *sym : file->symbols) {
6075
if (auto *d = dyn_cast_or_null<Defined>(sym))
61-
if (d->isec && d->getFile() == file) {
62-
if (d->isLive()) {
63-
assert(!shouldOmitFromOutput(d->isec));
64-
info.liveSymbols.push_back(d);
65-
} else {
76+
// Only emit the prevailing definition of a symbol. Also, don't emit
77+
// the symbol if it is part of a cstring section (we use the literal
78+
// value instead, similar to ld64)
79+
if (d->isec && d->getFile() == file &&
80+
!isa<CStringInputSection>(d->isec)) {
81+
isReferencedFile = true;
82+
if (!d->isLive())
6683
info.deadSymbols.push_back(d);
84+
}
85+
}
86+
87+
// Gather all the cstrings (both live and dead). A CString(Output)Section
88+
// doesn't provide us a way of figuring out which InputSections its
89+
// cstring contents came from, so we need to build up that mapping here.
90+
for (const Section *sec : file->sections) {
91+
for (const Subsection &subsec : sec->subsections) {
92+
if (auto isec = dyn_cast<CStringInputSection>(subsec.isec)) {
93+
auto &liveCStrings = info.liveCStringsForSection[isec->parent];
94+
for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) {
95+
if (piece.live)
96+
liveCStrings.push_back({isec->parent->addr + piece.outSecOff,
97+
{fileIndex, isec->getStringRef(i)}});
98+
else
99+
info.deadCStrings.push_back({fileIndex, isec->getStringRef(i)});
100+
isReferencedFile = true;
67101
}
68-
hasEmittedSymbol = true;
102+
} else {
103+
break;
69104
}
105+
}
70106
}
71-
if (hasEmittedSymbol)
72-
info.files.push_back(file);
73-
}
74-
parallelSort(info.liveSymbols.begin(), info.liveSymbols.end(),
75-
[](Defined *a, Defined *b) { return a->getVA() < b->getVA(); });
76-
return info;
77-
}
78107

79-
// Construct a map from symbols to their stringified representations.
80-
// Demangling symbols (which is what toString() does) is slow, so
81-
// we do that in batch using parallel-for.
82-
static DenseMap<Symbol *, std::string>
83-
getSymbolStrings(ArrayRef<Defined *> syms) {
84-
std::vector<std::string> str(syms.size());
85-
parallelFor(0, syms.size(), [&](size_t i) {
86-
raw_string_ostream os(str[i]);
87-
Defined *sym = syms[i];
88-
89-
switch (sym->isec->kind()) {
90-
case InputSection::CStringLiteralKind: {
91-
// Output "literal string: <string literal>"
92-
const auto *isec = cast<CStringInputSection>(sym->isec);
93-
const StringPiece &piece = isec->getStringPiece(sym->value);
94-
assert(
95-
sym->value == piece.inSecOff &&
96-
"We expect symbols to always point to the start of a StringPiece.");
97-
StringRef str = isec->getStringRef(&piece - &(*isec->pieces.begin()));
98-
(os << "literal string: ").write_escaped(str);
99-
break;
100-
}
101-
case InputSection::ConcatKind:
102-
case InputSection::WordLiteralKind:
103-
os << toString(*sym);
108+
if (isReferencedFile)
109+
info.files.push_back(file);
104110
}
105-
});
106111

107-
DenseMap<Symbol *, std::string> ret;
108-
for (size_t i = 0, e = syms.size(); i < e; ++i)
109-
ret[syms[i]] = std::move(str[i]);
110-
return ret;
112+
// cstrings are not stored in sorted order in their OutputSections, so we sort
113+
// them here.
114+
for (auto &liveCStrings : info.liveCStringsForSection)
115+
parallelSort(liveCStrings.second, [](const auto &p1, const auto &p2) {
116+
return p1.first < p2.first;
117+
});
118+
return info;
111119
}
112120

113121
void macho::writeMapFile() {
@@ -124,16 +132,12 @@ void macho::writeMapFile() {
124132
return;
125133
}
126134

127-
// Dump output path.
128135
os << format("# Path: %s\n", config->outputFile.str().c_str());
129-
130-
// Dump output architecture.
131136
os << format("# Arch: %s\n",
132137
getArchitectureName(config->arch()).str().c_str());
133138

134139
MapInfo info = gatherMapInfo();
135140

136-
// Dump table of object files.
137141
os << "# Object files:\n";
138142
os << format("[%3u] %s\n", 0, (const char *)"linker synthesized");
139143
uint32_t fileIndex = 1;
@@ -143,7 +147,6 @@ void macho::writeMapFile() {
143147
readerToFileOrdinal[file] = fileIndex++;
144148
}
145149

146-
// Dump table of sections
147150
os << "# Sections:\n";
148151
os << "# Address\tSize \tSegment\tSection\n";
149152
for (OutputSegment *seg : outputSegments)
@@ -155,28 +158,48 @@ void macho::writeMapFile() {
155158
seg->name.str().c_str(), osec->name.str().c_str());
156159
}
157160

158-
// Dump table of symbols
159-
DenseMap<Symbol *, std::string> liveSymbolStrings =
160-
getSymbolStrings(info.liveSymbols);
161161
os << "# Symbols:\n";
162162
os << "# Address\tSize \tFile Name\n";
163-
for (Defined *sym : info.liveSymbols) {
164-
assert(sym->isLive());
165-
os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(), sym->size,
166-
readerToFileOrdinal[sym->getFile()],
167-
liveSymbolStrings[sym].c_str());
163+
for (const OutputSegment *seg : outputSegments) {
164+
for (const OutputSection *osec : seg->getSections()) {
165+
if (auto *concatOsec = dyn_cast<ConcatOutputSection>(osec)) {
166+
for (const InputSection *isec : concatOsec->inputs) {
167+
for (Defined *sym : isec->symbols)
168+
os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(),
169+
sym->size, readerToFileOrdinal[sym->getFile()],
170+
sym->getName().str().data());
171+
}
172+
} else if (osec == in.cStringSection || osec == in.objcMethnameSection) {
173+
const auto &liveCStrings = info.liveCStringsForSection.lookup(osec);
174+
uint64_t lastAddr = 0; // strings will never start at address 0, so this
175+
// is a sentinel value
176+
for (const auto &[addr, info] : liveCStrings) {
177+
uint64_t size = 0;
178+
if (addr != lastAddr)
179+
size = info.str.size() + 1; // include null terminator
180+
lastAddr = addr;
181+
os << format("0x%08llX\t0x%08llX\t[%3u] literal string: ", addr, size,
182+
info.fileIndex);
183+
os.write_escaped(info.str) << "\n";
184+
}
185+
}
186+
// TODO print other synthetic sections
187+
}
168188
}
169189

170190
if (config->deadStrip) {
171-
DenseMap<Symbol *, std::string> deadSymbolStrings =
172-
getSymbolStrings(info.deadSymbols);
173191
os << "# Dead Stripped Symbols:\n";
174192
os << "# \tSize \tFile Name\n";
175193
for (Defined *sym : info.deadSymbols) {
176194
assert(!sym->isLive());
177195
os << format("<<dead>>\t0x%08llX\t[%3u] %s\n", sym->size,
178196
readerToFileOrdinal[sym->getFile()],
179-
deadSymbolStrings[sym].c_str());
197+
sym->getName().str().data());
198+
}
199+
for (CStringInfo &cstrInfo : info.deadCStrings) {
200+
os << format("<<dead>>\t0x%08llX\t[%3u] literal string: ",
201+
cstrInfo.str.size() + 1, cstrInfo.fileIndex);
202+
os.write_escaped(cstrInfo.str) << "\n";
180203
}
181204
}
182205
}

lld/test/MachO/map-file.s

Lines changed: 38 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -4,23 +4,24 @@
44
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o
55
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/c-string-literal.s -o %t/c-string-literal.o
66

7-
# RUN: %lld -map %t/map %t/test.o %t/foo.o %t/c-string-literal.o --time-trace -o %t/test
7+
# RUN: %lld -demangle -map %t/map %t/test.o %t/foo.o %t/c-string-literal.o \
8+
# RUN: --time-trace -o %t/test
89
# RUN: llvm-objdump --syms --section-headers %t/test > %t/objdump
9-
# RUN: cat %t/objdump %t/map > %t/out
10-
# RUN: FileCheck %s < %t/out
10+
## Check that symbols in cstring sections aren't emitted
11+
# RUN: cat %t/objdump %t/map | FileCheck %s --implicit-check-not _hello_world
1112
# RUN: FileCheck %s --check-prefix=MAPFILE < %t/test.time-trace
1213

1314
# CHECK: Sections:
14-
# CHECK-NEXT: Idx Name Size VMA Type
15-
# CHECK-NEXT: 0 __text {{[0-9a-f]+}} [[#%x,TEXT:]] TEXT
16-
# CHECK-NEXT: 1 obj {{[0-9a-f]+}} [[#%x,DATA:]] TEXT
17-
# CHECK-NEXT: 2 __cstring {{[0-9a-f]+}} [[#%x,CSTR:]] DATA
18-
# CHECK-NEXT: 3 __common {{[0-9a-f]+}} [[#%x,BSS:]] BSS
15+
# CHECK-NEXT: Idx Name Size VMA Type
16+
# CHECK-NEXT: 0 __text {{[0-9a-f]+}} [[#%x,TEXT:]] TEXT
17+
# CHECK-NEXT: 1 __cstring {{[0-9a-f]+}} [[#%x,CSTR:]] DATA
18+
# CHECK-NEXT: 2 __common {{[0-9a-f]+}} [[#%x,BSS:]] BSS
1919

2020
# CHECK: SYMBOL TABLE:
2121
# CHECK-DAG: [[#%x,MAIN:]] g F __TEXT,__text _main
2222
# CHECK-DAG: [[#%x,NUMBER:]] g O __DATA,__common _number
23-
# CHECK-DAG: [[#%x,FOO:]] g F __TEXT,obj _foo
23+
# CHECK-DAG: [[#%x,BAR:]] g F __TEXT,__text _bar
24+
# CHECK-DAG: [[#%x,FOO:]] g F __TEXT,__text __ZTIN3foo3bar4MethE
2425
# CHECK-DAG: [[#%x,HIWORLD:]] g O __TEXT,__cstring _hello_world
2526
# CHECK-DAG: [[#%x,HIITSME:]] g O __TEXT,__cstring _hello_its_me
2627

@@ -35,56 +36,61 @@
3536
# CHECK-NEXT: # Sections:
3637
# CHECK-NEXT: # Address Size Segment Section
3738
# CHECK-NEXT: 0x[[#%X,TEXT]] 0x{{[0-9A-F]+}} __TEXT __text
38-
# CHECK-NEXT: 0x[[#%X,DATA]] 0x{{[0-9A-F]+}} __TEXT obj
3939
# CHECK-NEXT: 0x[[#%X,CSTR]] 0x{{[0-9A-F]+}} __TEXT __cstring
4040
# CHECK-NEXT: 0x[[#%X,BSS]] 0x{{[0-9A-F]+}} __DATA __common
4141

4242
# CHECK-NEXT: # Symbols:
43-
# CHECK-NEXT: # Address Size File Name
44-
# CHECK-DAG: 0x[[#%X,MAIN]] 0x00000001 [ 1] _main
45-
# CHECK-DAG: 0x[[#%X,FOO]] 0x00000001 [ 2] _foo
46-
# CHECK-DAG: 0x[[#%X,HIWORLD]] 0x0000000E [ 3] literal string: Hello world!\n
47-
# CHECK-DAG: 0x[[#%X,HIITSME]] 0x0000000F [ 3] literal string: Hello, it's me
48-
# CHECK-DAG: 0x[[#%X,NUMBER]] 0x00000001 [ 1] _number
43+
# CHECK-NEXT: # Address Size File Name
44+
# CHECK-DAG: 0x[[#%X,MAIN]] 0x00000001 [ 1] _main
45+
# CHECK-DAG: 0x[[#%X,BAR]] 0x00000001 [ 1] _bar
46+
# CHECK-DAG: 0x[[#%X,FOO]] 0x00000001 [ 2] __ZTIN3foo3bar4MethE
47+
# CHECK-DAG: 0x[[#%X,HIWORLD]] 0x0000000E [ 3] literal string: Hello world!\n
48+
# CHECK-DAG: 0x[[#%X,HIITSME]] 0x0000000F [ 3] literal string: Hello, it's me
49+
# CHECK-DAG: 0x[[#%X,HIITSME + 0xf]] 0x0000000E [ 3] literal string: Hello world!\n
50+
# CHECK-DAG: 0x[[#%X,NUMBER]] 0x00000001 [ 1] _number
4951

5052
# MAPFILE: "name":"Total Write map file"
5153

52-
# RUN: %lld -dead_strip -map %t/stripped-map %t/test.o %t/foo.o %t/c-string-literal.o -o %t/stripped
54+
# RUN: %lld -demangle -dead_strip -map %t/stripped-map %t/test.o %t/foo.o %t/c-string-literal.o -o %t/stripped
5355
# RUN: FileCheck --check-prefix=STRIPPED %s < %t/stripped-map
5456

5557
## C-string literals should be printed as "literal string: <C string literal>"
5658
# STRIPPED-LABEL: Dead Stripped Symbols:
57-
# STRIPPED-DAG: <<dead>> 0x00000001 [ 2] _foo
58-
# STRIPPED-DAG: <<dead>> 0x0000000E [ 3] literal string: Hello world!\n
59-
# STRIPPED-DAG: <<dead>> 0x0000000F [ 3] literal string: Hello, it's me
60-
# STRIPPED-DAG: <<dead>> 0x00000001 [ 1] _number
59+
# STRIPPED-DAG: <<dead>> 0x00000001 [ 1] _bar
60+
# STRIPPED-DAG: <<dead>> 0x00000001 [ 1] _number
61+
# STRIPPED-DAG: <<dead>> 0x00000001 [ 2] __ZTIN3foo3bar4MethE
62+
# STRIPPED-DAG: <<dead>> 0x0000000E [ 3] literal string: Hello world!\n
63+
# STRIPPED-DAG: <<dead>> 0x0000000F [ 3] literal string: Hello, it's me
64+
# STRIPPED-DAG: <<dead>> 0x0000000E [ 3] literal string: Hello world!\n
6165

6266
# RUN: %lld --icf=all -map %t/icf-map %t/test.o %t/foo.o %t/c-string-literal.o -o %t/icf
6367
# RUN: FileCheck --check-prefix=ICF %s < %t/icf-map
6468

69+
## Verify that folded symbols and cstrings have size zero. Note that ld64 prints
70+
## folded symbols but not folded cstrings; we print both.
71+
6572
# ICF: Symbols:
66-
# ICF-DAG: 0x[[#%X,FOO:]] 0x00000000 [ 2] _foo
67-
# ICF-DAG: 0x[[#FOO]] 0x00000001 [ 1] _bar
73+
# ICF-DAG: 0x[[#%X,FOO:]] 0x00000000 [ 2] __ZTIN3foo3bar4MethE
74+
# ICF-DAG: 0x[[#FOO]] 0x00000001 [ 1] _bar
75+
# ICF-DAG: 0x[[#%X,HIWORLD:]] 0x0000000E [ 3] literal string: Hello world!\n
76+
# ICF-DAG: 0x[[#%X,HIWORLD]] 0x00000000 [ 3] literal string: Hello world!\n
6877

6978
#--- foo.s
70-
## ICF will only fold sections marked as pure_instructions
71-
.section __TEXT,obj,regular,pure_instructions
72-
.globl _foo
73-
.alt_entry _alt_foo
74-
_foo:
79+
.globl __ZTIN3foo3bar4MethE
80+
## This C++ symbol makes it clear that we do not print the demangled name in
81+
## the map file, even if `-demangle` is passed.
82+
__ZTIN3foo3bar4MethE:
7583
nop
7684

7785
.subsections_via_symbols
7886

7987
#--- test.s
8088
.comm _number, 1
8189
.globl _main, _bar
82-
.alt_entry _alt_bar
8390

8491
_main:
8592
ret
8693

87-
.section __TEXT,obj,regular,pure_instructions
8894
_bar:
8995
nop
9096

@@ -101,4 +107,6 @@ _hello_world:
101107
_hello_its_me:
102108
.asciz "Hello, it's me"
103109

110+
.asciz "Hello world!\n"
111+
104112
.subsections_via_symbols

0 commit comments

Comments
 (0)