Skip to content

Commit 0540d2c

Browse files
committed
finalize: Refactor C string extraction code. NFC.
This is a pure refactor in preparation for a change that will enable stripping, or at least zeroing, segments that contain only EM_JS/EM_ASM strings.
1 parent 7bff209 commit 0540d2c

File tree

1 file changed

+85
-81
lines changed

1 file changed

+85
-81
lines changed

src/wasm/wasm-emscripten.cpp

+85-81
Original file line numberDiff line numberDiff line change
@@ -76,62 +76,6 @@ Global* getStackPointerGlobal(Module& wasm) {
7676

7777
const Address UNKNOWN_OFFSET(uint32_t(-1));
7878

79-
std::vector<Address> getSegmentOffsets(Module& wasm) {
80-
std::unordered_map<Index, Address> passiveOffsets;
81-
if (wasm.features.hasBulkMemory()) {
82-
// Fetch passive segment offsets out of memory.init instructions
83-
struct OffsetSearcher : PostWalker<OffsetSearcher> {
84-
std::unordered_map<Index, Address>& offsets;
85-
OffsetSearcher(std::unordered_map<unsigned, Address>& offsets)
86-
: offsets(offsets) {}
87-
void visitMemoryInit(MemoryInit* curr) {
88-
// The destination of the memory.init is either a constant
89-
// or the result of an addition with __memory_base in the
90-
// case of PIC code.
91-
auto* dest = curr->dest->dynCast<Const>();
92-
if (!dest) {
93-
auto* add = curr->dest->dynCast<Binary>();
94-
if (!add) {
95-
return;
96-
}
97-
dest = add->left->dynCast<Const>();
98-
if (!dest) {
99-
return;
100-
}
101-
}
102-
auto it = offsets.find(curr->segment);
103-
if (it != offsets.end()) {
104-
Fatal() << "Cannot get offset of passive segment initialized "
105-
"multiple times";
106-
}
107-
offsets[curr->segment] = dest->value.geti32();
108-
}
109-
} searcher(passiveOffsets);
110-
searcher.walkModule(&wasm);
111-
}
112-
std::vector<Address> segmentOffsets;
113-
for (unsigned i = 0; i < wasm.memory.segments.size(); ++i) {
114-
auto& segment = wasm.memory.segments[i];
115-
if (segment.isPassive) {
116-
auto it = passiveOffsets.find(i);
117-
if (it != passiveOffsets.end()) {
118-
segmentOffsets.push_back(it->second);
119-
} else {
120-
// This was a non-constant offset (perhaps TLS)
121-
segmentOffsets.push_back(UNKNOWN_OFFSET);
122-
}
123-
} else if (auto* addrConst = segment.offset->dynCast<Const>()) {
124-
auto address = addrConst->value.geti32();
125-
segmentOffsets.push_back(address);
126-
} else {
127-
// TODO(sbc): Wasm shared libraries have data segments with non-const
128-
// offset.
129-
segmentOffsets.push_back(0);
130-
}
131-
}
132-
return segmentOffsets;
133-
}
134-
13579
std::string escape(const char* input) {
13680
std::string code = input;
13781
// replace newlines with escaped newlines
@@ -161,29 +105,89 @@ std::string escape(const char* input) {
161105
return code;
162106
}
163107

164-
const char* stringAtAddr(Module& wasm,
165-
std::vector<Address> const& segmentOffsets,
166-
Address address) {
167-
for (unsigned i = 0; i < wasm.memory.segments.size(); ++i) {
168-
Memory::Segment& segment = wasm.memory.segments[i];
169-
Address offset = segmentOffsets[i];
170-
if (offset != UNKNOWN_OFFSET && address >= offset &&
171-
address < offset + segment.data.size()) {
172-
return &segment.data[address - offset];
108+
class StringConstantTracker {
109+
public:
110+
StringConstantTracker(Module& wasm)
111+
: wasm(wasm) { calcSegmentOffsets(); }
112+
113+
std::string codeForConstAddr(int64_t address) {
114+
const char* str = stringAtAddr(address);
115+
if (!str) {
116+
Fatal() << "unable to find data for ASM/EM_JS const at: " << address;
117+
}
118+
return escape(str);
119+
}
120+
121+
private:
122+
void calcSegmentOffsets() {
123+
std::unordered_map<Index, Address> passiveOffsets;
124+
if (wasm.features.hasBulkMemory()) {
125+
// Fetch passive segment offsets out of memory.init instructions
126+
struct OffsetSearcher : PostWalker<OffsetSearcher> {
127+
std::unordered_map<Index, Address>& offsets;
128+
OffsetSearcher(std::unordered_map<unsigned, Address>& offsets)
129+
: offsets(offsets) {}
130+
void visitMemoryInit(MemoryInit* curr) {
131+
// The destination of the memory.init is either a constant
132+
// or the result of an addition with __memory_base in the
133+
// case of PIC code.
134+
auto* dest = curr->dest->dynCast<Const>();
135+
if (!dest) {
136+
auto* add = curr->dest->dynCast<Binary>();
137+
if (!add) {
138+
return;
139+
}
140+
dest = add->left->dynCast<Const>();
141+
if (!dest) {
142+
return;
143+
}
144+
}
145+
auto it = offsets.find(curr->segment);
146+
if (it != offsets.end()) {
147+
Fatal() << "Cannot get offset of passive segment initialized "
148+
"multiple times";
149+
}
150+
offsets[curr->segment] = dest->value.geti32();
151+
}
152+
} searcher(passiveOffsets);
153+
searcher.walkModule(&wasm);
154+
}
155+
for (unsigned i = 0; i < wasm.memory.segments.size(); ++i) {
156+
auto& segment = wasm.memory.segments[i];
157+
if (segment.isPassive) {
158+
auto it = passiveOffsets.find(i);
159+
if (it != passiveOffsets.end()) {
160+
segmentOffsets.push_back(it->second);
161+
} else {
162+
// This was a non-constant offset (perhaps TLS)
163+
segmentOffsets.push_back(UNKNOWN_OFFSET);
164+
}
165+
} else if (auto* addrConst = segment.offset->dynCast<Const>()) {
166+
auto address = addrConst->value.geti32();
167+
segmentOffsets.push_back(address);
168+
} else {
169+
// TODO(sbc): Wasm shared libraries have data segments with non-const
170+
// offset.
171+
segmentOffsets.push_back(0);
172+
}
173173
}
174174
}
175-
return nullptr;
176-
}
177175

178-
std::string codeForConstAddr(Module& wasm,
179-
std::vector<Address> const& segmentOffsets,
180-
int64_t address) {
181-
const char* str = stringAtAddr(wasm, segmentOffsets, address);
182-
if (!str) {
183-
Fatal() << "unable to find data for ASM/EM_JS const at: " << address;
176+
const char* stringAtAddr(Address address) {
177+
for (unsigned i = 0; i < wasm.memory.segments.size(); ++i) {
178+
Memory::Segment& segment = wasm.memory.segments[i];
179+
Address offset = segmentOffsets[i];
180+
if (offset != UNKNOWN_OFFSET && address >= offset &&
181+
address < offset + segment.data.size()) {
182+
return &segment.data[address - offset];
183+
}
184+
}
185+
return nullptr;
184186
}
185-
return escape(str);
186-
}
187+
188+
Module& wasm;
189+
std::vector<Address> segmentOffsets; // segment index => address offset
190+
};
187191

188192
enum class Proxying {
189193
None,
@@ -206,7 +210,7 @@ std::string proxyingSuffix(Proxying proxy) {
206210
struct AsmConstWalker : public LinearExecutionWalker<AsmConstWalker> {
207211
Module& wasm;
208212
bool minimizeWasmChanges;
209-
std::vector<Address> segmentOffsets; // segment index => address offset
213+
StringConstantTracker stringTracker;
210214

211215
struct AsmConst {
212216
std::set<Signature> sigs;
@@ -222,7 +226,7 @@ struct AsmConstWalker : public LinearExecutionWalker<AsmConstWalker> {
222226

223227
AsmConstWalker(Module& _wasm, bool minimizeWasmChanges)
224228
: wasm(_wasm), minimizeWasmChanges(minimizeWasmChanges),
225-
segmentOffsets(getSegmentOffsets(wasm)) {}
229+
stringTracker(_wasm) {}
226230

227231
void noteNonLinear(Expression* curr);
228232

@@ -316,7 +320,7 @@ void AsmConstWalker::visitCall(Call* curr) {
316320

317321
auto* value = arg->cast<Const>();
318322
int64_t address = value->value.getInteger();
319-
auto code = codeForConstAddr(wasm, segmentOffsets, address);
323+
auto code = stringTracker.codeForConstAddr(address);
320324
createAsmConst(address, code, sig, importName);
321325
}
322326

@@ -373,13 +377,13 @@ static AsmConstWalker fixEmAsmConstsAndReturnWalker(Module& wasm,
373377

374378
struct EmJsWalker : public PostWalker<EmJsWalker> {
375379
Module& wasm;
376-
std::vector<Address> segmentOffsets; // segment index => address offset
380+
StringConstantTracker stringTracker;
377381
std::vector<Export> toRemove;
378382

379383
std::map<std::string, std::string> codeByName;
380384

381385
EmJsWalker(Module& _wasm)
382-
: wasm(_wasm), segmentOffsets(getSegmentOffsets(wasm)) {}
386+
: wasm(_wasm), stringTracker(_wasm) {}
383387

384388
void visitExport(Export* curr) {
385389
if (curr->kind != ExternalKind::Function) {
@@ -400,7 +404,7 @@ struct EmJsWalker : public PostWalker<EmJsWalker> {
400404
}
401405
auto* addrConst = consts.list[0];
402406
int64_t address = addrConst->value.getInteger();
403-
auto code = codeForConstAddr(wasm, segmentOffsets, address);
407+
auto code = stringTracker.codeForConstAddr(address);
404408
codeByName[funcName] = code;
405409
}
406410
};

0 commit comments

Comments
 (0)