@@ -76,8 +76,7 @@ Global* getStackPointerGlobal(Module& wasm) {
76
76
77
77
const Address UNKNOWN_OFFSET (uint32_t (-1 ));
78
78
79
- std::string escape (const char * input) {
80
- std::string code = input;
79
+ std::string escape (std::string code) {
81
80
// replace newlines quotes with escaped newlines
82
81
size_t curr = 0 ;
83
82
while ((curr = code.find (" \\ n" , curr)) != std::string::npos) {
@@ -109,14 +108,21 @@ class StringConstantTracker {
109
108
public:
110
109
StringConstantTracker (Module& wasm) : wasm(wasm) { calcSegmentOffsets (); }
111
110
112
- std::string codeForConstAddr (int64_t address) {
113
- const char * str = stringAtAddr (address);
114
- if (!str) {
115
- Fatal () << " unable to find data for ASM/EM_JS const at: " << address;
111
+ const char * stringAtAddr (Address address) {
112
+ for (unsigned i = 0 ; i < wasm.memory .segments .size (); ++i) {
113
+ Memory::Segment& segment = wasm.memory .segments [i];
114
+ Address offset = segmentOffsets[i];
115
+ if (offset != UNKNOWN_OFFSET && address >= offset &&
116
+ address < offset + segment.data .size ()) {
117
+ return &segment.data [address - offset];
118
+ }
116
119
}
117
- return escape (str);
120
+ Fatal () << " unable to find data for ASM/EM_JS const at: " << address;
121
+ return nullptr ;
118
122
}
119
123
124
+ std::vector<Address> segmentOffsets; // segment index => address offset
125
+
120
126
private:
121
127
void calcSegmentOffsets () {
122
128
std::unordered_map<Index, Address> passiveOffsets;
@@ -172,32 +178,19 @@ class StringConstantTracker {
172
178
}
173
179
}
174
180
175
- const char * stringAtAddr (Address address) {
176
- for (unsigned i = 0 ; i < wasm.memory .segments .size (); ++i) {
177
- Memory::Segment& segment = wasm.memory .segments [i];
178
- Address offset = segmentOffsets[i];
179
- if (offset != UNKNOWN_OFFSET && address >= offset &&
180
- address < offset + segment.data .size ()) {
181
- return &segment.data [address - offset];
182
- }
183
- }
184
- return nullptr ;
185
- }
186
-
187
181
Module& wasm;
188
- std::vector<Address> segmentOffsets; // segment index => address offset
182
+ };
183
+
184
+ struct AsmConst {
185
+ Address id;
186
+ std::string code;
189
187
};
190
188
191
189
struct AsmConstWalker : public LinearExecutionWalker <AsmConstWalker> {
192
190
Module& wasm;
193
191
bool minimizeWasmChanges;
194
192
StringConstantTracker stringTracker;
195
193
196
- struct AsmConst {
197
- Address id;
198
- std::string code;
199
- };
200
-
201
194
std::vector<AsmConst> asmConsts;
202
195
// last sets in the current basic block, per index
203
196
std::map<Index, LocalSet*> sets;
@@ -292,9 +285,8 @@ void AsmConstWalker::visitCall(Call* curr) {
292
285
}
293
286
294
287
auto * value = arg->cast <Const>();
295
- int64_t address = value->value .getInteger ();
296
- auto code = stringTracker.codeForConstAddr (address);
297
- createAsmConst (address, code);
288
+ Address address = value->value .getInteger ();
289
+ asmConsts.push_back ({address, stringTracker.stringAtAddr (address)});
298
290
}
299
291
300
292
void AsmConstWalker::process () {
@@ -305,24 +297,105 @@ void AsmConstWalker::process() {
305
297
addImports ();
306
298
}
307
299
308
- void AsmConstWalker::createAsmConst (uint64_t id, std::string code) {
309
- AsmConst asmConst;
310
- asmConst.id = id;
311
- asmConst.code = code;
312
- asmConsts.push_back (asmConst);
313
- }
314
-
315
300
void AsmConstWalker::addImports () {
316
301
for (auto & import : queuedImports) {
317
302
wasm.addFunction (import.release ());
318
303
}
319
304
}
320
305
321
- static AsmConstWalker findEmAsmConstsAndReturnWalker (Module& wasm,
322
- bool minimizeWasmChanges) {
323
- AsmConstWalker walker (wasm, minimizeWasmChanges);
324
- walker.process ();
325
- return walker;
306
+ struct SegmentRemover : WalkerPass<PostWalker<SegmentRemover>> {
307
+ SegmentRemover (Index segment) : segment(segment) {}
308
+
309
+ bool isFunctionParallel () override { return true ; }
310
+
311
+ Pass* create () override { return new SegmentRemover (segment); }
312
+
313
+ void visitMemoryInit (MemoryInit* curr) {
314
+ if (segment == curr->segment ) {
315
+ Builder builder (*getModule ());
316
+ replaceCurrent (builder.blockify (builder.makeDrop (curr->dest ),
317
+ builder.makeDrop (curr->offset ),
318
+ builder.makeDrop (curr->size )));
319
+ }
320
+ }
321
+
322
+ void visitDataDrop (DataDrop* curr) {
323
+ if (segment == curr->segment ) {
324
+ Builder builder (*getModule ());
325
+ replaceCurrent (builder.makeNop ());
326
+ }
327
+ }
328
+
329
+ Index segment;
330
+ };
331
+
332
+ static void removeSegment (Module& wasm, Index segment) {
333
+ PassRunner runner (&wasm);
334
+ SegmentRemover (segment).run (&runner, &wasm);
335
+ // Resize the segment to zero. In theory we should completely remove it
336
+ // but that would mean re-numbering the segments that follow which is
337
+ // non-trivial.
338
+ wasm.memory .segments [segment].data .resize (0 );
339
+ }
340
+
341
+ static Address getExportedAddress (Module& wasm, Export* export_) {
342
+ Global* g = wasm.getGlobal (export_->value );
343
+ auto * addrConst = g->init ->dynCast <Const>();
344
+ return addrConst->value .getInteger ();
345
+ }
346
+
347
+ static std::vector<AsmConst> findEmAsmConsts (Module& wasm,
348
+ bool minimizeWasmChanges) {
349
+ Export* start = wasm.getExportOrNull (" __start_em_asm" );
350
+ Export* end = wasm.getExportOrNull (" __stop_em_asm" );
351
+
352
+ // Older versions of emscripten don't export these symbols. Instead
353
+ // we run AsmConstWalker in an attempt to derive the string addresses
354
+ // from the code.
355
+ if (!start || !end) {
356
+ AsmConstWalker walker (wasm, minimizeWasmChanges);
357
+ walker.process ();
358
+ return walker.asmConsts ;
359
+ }
360
+
361
+ // Newer version of emscripten export this symbols and we
362
+ // can use it ot find all the EM_ASM constants. Sadly __start_em_asm and
363
+ // __stop_em_asm don't alwasy mark the start and end of segment because in
364
+ // dynamic linking we merge all data segments into one.
365
+ std::vector<AsmConst> asmConsts;
366
+ StringConstantTracker stringTracker (wasm);
367
+ Address startAddress = getExportedAddress (wasm, start);
368
+ Address endAddress = getExportedAddress (wasm, end);
369
+ for (Index i = 0 ; i < wasm.memory .segments .size (); i++) {
370
+ Address segmentStart = stringTracker.segmentOffsets [i];
371
+ size_t segmentSize = wasm.memory .segments [i].data .size ();
372
+ if (segmentStart <= startAddress &&
373
+ segmentStart + segmentSize >= endAddress) {
374
+ Address address = startAddress;
375
+ while (address < endAddress) {
376
+ auto code = stringTracker.stringAtAddr (address);
377
+ asmConsts.push_back ({address, code});
378
+ address.addr += strlen (code) + 1 ;
379
+ }
380
+
381
+ if (segmentStart == startAddress &&
382
+ segmentStart + segmentSize == endAddress) {
383
+ removeSegment (wasm, i);
384
+ } else {
385
+ // If we can't remove the whole segment then just set the string
386
+ // data to zero.
387
+ size_t segmentOffset = startAddress - segmentStart;
388
+ char * startElem = &wasm.memory .segments [i].data [segmentOffset];
389
+ memset (startElem, 0 , endAddress - startAddress);
390
+ }
391
+ break ;
392
+ }
393
+ }
394
+
395
+ assert (asmConsts.size ());
396
+ wasm.removeExport (" __start_em_asm" );
397
+ wasm.removeExport (" __stop_em_asm" );
398
+ return asmConsts;
326
399
}
327
400
328
401
struct EmJsWalker : public PostWalker <EmJsWalker> {
@@ -331,6 +404,7 @@ struct EmJsWalker : public PostWalker<EmJsWalker> {
331
404
std::vector<Export> toRemove;
332
405
333
406
std::map<std::string, std::string> codeByName;
407
+ std::map<Address, size_t > codeAddresses; // map from address to string len
334
408
335
409
EmJsWalker (Module& _wasm) : wasm(_wasm), stringTracker(_wasm) {}
336
410
@@ -353,8 +427,9 @@ struct EmJsWalker : public PostWalker<EmJsWalker> {
353
427
}
354
428
auto * addrConst = consts.list [0 ];
355
429
int64_t address = addrConst->value .getInteger ();
356
- auto code = stringTracker.codeForConstAddr (address);
430
+ auto code = stringTracker.stringAtAddr (address);
357
431
codeByName[funcName] = code;
432
+ codeAddresses[address] = strlen (code) + 1 ;
358
433
}
359
434
};
360
435
@@ -366,6 +441,27 @@ EmJsWalker findEmJsFuncsAndReturnWalker(Module& wasm) {
366
441
wasm.removeExport (exp .name );
367
442
wasm.removeFunction (exp .value );
368
443
}
444
+
445
+ // With newer versions of emscripten/llvm we pack all EM_JS strings into
446
+ // single segment.
447
+ // We can detect this by checking for segments that contain only JS strings.
448
+ // When we find such segements we remove them from the final binary.
449
+ for (Index i = 0 ; i < wasm.memory .segments .size (); i++) {
450
+ Address start = walker.stringTracker .segmentOffsets [0 ];
451
+ Address cur = start;
452
+
453
+ while (cur < start + wasm.memory .segments [i].data .size ()) {
454
+ if (walker.codeAddresses .count (cur) == 0 ) {
455
+ break ;
456
+ }
457
+ cur.addr += walker.codeAddresses [cur];
458
+ }
459
+
460
+ if (cur == start + wasm.memory .segments [i].data .size ()) {
461
+ // Entire segment is contains JS strings. Remove it.
462
+ removeSegment (wasm, i);
463
+ }
464
+ }
369
465
return walker;
370
466
}
371
467
@@ -383,16 +479,15 @@ std::string EmscriptenGlueGenerator::generateEmscriptenMetadata() {
383
479
std::stringstream meta;
384
480
meta << " {\n " ;
385
481
386
- AsmConstWalker emAsmWalker =
387
- findEmAsmConstsAndReturnWalker (wasm, minimizeWasmChanges);
482
+ std::vector<AsmConst> asmConsts = findEmAsmConsts (wasm, minimizeWasmChanges);
388
483
389
484
// print
390
485
commaFirst = true ;
391
- if (!emAsmWalker. asmConsts .empty ()) {
486
+ if (!asmConsts.empty ()) {
392
487
meta << " \" asmConsts\" : {" ;
393
- for (auto & asmConst : emAsmWalker. asmConsts ) {
488
+ for (auto & asmConst : asmConsts) {
394
489
meta << nextElement ();
395
- meta << ' "' << asmConst.id << " \" : \" " << asmConst.code << " \" " ;
490
+ meta << ' "' << asmConst.id << " \" : \" " << escape ( asmConst.code ) << " \" " ;
396
491
}
397
492
meta << " \n },\n " ;
398
493
}
@@ -405,7 +500,7 @@ std::string EmscriptenGlueGenerator::generateEmscriptenMetadata() {
405
500
auto & name = pair.first ;
406
501
auto & code = pair.second ;
407
502
meta << nextElement ();
408
- meta << ' "' << name << " \" : \" " << code << ' "' ;
503
+ meta << ' "' << name << " \" : \" " << escape ( code) << ' "' ;
409
504
}
410
505
meta << " \n },\n " ;
411
506
}
0 commit comments