diff --git a/src/binaryen-c.h b/src/binaryen-c.h
index 83424ceb6de..b0439f11c6f 100644
--- a/src/binaryen-c.h
+++ b/src/binaryen-c.h
@@ -1625,9 +1625,9 @@ BINARYEN_API void BinaryenMemoryInitSetSize(BinaryenExpressionRef expr,
// DataDrop
-// Gets the index of the segment being dropped by a `memory.drop` expression.
+// Gets the index of the segment being dropped by a `data.drop` expression.
BINARYEN_API uint32_t BinaryenDataDropGetSegment(BinaryenExpressionRef expr);
-// Sets the index of the segment being dropped by a `memory.drop` expression.
+// Sets the index of the segment being dropped by a `data.drop` expression.
BINARYEN_API void BinaryenDataDropSetSegment(BinaryenExpressionRef expr,
uint32_t segmentIndex);
diff --git a/src/wasm/wasm-emscripten.cpp b/src/wasm/wasm-emscripten.cpp
index 1fd35de290c..3fce2765706 100644
--- a/src/wasm/wasm-emscripten.cpp
+++ b/src/wasm/wasm-emscripten.cpp
@@ -117,6 +117,8 @@ class StringConstantTracker {
return escape(str);
}
+ std::vector
segmentOffsets; // segment index => address offset
+
private:
void calcSegmentOffsets() {
std::unordered_map passiveOffsets;
@@ -185,7 +187,6 @@ class StringConstantTracker {
}
Module& wasm;
- std::vector segmentOffsets; // segment index => address offset
};
enum class Proxying {
@@ -380,6 +381,7 @@ struct EmJsWalker : public PostWalker {
std::vector toRemove;
std::map codeByName;
+ std::map codeAddresses; // map from string start address to its length in bytes, including the trailing NUL
EmJsWalker(Module& _wasm) : wasm(_wasm), stringTracker(_wasm) {}
@@ -404,7 +406,32 @@ struct EmJsWalker : public PostWalker {
int64_t address = addrConst->value.getInteger();
auto code = stringTracker.codeForConstAddr(address);
codeByName[funcName] = code;
+ codeAddresses[address] = code.size() + 1;
+ }
+};
+
+// Pass that strips every use of one data segment from the code: each
+// `memory.init` and `data.drop` whose segment index equals `segment` is
+// replaced with a `nop`. Instructions referring to other segments are left
+// untouched. The pass reports itself as function-parallel.
+struct SegmentRemover : WalkerPass<PostWalker<SegmentRemover>> {
+  // `explicit` so that a bare Index is never implicitly converted to a pass.
+  explicit SegmentRemover(Index segment) : segment(segment) {}
+
+  bool isFunctionParallel() override { return true; }
+
+  // Returns a new pass instance that targets the same segment.
+  Pass* create() override { return new SegmentRemover(segment); }
+
+  // Replaces a `memory.init` of the targeted segment with a nop.
+  void visitMemoryInit(MemoryInit* curr) {
+    if (segment == curr->segment) {
+      Builder builder(*getModule());
+      replaceCurrent(builder.makeNop());
+    }
   }
+
+  // Replaces a `data.drop` of the targeted segment with a nop.
+  void visitDataDrop(DataDrop* curr) {
+    if (segment == curr->segment) {
+      Builder builder(*getModule());
+      replaceCurrent(builder.makeNop());
+    }
+  }
+
+  // Index of the data segment whose uses are being removed.
+  Index segment;
 };
EmJsWalker fixEmJsFuncsAndReturnWalker(Module& wasm) {
@@ -415,6 +442,32 @@ EmJsWalker fixEmJsFuncsAndReturnWalker(Module& wasm) {
wasm.removeExport(exp.name);
wasm.removeFunction(exp.value);
}
+
+  // Newer versions of emscripten/llvm pack all EM_JS strings into a single
+  // data segment. Detect such segments by checking that a segment consists
+  // only of back-to-back EM_JS strings, and strip them from the final binary.
+  for (Index i = 0; i < wasm.memory.segments.size(); i++) {
+    auto& data = wasm.memory.segments[i].data;
+    if (data.empty()) {
+      // Nothing to strip. Without this guard an empty segment would trivially
+      // count as "only EM_JS strings" and lose its memory.init/data.drop.
+      continue;
+    }
+
+    // Measure each segment from its own offset (segmentOffsets is indexed by
+    // segment index), not from the offset of segment 0.
+    Address start = walker.stringTracker.segmentOffsets[i];
+    Address cur = start;
+
+    // Hop from one recorded EM_JS string to the next. `codeAddresses` maps a
+    // string's address to the number of bytes to advance to get past it; stop
+    // at the first address that is not the start of a recorded string.
+    while (cur < start + data.size()) {
+      auto found = walker.codeAddresses.find(cur);
+      if (found == walker.codeAddresses.end()) {
+        break;
+      }
+      cur.addr += found->second;
+    }
+
+    if (cur == start + data.size()) {
+      // The entire segment consists of EM_JS strings. Remove it.
+      PassRunner runner(&wasm);
+      SegmentRemover(i).run(&runner, &wasm);
+      // Resize the segment to zero. In theory we should remove it completely,
+      // but that would mean renumbering all the segments that follow.
+      data.resize(0);
+    }
+  }
return walker;
}
diff --git a/test/lit/wasm-emscripten-finalize/em_js.wat b/test/lit/wasm-emscripten-finalize/em_js.wat
index c2dd4c17d9a..0cce1e3b5ee 100644
--- a/test/lit/wasm-emscripten-finalize/em_js.wat
+++ b/test/lit/wasm-emscripten-finalize/em_js.wat
@@ -3,20 +3,32 @@
;; RUN: wasm-emscripten-finalize %s -S | filecheck %s
-;; Both functions should be stripped from the binary
+;; All functions should be stripped from the binary, regardless
+;; of internal name
;; CHECK-NOT: (func
+;; The data section that contains only em_js strings should
+;; be stripped.
+;; CHECK-NOT: (i32.const 512) "Only em_js strings here\00")
+
+;; Data sections that also contain other stuff should not be stripped
+;; CHECK: (data (i32.const 1024) "some JS string data\00xxx")
+;; CHECK: (data (i32.const 2048) "more JS string data\00yyy")
+
;; CHECK: "emJsFuncs": {
-;; CHECK-NEXT: "bar": "more JS string dara",
-;; CHECK-NEXT: "foo": "some JS string"
+;; CHECK-NEXT: "bar": "more JS string data",
+;; CHECK-NEXT: "baz": "Only em_js strings here
+;; CHECK-NEXT: "foo": "some JS string data"
;; CHECK-NEXT: },
(module
(memory 1 1)
- (data (i32.const 1024) "some JS string\00")
- (data (i32.const 2048) "more JS string dara\00")
+ (data (i32.const 512) "Only em_js strings here\00")
+ (data (i32.const 1024) "some JS string data\00xxx")
+ (data (i32.const 2048) "more JS string data\00yyy")
(export "__em_js__foo" (func $__em_js__foo))
(export "__em_js__bar" (func $bar))
+ (export "__em_js__baz" (func $baz))
;; Name matches export name
(func $__em_js__foo (result i32)
(i32.const 1024)
@@ -25,4 +37,7 @@
(func $bar (result i32)
(i32.const 2048)
)
+ (func $baz (result i32)
+ (i32.const 512)
+ )
)