diff --git a/src/binaryen-c.h b/src/binaryen-c.h index 83424ceb6de..b0439f11c6f 100644 --- a/src/binaryen-c.h +++ b/src/binaryen-c.h @@ -1625,9 +1625,9 @@ BINARYEN_API void BinaryenMemoryInitSetSize(BinaryenExpressionRef expr, // DataDrop -// Gets the index of the segment being dropped by a `memory.drop` expression. +// Gets the index of the segment being dropped by a `data.drop` expression. BINARYEN_API uint32_t BinaryenDataDropGetSegment(BinaryenExpressionRef expr); -// Sets the index of the segment being dropped by a `memory.drop` expression. +// Sets the index of the segment being dropped by a `data.drop` expression. BINARYEN_API void BinaryenDataDropSetSegment(BinaryenExpressionRef expr, uint32_t segmentIndex); diff --git a/src/wasm/wasm-emscripten.cpp b/src/wasm/wasm-emscripten.cpp index 1fd35de290c..3fce2765706 100644 --- a/src/wasm/wasm-emscripten.cpp +++ b/src/wasm/wasm-emscripten.cpp @@ -117,6 +117,8 @@ class StringConstantTracker { return escape(str); } + std::vector
segmentOffsets; // segment index => address offset + private: void calcSegmentOffsets() { std::unordered_map passiveOffsets; @@ -185,7 +187,6 @@ class StringConstantTracker { } Module& wasm; - std::vector
segmentOffsets; // segment index => address offset }; enum class Proxying { @@ -380,6 +381,7 @@ struct EmJsWalker : public PostWalker { std::vector toRemove; std::map codeByName; + std::map codeAddresses; // map from string address to string len + 1 EmJsWalker(Module& _wasm) : wasm(_wasm), stringTracker(_wasm) {} @@ -404,7 +406,32 @@ struct EmJsWalker : public PostWalker { int64_t address = addrConst->value.getInteger(); auto code = stringTracker.codeForConstAddr(address); codeByName[funcName] = code; + codeAddresses[address] = code.size() + 1; + } +}; + +struct SegmentRemover : WalkerPass> { + SegmentRemover(Index segment) : segment(segment) {} + + bool isFunctionParallel() override { return true; } + + Pass* create() override { return new SegmentRemover(segment); } + + void visitMemoryInit(MemoryInit* curr) { + if (segment == curr->segment) { + Builder builder(*getModule()); + replaceCurrent(builder.makeNop()); + } } + + void visitDataDrop(DataDrop* curr) { + if (segment == curr->segment) { + Builder builder(*getModule()); + replaceCurrent(builder.makeNop()); + } + } + + Index segment; }; EmJsWalker fixEmJsFuncsAndReturnWalker(Module& wasm) { @@ -415,6 +442,32 @@ EmJsWalker fixEmJsFuncsAndReturnWalker(Module& wasm) { wasm.removeExport(exp.name); wasm.removeFunction(exp.value); } + + // With newer versions of emscripten/llvm we pack all EM_JS strings into + // a single segment. + // We can detect this by checking for segments that contain only JS strings. + // When we find such segments we remove them from the final binary. + for (Index i = 0; i < wasm.memory.segments.size(); i++) { + Address start = walker.stringTracker.segmentOffsets[i]; + Address cur = start; + + while (cur < start + wasm.memory.segments[i].data.size()) { + if (walker.codeAddresses.count(cur) == 0) { + break; + } + cur.addr += walker.codeAddresses[cur]; + } + + if (cur == start + wasm.memory.segments[i].data.size()) { + // The entire segment consists of JS strings. Remove it. 
+ PassRunner runner(&wasm); + SegmentRemover(i).run(&runner, &wasm); + // Resize the segment to zero. In theory we should completely remove it, + // but that would mean re-numbering the segments that follow it, and with + // them every memory.init / data.drop that refers to a segment by index. + wasm.memory.segments[i].data.resize(0); + } + } return walker; } diff --git a/test/lit/wasm-emscripten-finalize/em_js.wat b/test/lit/wasm-emscripten-finalize/em_js.wat index c2dd4c17d9a..0cce1e3b5ee 100644 --- a/test/lit/wasm-emscripten-finalize/em_js.wat +++ b/test/lit/wasm-emscripten-finalize/em_js.wat @@ -3,20 +3,32 @@ ;; RUN: wasm-emscripten-finalize %s -S | filecheck %s -;; Both functions should be stripped from the binary +;; All functions should be stripped from the binary, regardless +;; of internal name ;; CHECK-NOT: (func +;; The data section that contains only em_js strings should +;; be stripped. +;; CHECK-NOT: (i32.const 512) "Only em_js strings here\00") + +;; Data sections that also contain other stuff should not be stripped +;; CHECK: (data (i32.const 1024) "some JS string data\00xxx") +;; CHECK: (data (i32.const 2048) "more JS string data\00yyy") + ;; CHECK: "emJsFuncs": { -;; CHECK-NEXT: "bar": "more JS string dara", -;; CHECK-NEXT: "foo": "some JS string" +;; CHECK-NEXT: "bar": "more JS string data", +;; CHECK-NEXT: "baz": "Only em_js strings here +;; CHECK-NEXT: "foo": "some JS string data" ;; CHECK-NEXT: }, (module (memory 1 1) - (data (i32.const 1024) "some JS string\00") - (data (i32.const 2048) "more JS string dara\00") + (data (i32.const 512) "Only em_js strings here\00") + (data (i32.const 1024) "some JS string data\00xxx") + (data (i32.const 2048) "more JS string data\00yyy") (export "__em_js__foo" (func $__em_js__foo)) (export "__em_js__bar" (func $bar)) + (export "__em_js__baz" (func $baz)) ;; Name matches export name (func $__em_js__foo (result i32) (i32.const 1024) @@ -25,4 +37,7 @@ (func $bar (result i32) (i32.const 2048) ) + (func $baz (result i32) + (i32.const 512) + ) )