Skip to content

finalize: Strip "em_js" named data segment. #3553

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/binaryen-c.h
Original file line number Diff line number Diff line change
Expand Up @@ -1625,9 +1625,9 @@ BINARYEN_API void BinaryenMemoryInitSetSize(BinaryenExpressionRef expr,

// DataDrop

// Gets the index of the segment being dropped by a `memory.drop` expression.
// Gets the index of the segment being dropped by a `data.drop` expression.
BINARYEN_API uint32_t BinaryenDataDropGetSegment(BinaryenExpressionRef expr);
// Sets the index of the segment being dropped by a `memory.drop` expression.
// Sets the index of the segment being dropped by a `data.drop` expression.
BINARYEN_API void BinaryenDataDropSetSegment(BinaryenExpressionRef expr,
uint32_t segmentIndex);

Expand Down
55 changes: 54 additions & 1 deletion src/wasm/wasm-emscripten.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,8 @@ class StringConstantTracker {
return escape(str);
}

std::vector<Address> segmentOffsets; // segment index => address offset

private:
void calcSegmentOffsets() {
std::unordered_map<Index, Address> passiveOffsets;
Expand Down Expand Up @@ -185,7 +187,6 @@ class StringConstantTracker {
}

Module& wasm;
std::vector<Address> segmentOffsets; // segment index => address offset
};

enum class Proxying {
Expand Down Expand Up @@ -380,6 +381,7 @@ struct EmJsWalker : public PostWalker<EmJsWalker> {
std::vector<Export> toRemove;

std::map<std::string, std::string> codeByName;
std::map<Address, size_t> codeAddresses; // map from address to string len

EmJsWalker(Module& _wasm) : wasm(_wasm), stringTracker(_wasm) {}

Expand All @@ -404,7 +406,32 @@ struct EmJsWalker : public PostWalker<EmJsWalker> {
int64_t address = addrConst->value.getInteger();
auto code = stringTracker.codeForConstAddr(address);
codeByName[funcName] = code;
codeAddresses[address] = code.size() + 1;
}
};

// Pass that neutralizes every `memory.init` and `data.drop` instruction that
// refers to one particular data segment, so that the segment's contents can
// subsequently be discarded without leaving instructions that depend on them.
struct SegmentRemover : WalkerPass<PostWalker<SegmentRemover>> {
  // `segment` is the index of the data segment whose users are neutralized.
  SegmentRemover(Index segment) : segment(segment) {}

  bool isFunctionParallel() override { return true; }

  // Produces a fresh instance targeting the same segment (presumably what the
  // pass runner uses to give each parallel worker its own copy - confirm).
  Pass* create() override { return new SegmentRemover(segment); }

  void visitMemoryInit(MemoryInit* curr) {
    if (segment == curr->segment) {
      // The operands are arbitrary expressions that may have side effects
      // (calls, local.tee, ...), so they cannot simply be discarded together
      // with the instruction. Keep evaluating them, in order, and drop the
      // results instead of replacing the whole thing with a nop.
      Builder builder(*getModule());
      replaceCurrent(builder.blockify(builder.makeDrop(curr->dest),
                                      builder.makeDrop(curr->offset),
                                      builder.makeDrop(curr->size)));
    }
  }

  void visitDataDrop(DataDrop* curr) {
    if (segment == curr->segment) {
      // data.drop has no operands, so a plain nop is a complete replacement.
      Builder builder(*getModule());
      replaceCurrent(builder.makeNop());
    }
  }

  // Index of the data segment being removed.
  Index segment;
};

EmJsWalker fixEmJsFuncsAndReturnWalker(Module& wasm) {
Expand All @@ -415,6 +442,32 @@ EmJsWalker fixEmJsFuncsAndReturnWalker(Module& wasm) {
wasm.removeExport(exp.name);
wasm.removeFunction(exp.value);
}

// With newer versions of emscripten/llvm all EM_JS strings are packed into a
// single segment. We detect this by looking for segments that consist solely
// of EM_JS strings laid out back to back, and strip such segments from the
// final binary.
for (Index i = 0; i < wasm.memory.segments.size(); i++) {
  auto& segment = wasm.memory.segments[i];
  if (segment.data.empty()) {
    // An empty segment holds no EM_JS strings, so there is nothing to strip.
    continue;
  }

  // Each segment must be walked starting from its *own* base address; using
  // the offset of segment 0 for every segment would misclassify all others.
  const Address start = walker.stringTracker.segmentOffsets[i];
  const auto end = start + segment.data.size();
  Address cur = start;

  // Hop from one known EM_JS string to the next. codeAddresses maps the start
  // address of each string to its length including the null terminator, so
  // the walk only reaches `end` exactly when the segment is fully covered.
  while (cur < end) {
    auto found = walker.codeAddresses.find(cur);
    if (found == walker.codeAddresses.end()) {
      break;
    }
    cur.addr += found->second;
  }

  if (cur == end) {
    // The entire segment consists of EM_JS strings. Remove it.
    PassRunner runner(&wasm);
    SegmentRemover(i).run(&runner, &wasm);
    // Resize the segment to zero rather than erasing it: erasing would shift
    // the indices of all following segments, which would in turn require
    // rewriting every instruction that refers to them.
    segment.data.resize(0);
  }
}
return walker;
}

Expand Down
25 changes: 20 additions & 5 deletions test/lit/wasm-emscripten-finalize/em_js.wat
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,32 @@

;; RUN: wasm-emscripten-finalize %s -S | filecheck %s

;; Both functions should be stripped from the binary
;; All functions should be stripped from the binary, regardless
;; of internal name
;; CHECK-NOT: (func

;; The data section that contains only em_js strings should
;; be stripped.
;; CHECK-NOT: (i32.const 512) "Only em_js strings here\00")

;; Data sections that also contain other stuff should not be stripped
;; CHECK: (data (i32.const 1024) "some JS string data\00xxx")
;; CHECK: (data (i32.const 2048) "more JS string data\00yyy")

;; CHECK: "emJsFuncs": {
;; CHECK-NEXT: "bar": "more JS string dara",
;; CHECK-NEXT: "foo": "some JS string"
;; CHECK-NEXT: "bar": "more JS string data",
;; CHECK-NEXT: "baz": "Only em_js strings here",
;; CHECK-NEXT: "foo": "some JS string data"
;; CHECK-NEXT: },

(module
(memory 1 1)
(data (i32.const 1024) "some JS string\00")
(data (i32.const 2048) "more JS string dara\00")
(data (i32.const 512) "Only em_js strings here\00")
(data (i32.const 1024) "some JS string data\00xxx")
(data (i32.const 2048) "more JS string data\00yyy")
(export "__em_js__foo" (func $__em_js__foo))
(export "__em_js__bar" (func $bar))
(export "__em_js__baz" (func $baz))
;; Name matches export name
(func $__em_js__foo (result i32)
(i32.const 1024)
Expand All @@ -25,4 +37,7 @@
(func $bar (result i32)
(i32.const 2048)
)
(func $baz (result i32)
(i32.const 512)
)
)