Skip to content

[WebAssembly] Support assembly parsing for new EH #108668

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Sep 17, 2024
147 changes: 137 additions & 10 deletions llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,23 @@ struct WebAssemblyOperand : public MCParsedAsmOperand {
std::vector<unsigned> List;
};

struct CaLOpElem {
int64_t Opcode;
const MCExpr *Tag;
int64_t Dest;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume Dest here is the branching depth of the targeted block. Does it have exactly the same meaning (in the context of the asm parser) as the BrList operands? If so, maybe it should have the same type?

Actually the level of abstraction here seems slightly funny because the tag is represented as an MCExpr* (rather than a raw int) but the destination is a raw int. But I guess that makes sense in that MC explicitly models the tags as MCExprs but doesn't model basic blocks; and BrList already works this way.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I made them int64_t just because the token's parser's return value was int64_t:

int64_t getIntVal() const {
assert(Kind == Integer && "This token isn't an integer!");
return IntVal.getZExtValue();
}

And yeah I agree giving them the real type is better. Changed Opcode to uint8_t and Dest to unsigned to reflect the real binary spec.

};

struct CaLOp {
std::vector<CaLOpElem> List;
};

union {
struct TokOp Tok;
struct IntOp Int;
struct FltOp Flt;
struct SymOp Sym;
struct BrLOp BrL;
struct CaLOp CaL;
};

WebAssemblyOperand(SMLoc Start, SMLoc End, TokOp T)
Expand All @@ -85,12 +96,16 @@ struct WebAssemblyOperand : public MCParsedAsmOperand {
: Kind(Float), StartLoc(Start), EndLoc(End), Flt(F) {}
WebAssemblyOperand(SMLoc Start, SMLoc End, SymOp S)
: Kind(Symbol), StartLoc(Start), EndLoc(End), Sym(S) {}
WebAssemblyOperand(SMLoc Start, SMLoc End)
: Kind(BrList), StartLoc(Start), EndLoc(End), BrL() {}
WebAssemblyOperand(SMLoc Start, SMLoc End, BrLOp B)
: Kind(BrList), StartLoc(Start), EndLoc(End), BrL(B) {}
WebAssemblyOperand(SMLoc Start, SMLoc End, CaLOp C)
: Kind(CatchList), StartLoc(Start), EndLoc(End), CaL(C) {}

~WebAssemblyOperand() {
if (isBrList())
BrL.~BrLOp();
if (isCatchList())
CaL.~CaLOp();
}

bool isToken() const override { return Kind == Token; }
Expand Down Expand Up @@ -153,7 +168,15 @@ struct WebAssemblyOperand : public MCParsedAsmOperand {
}

void addCatchListOperands(MCInst &Inst, unsigned N) const {
// TODO
assert(N == 1 && isCatchList() && "Invalid CatchList!");
Inst.addOperand(MCOperand::createImm(CaL.List.size()));
for (auto Ca : CaL.List) {
Inst.addOperand(MCOperand::createImm(Ca.Opcode));
if (Ca.Opcode == wasm::WASM_OPCODE_CATCH ||
Ca.Opcode == wasm::WASM_OPCODE_CATCH_REF)
Inst.addOperand(MCOperand::createExpr(Ca.Tag));
Inst.addOperand(MCOperand::createImm(Ca.Dest));
}
}

void print(raw_ostream &OS) const override {
Expand All @@ -174,7 +197,7 @@ struct WebAssemblyOperand : public MCParsedAsmOperand {
OS << "BrList:" << BrL.List.size();
break;
case CatchList:
// TODO
OS << "CaList:" << CaL.List.size();
break;
}
}
Expand Down Expand Up @@ -228,6 +251,7 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser {
Loop,
Try,
CatchAll,
TryTable,
If,
Else,
Undefined,
Expand Down Expand Up @@ -304,6 +328,8 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser {
return {"try", "end_try/delegate"};
case CatchAll:
return {"catch_all", "end_try"};
case TryTable:
return {"try_table", "end_try_table"};
case If:
return {"if", "end_if"};
case Else:
Expand Down Expand Up @@ -571,6 +597,7 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser {
// proper nesting.
bool ExpectBlockType = false;
bool ExpectFuncType = false;
bool ExpectCatchList = false;
std::unique_ptr<WebAssemblyOperand> FunctionTable;
if (Name == "block") {
push(Block);
Expand All @@ -593,12 +620,19 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser {
} else if (Name == "catch_all") {
if (popAndPushWithSameSignature(Name, Try, CatchAll))
return true;
} else if (Name == "try_table") {
push(TryTable);
ExpectBlockType = true;
ExpectCatchList = true;
} else if (Name == "end_if") {
if (pop(Name, If, Else))
return true;
} else if (Name == "end_try") {
if (pop(Name, Try, CatchAll))
return true;
} else if (Name == "end_try_table") {
if (pop(Name, TryTable))
return true;
} else if (Name == "delegate") {
if (pop(Name, Try))
return true;
Expand All @@ -622,7 +656,18 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser {
ExpectFuncType = true;
}

if (ExpectFuncType || (ExpectBlockType && Lexer.is(AsmToken::LParen))) {
// Returns true if the next tokens are a catch clause
auto PeekCatchList = [&]() {
if (Lexer.isNot(AsmToken::LParen))
return false;
AsmToken NextTok = Lexer.peekTok();
return NextTok.getKind() == AsmToken::Identifier &&
NextTok.getIdentifier().starts_with("catch");
};

// Parse a multivalue block type
if (ExpectFuncType ||
(Lexer.is(AsmToken::LParen) && ExpectBlockType && !PeekCatchList())) {
// This has a special TYPEINDEX operand which in text we
// represent as a signature, such that we can re-build this signature,
// attach it to an anonymous symbol, which is what WasmObjectWriter
Expand All @@ -648,6 +693,23 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser {
Loc.getLoc(), Loc.getEndLoc(), WebAssemblyOperand::SymOp{Expr}));
}

// If we are expecting a catch clause list, try to parse it here.
//
// If there is a multivalue block return type before this catch list, it
// should have been parsed above. If there is no return type before
// encountering this catch list, this means the type is void.
// The case when there is a single block return value and then a catch list
// will be handled below in the 'while' loop.
if (ExpectCatchList && PeekCatchList()) {
if (ExpectBlockType) {
ExpectBlockType = false;
addBlockTypeOperand(Operands, NameLoc, WebAssembly::BlockType::Void);
}
if (parseCatchList(Operands))
return true;
ExpectCatchList = false;
}

while (Lexer.isNot(AsmToken::EndOfStatement)) {
auto &Tok = Lexer.getTok();
switch (Tok.getKind()) {
Expand All @@ -661,7 +723,15 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser {
if (BT == WebAssembly::BlockType::Invalid)
return error("Unknown block type: ", Id);
addBlockTypeOperand(Operands, NameLoc, BT);
ExpectBlockType = false;
Parser.Lex();
// Now that we've parsed a single block return type, if we are
// expecting a catch clause list, try to parse it.
if (ExpectCatchList && PeekCatchList()) {
if (parseCatchList(Operands))
return true;
ExpectCatchList = false;
}
} else {
// Assume this identifier is a label.
const MCExpr *Val;
Expand Down Expand Up @@ -703,8 +773,8 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser {
}
case AsmToken::LCurly: {
Parser.Lex();
auto Op =
std::make_unique<WebAssemblyOperand>(Tok.getLoc(), Tok.getEndLoc());
auto Op = std::make_unique<WebAssemblyOperand>(
Tok.getLoc(), Tok.getEndLoc(), WebAssemblyOperand::BrLOp{});
if (!Lexer.is(AsmToken::RCurly))
for (;;) {
Op->BrL.List.push_back(Lexer.getTok().getIntVal());
Expand All @@ -724,10 +794,18 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser {
return true;
}
}
if (ExpectBlockType && Operands.size() == 1) {
// Support blocks with no operands as default to void.

// If we are still expecting to parse a block type or a catch list at this
// point, we set them to the default/empty state.

// Support blocks with no operands as default to void.
if (ExpectBlockType)
addBlockTypeOperand(Operands, NameLoc, WebAssembly::BlockType::Void);
}
// If no catch list has been parsed, add an empty catch list operand.
if (ExpectCatchList)
Operands.push_back(std::make_unique<WebAssemblyOperand>(
NameLoc, NameLoc, WebAssemblyOperand::CaLOp{}));

if (FunctionTable)
Operands.push_back(std::move(FunctionTable));
Parser.Lex();
Expand All @@ -752,6 +830,55 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser {
return false;
}

bool parseCatchList(OperandVector &Operands) {
auto Op = std::make_unique<WebAssemblyOperand>(
Lexer.getTok().getLoc(), SMLoc(), WebAssemblyOperand::CaLOp{});
SMLoc EndLoc;

while (Lexer.is(AsmToken::LParen)) {
if (expect(AsmToken::LParen, "("))
return true;

auto CatchStr = expectIdent();
if (CatchStr.empty())
return true;
int64_t CatchOpcode =
StringSwitch<int64_t>(CatchStr)
.Case("catch", wasm::WASM_OPCODE_CATCH)
.Case("catch_ref", wasm::WASM_OPCODE_CATCH_REF)
.Case("catch_all", wasm::WASM_OPCODE_CATCH_ALL)
.Case("catch_all_ref", wasm::WASM_OPCODE_CATCH_ALL_REF)
.Default(-1);
if (CatchOpcode == -1)
return error(
"Expected catch/catch_ref/catch_all/catch_all_ref, instead got: " +
CatchStr);

const MCExpr *Tag = nullptr;
if (CatchOpcode == wasm::WASM_OPCODE_CATCH ||
CatchOpcode == wasm::WASM_OPCODE_CATCH_REF) {
if (Parser.parseExpression(Tag))
return error("Cannot parse symbol: ", Lexer.getTok());
}

auto &DestTok = Lexer.getTok();
if (DestTok.isNot(AsmToken::Integer))
return error("Expected integer constant, instead got: ", DestTok);
int64_t Dest = DestTok.getIntVal();
Parser.Lex();

EndLoc = Lexer.getTok().getEndLoc();
if (expect(AsmToken::RParen, ")"))
return true;

Op->CaL.List.push_back({CatchOpcode, Tag, Dest});
}

Op->EndLoc = EndLoc;
Operands.push_back(std::move(Op));
return false;
}

bool CheckDataSection() {
if (CurrentState != DataSection) {
auto WS = cast<MCSectionWasm>(getStreamer().getCurrentSectionOnly());
Expand Down
Loading
Loading