Skip to content

[Strings] Add experimental StringNew variants #5459

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Jan 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions scripts/gen-s-parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -607,10 +607,13 @@
("ref.as_i31", "makeRefCast(s, Type(HeapType::i31, NonNullable))"),
("extern.internalize", "makeRefAs(s, ExternInternalize)"),
("extern.externalize", "makeRefAs(s, ExternExternalize)"),
("string.new_wtf8", "makeStringNew(s, StringNewWTF8)"),
("string.new_wtf16", "makeStringNew(s, StringNewWTF16)"),
("string.new_wtf8_array", "makeStringNew(s, StringNewWTF8Array)"),
("string.new_wtf16_array", "makeStringNew(s, StringNewWTF16Array)"),
("string.new_wtf8", "makeStringNew(s, StringNewWTF8, false)"),
("string.new_wtf16", "makeStringNew(s, StringNewWTF16, false)"),
("string.new_wtf8_array", "makeStringNew(s, StringNewWTF8Array, false)"),
("string.new_wtf16_array", "makeStringNew(s, StringNewWTF16Array, false)"),
("string.from_code_point", "makeStringNew(s, StringNewFromCodePoint, false)"),
("string.new_utf8_try", "makeStringNew(s, StringNewUTF8, true)"),
("string.new_utf8_array_try", "makeStringNew(s, StringNewUTF8Array, true)"),
("string.const", "makeStringConst(s)"),
("string.measure_wtf8", "makeStringMeasure(s, StringMeasureWTF8)"),
("string.measure_wtf16", "makeStringMeasure(s, StringMeasureWTF16)"),
Expand Down
8 changes: 6 additions & 2 deletions src/binaryen-c.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1836,14 +1836,18 @@ BinaryenExpressionRef BinaryenStringNew(BinaryenModuleRef module,
BinaryenExpressionRef length,
BinaryenExpressionRef start,
BinaryenExpressionRef end) {
// TODO: add C API support for the try_ flag; until then it is always false.
bool try_ = false;

Builder builder(*(Module*)module);
return static_cast<Expression*>(
length ? builder.makeStringNew(
StringNewOp(op), (Expression*)ptr, (Expression*)length)
StringNewOp(op), (Expression*)ptr, (Expression*)length, try_)
: builder.makeStringNew(StringNewOp(op),
(Expression*)ptr,
(Expression*)start,
(Expression*)end));
(Expression*)end,
try_));
}
BinaryenExpressionRef BinaryenStringConst(BinaryenModuleRef module,
const char* name) {
Expand Down
122 changes: 86 additions & 36 deletions src/gen-s-parser.inc
Original file line number Diff line number Diff line change
Expand Up @@ -3191,6 +3191,9 @@ switch (buf[0]) {
default: goto parse_error;
}
}
case 'f':
if (op == "string.from_code_point"sv) { return makeStringNew(s, StringNewFromCodePoint, false); }
goto parse_error;
case 'i':
if (op == "string.is_usv_sequence"sv) { return makeStringMeasure(s, StringMeasureIsUSV); }
goto parse_error;
Expand All @@ -3206,26 +3209,42 @@ switch (buf[0]) {
}
}
case 'n': {
switch (buf[14]) {
case '1': {
switch (buf[11]) {
case 'u': {
switch (buf[16]) {
case '\0':
if (op == "string.new_wtf16"sv) { return makeStringNew(s, StringNewWTF16); }
case 'a':
if (op == "string.new_utf8_array_try"sv) { return makeStringNew(s, StringNewUTF8Array, true); }
goto parse_error;
case '_':
if (op == "string.new_wtf16_array"sv) { return makeStringNew(s, StringNewWTF16Array); }
case 't':
if (op == "string.new_utf8_try"sv) { return makeStringNew(s, StringNewUTF8, true); }
goto parse_error;
default: goto parse_error;
}
}
case '8': {
switch (buf[15]) {
case '\0':
if (op == "string.new_wtf8"sv) { return makeStringNew(s, StringNewWTF8); }
goto parse_error;
case '_':
if (op == "string.new_wtf8_array"sv) { return makeStringNew(s, StringNewWTF8Array); }
goto parse_error;
case 'w': {
switch (buf[14]) {
case '1': {
switch (buf[16]) {
case '\0':
if (op == "string.new_wtf16"sv) { return makeStringNew(s, StringNewWTF16, false); }
goto parse_error;
case '_':
if (op == "string.new_wtf16_array"sv) { return makeStringNew(s, StringNewWTF16Array, false); }
goto parse_error;
default: goto parse_error;
}
}
case '8': {
switch (buf[15]) {
case '\0':
if (op == "string.new_wtf8"sv) { return makeStringNew(s, StringNewWTF8, false); }
goto parse_error;
case '_':
if (op == "string.new_wtf8_array"sv) { return makeStringNew(s, StringNewWTF8Array, false); }
goto parse_error;
default: goto parse_error;
}
}
default: goto parse_error;
}
}
Expand Down Expand Up @@ -8905,6 +8924,13 @@ switch (buf[0]) {
default: goto parse_error;
}
}
case 'f':
if (op == "string.from_code_point"sv) {
auto ret = makeStringNew(ctx, pos, StringNewFromCodePoint, false);
CHECK_ERR(ret);
return *ret;
}
goto parse_error;
case 'i':
if (op == "string.is_usv_sequence"sv) {
auto ret = makeStringMeasure(ctx, pos, StringMeasureIsUSV);
Expand Down Expand Up @@ -8932,42 +8958,66 @@ switch (buf[0]) {
}
}
case 'n': {
switch (buf[14]) {
case '1': {
switch (buf[11]) {
case 'u': {
switch (buf[16]) {
case '\0':
if (op == "string.new_wtf16"sv) {
auto ret = makeStringNew(ctx, pos, StringNewWTF16);
case 'a':
if (op == "string.new_utf8_array_try"sv) {
auto ret = makeStringNew(ctx, pos, StringNewUTF8Array, true);
CHECK_ERR(ret);
return *ret;
}
goto parse_error;
case '_':
if (op == "string.new_wtf16_array"sv) {
auto ret = makeStringNew(ctx, pos, StringNewWTF16Array);
case 't':
if (op == "string.new_utf8_try"sv) {
auto ret = makeStringNew(ctx, pos, StringNewUTF8, true);
CHECK_ERR(ret);
return *ret;
}
goto parse_error;
default: goto parse_error;
}
}
case '8': {
switch (buf[15]) {
case '\0':
if (op == "string.new_wtf8"sv) {
auto ret = makeStringNew(ctx, pos, StringNewWTF8);
CHECK_ERR(ret);
return *ret;
case 'w': {
switch (buf[14]) {
case '1': {
switch (buf[16]) {
case '\0':
if (op == "string.new_wtf16"sv) {
auto ret = makeStringNew(ctx, pos, StringNewWTF16, false);
CHECK_ERR(ret);
return *ret;
}
goto parse_error;
case '_':
if (op == "string.new_wtf16_array"sv) {
auto ret = makeStringNew(ctx, pos, StringNewWTF16Array, false);
CHECK_ERR(ret);
return *ret;
}
goto parse_error;
default: goto parse_error;
}
goto parse_error;
case '_':
if (op == "string.new_wtf8_array"sv) {
auto ret = makeStringNew(ctx, pos, StringNewWTF8Array);
CHECK_ERR(ret);
return *ret;
}
case '8': {
switch (buf[15]) {
case '\0':
if (op == "string.new_wtf8"sv) {
auto ret = makeStringNew(ctx, pos, StringNewWTF8, false);
CHECK_ERR(ret);
return *ret;
}
goto parse_error;
case '_':
if (op == "string.new_wtf8_array"sv) {
auto ret = makeStringNew(ctx, pos, StringNewWTF8Array, false);
CHECK_ERR(ret);
return *ret;
}
goto parse_error;
default: goto parse_error;
}
goto parse_error;
}
default: goto parse_error;
}
}
Expand Down
15 changes: 13 additions & 2 deletions src/passes/Print.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2345,7 +2345,11 @@ struct PrintExpressionContents
void visitStringNew(StringNew* curr) {
switch (curr->op) {
case StringNewUTF8:
printMedium(o, "string.new_wtf8 utf8");
if (!curr->try_) {
printMedium(o, "string.new_wtf8 utf8");
} else {
printMedium(o, "string.new_utf8_try");
}
break;
case StringNewWTF8:
printMedium(o, "string.new_wtf8 wtf8");
Expand All @@ -2357,7 +2361,11 @@ struct PrintExpressionContents
printMedium(o, "string.new_wtf16");
break;
case StringNewUTF8Array:
printMedium(o, "string.new_wtf8_array utf8");
if (!curr->try_) {
printMedium(o, "string.new_wtf8_array utf8");
} else {
printMedium(o, "string.new_utf8_array_try");
}
break;
case StringNewWTF8Array:
printMedium(o, "string.new_wtf8_array wtf8");
Expand All @@ -2368,6 +2376,9 @@ struct PrintExpressionContents
case StringNewWTF16Array:
printMedium(o, "string.new_wtf16_array");
break;
case StringNewFromCodePoint:
printMedium(o, "string.from_code_point");
break;
default:
WASM_UNREACHABLE("invalid string.new*");
}
Expand Down
3 changes: 3 additions & 0 deletions src/wasm-binary.h
Original file line number Diff line number Diff line change
Expand Up @@ -1150,6 +1150,7 @@ enum ASTNodes {
StringConcat = 0x88,
StringEq = 0x89,
StringIsUSV = 0x8a,
StringNewUTF8Try = 0x8f,
StringAsWTF8 = 0x90,
StringViewWTF8Advance = 0x91,
StringViewWTF8Slice = 0x93,
Expand All @@ -1163,10 +1164,12 @@ enum ASTNodes {
StringViewIterRewind = 0xa3,
StringViewIterSlice = 0xa4,
StringCompare = 0xa8,
StringFromCodePoint = 0xa9,
StringNewWTF8Array = 0xb0,
StringNewWTF16Array = 0xb1,
StringEncodeWTF8Array = 0xb2,
StringEncodeWTF16Array = 0xb3,
StringNewUTF8ArrayTry = 0xb8,
};

enum MemoryAccess {
Expand Down
11 changes: 8 additions & 3 deletions src/wasm-builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -1004,24 +1004,29 @@ class Builder {
ret->finalize();
return ret;
}
StringNew*
makeStringNew(StringNewOp op, Expression* ptr, Expression* length) {
// Builds a StringNew node for the variants that take a length operand.
//   op     - the string.new* operation to create
//   ptr    - stored in the node's ptr field
//   length - stored in the node's length field
//   try_   - stored in the node's try_ field
// The node is allocated through wasm.allocator and finalize() is called on it
// before it is returned. The start and end fields are not assigned here.
StringNew* makeStringNew(StringNewOp op,
Expression* ptr,
Expression* length,
bool try_) {
auto* ret = wasm.allocator.alloc<StringNew>();
ret->op = op;
ret->ptr = ptr;
ret->length = length;
ret->try_ = try_;
ret->finalize();
return ret;
}
StringNew* makeStringNew(StringNewOp op,
Expression* ptr,
Expression* start,
Expression* end) {
Expression* end,
bool try_) {
auto* ret = wasm.allocator.alloc<StringNew>();
ret->op = op;
ret->ptr = ptr;
ret->start = start;
ret->end = end;
ret->try_ = try_;
ret->finalize();
return ret;
}
Expand Down
1 change: 1 addition & 0 deletions src/wasm-delegations-fields.def
Original file line number Diff line number Diff line change
Expand Up @@ -719,6 +719,7 @@ switch (DELEGATE_ID) {
case Expression::Id::StringNewId: {
DELEGATE_START(StringNew);
DELEGATE_FIELD_INT(StringNew, op);
DELEGATE_FIELD_INT(StringNew, try_);
DELEGATE_FIELD_OPTIONAL_CHILD(StringNew, end);
DELEGATE_FIELD_OPTIONAL_CHILD(StringNew, start);
DELEGATE_FIELD_OPTIONAL_CHILD(StringNew, length);
Expand Down
2 changes: 1 addition & 1 deletion src/wasm-s-parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@ class SExpressionWasmBuilder {
Expression* makeArrayCopy(Element& s);
Expression* makeRefAs(Element& s, RefAsOp op);
Expression* makeRefAsNonNull(Element& s);
Expression* makeStringNew(Element& s, StringNewOp op);
Expression* makeStringNew(Element& s, StringNewOp op, bool try_);
Expression* makeStringConst(Element& s);
Expression* makeStringMeasure(Element& s, StringMeasureOp op);
Expression* makeStringEncode(Element& s, StringEncodeOp op);
Expand Down
8 changes: 7 additions & 1 deletion src/wasm.h
Original file line number Diff line number Diff line change
Expand Up @@ -586,6 +586,8 @@ enum StringNewOp {
StringNewWTF8Array,
StringNewReplaceArray,
StringNewWTF16Array,
// Other
StringNewFromCodePoint,
};

enum StringMeasureOp {
Expand Down Expand Up @@ -1684,7 +1686,7 @@ class StringNew : public SpecificExpression<Expression::StringNewId> {
StringNewOp op;

// In linear memory variations this is the pointer in linear memory. In the
// GC variations this is an Array.
// GC variations this is an Array. In string.from_code_point this is the code point.
Expression* ptr;

// Used only in linear memory variations.
Expand All @@ -1694,6 +1696,10 @@ class StringNew : public SpecificExpression<Expression::StringNewId> {
Expression* start = nullptr;
Expression* end = nullptr;

// The "try" variants will return null if an encoding error happens, rather
// than trap.
bool try_ = false;

void finalize();
};

Expand Down
20 changes: 16 additions & 4 deletions src/wasm/wasm-binary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7184,7 +7184,13 @@ bool WasmBinaryBuilder::maybeVisitStringNew(Expression*& out, uint32_t code) {
Expression* length = nullptr;
Expression* start = nullptr;
Expression* end = nullptr;
if (code == BinaryConsts::StringNewWTF8) {
bool try_ = false;
if (code == BinaryConsts::StringNewWTF8 ||
code == BinaryConsts::StringNewUTF8Try) {
if (code == BinaryConsts::StringNewUTF8Try) {
try_ = true;
}
// FIXME: the memory index should be read as a ULEB32 rather than a single byte, as in all other places; fix this when adding multimemory support.
if (getInt8() != 0) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is existing code, but surely the memory index should be a ULEB32 rather than a byte?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, I think you're right, I'll add a TODO here. When we implement multimemory support for these we should fix that.

throwError("Unexpected nonzero memory index");
}
Expand All @@ -7209,7 +7215,11 @@ bool WasmBinaryBuilder::maybeVisitStringNew(Expression*& out, uint32_t code) {
}
op = StringNewWTF16;
length = popNonVoidExpression();
} else if (code == BinaryConsts::StringNewWTF8Array) {
} else if (code == BinaryConsts::StringNewWTF8Array ||
code == BinaryConsts::StringNewUTF8ArrayTry) {
if (code == BinaryConsts::StringNewUTF8ArrayTry) {
try_ = true;
}
auto policy = getU32LEB();
switch (policy) {
case BinaryConsts::StringPolicy::UTF8:
Expand All @@ -7230,14 +7240,16 @@ bool WasmBinaryBuilder::maybeVisitStringNew(Expression*& out, uint32_t code) {
op = StringNewWTF16Array;
end = popNonVoidExpression();
start = popNonVoidExpression();
} else if (code == BinaryConsts::StringFromCodePoint) {
op = StringNewFromCodePoint;
} else {
return false;
}
auto* ptr = popNonVoidExpression();
if (length) {
out = Builder(wasm).makeStringNew(op, ptr, length);
out = Builder(wasm).makeStringNew(op, ptr, length, try_);
} else {
out = Builder(wasm).makeStringNew(op, ptr, start, end);
out = Builder(wasm).makeStringNew(op, ptr, start, end, try_);
}
return true;
}
Expand Down
Loading