Skip to content

Commit 0e4712f

Browse files
authored
[Strings] Add experimental StringNew variants (#5459)
string.from_code_point makes a string from an integer code point. string.new_utf8*_try makes a UTF-8 string and returns null on a UTF-8 encoding error rather than trapping.
1 parent 07362b3 commit 0e4712f

15 files changed

+251
-86
lines changed

scripts/gen-s-parser.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -607,10 +607,13 @@
607607
("ref.as_i31", "makeRefCast(s, Type(HeapType::i31, NonNullable))"),
608608
("extern.internalize", "makeRefAs(s, ExternInternalize)"),
609609
("extern.externalize", "makeRefAs(s, ExternExternalize)"),
610-
("string.new_wtf8", "makeStringNew(s, StringNewWTF8)"),
611-
("string.new_wtf16", "makeStringNew(s, StringNewWTF16)"),
612-
("string.new_wtf8_array", "makeStringNew(s, StringNewWTF8Array)"),
613-
("string.new_wtf16_array", "makeStringNew(s, StringNewWTF16Array)"),
610+
("string.new_wtf8", "makeStringNew(s, StringNewWTF8, false)"),
611+
("string.new_wtf16", "makeStringNew(s, StringNewWTF16, false)"),
612+
("string.new_wtf8_array", "makeStringNew(s, StringNewWTF8Array, false)"),
613+
("string.new_wtf16_array", "makeStringNew(s, StringNewWTF16Array, false)"),
614+
("string.from_code_point", "makeStringNew(s, StringNewFromCodePoint, false)"),
615+
("string.new_utf8_try", "makeStringNew(s, StringNewUTF8, true)"),
616+
("string.new_utf8_array_try", "makeStringNew(s, StringNewUTF8Array, true)"),
614617
("string.const", "makeStringConst(s)"),
615618
("string.measure_wtf8", "makeStringMeasure(s, StringMeasureWTF8)"),
616619
("string.measure_wtf16", "makeStringMeasure(s, StringMeasureWTF16)"),

src/binaryen-c.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1836,14 +1836,18 @@ BinaryenExpressionRef BinaryenStringNew(BinaryenModuleRef module,
18361836
BinaryenExpressionRef length,
18371837
BinaryenExpressionRef start,
18381838
BinaryenExpressionRef end) {
1839+
// TODO: add C API support for the try_ flag (the string.new_utf8*_try variants)
1840+
bool try_ = false;
1841+
18391842
Builder builder(*(Module*)module);
18401843
return static_cast<Expression*>(
18411844
length ? builder.makeStringNew(
1842-
StringNewOp(op), (Expression*)ptr, (Expression*)length)
1845+
StringNewOp(op), (Expression*)ptr, (Expression*)length, try_)
18431846
: builder.makeStringNew(StringNewOp(op),
18441847
(Expression*)ptr,
18451848
(Expression*)start,
1846-
(Expression*)end));
1849+
(Expression*)end,
1850+
try_));
18471851
}
18481852
BinaryenExpressionRef BinaryenStringConst(BinaryenModuleRef module,
18491853
const char* name) {

src/gen-s-parser.inc

Lines changed: 86 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -3191,6 +3191,9 @@ switch (buf[0]) {
31913191
default: goto parse_error;
31923192
}
31933193
}
3194+
case 'f':
3195+
if (op == "string.from_code_point"sv) { return makeStringNew(s, StringNewFromCodePoint, false); }
3196+
goto parse_error;
31943197
case 'i':
31953198
if (op == "string.is_usv_sequence"sv) { return makeStringMeasure(s, StringMeasureIsUSV); }
31963199
goto parse_error;
@@ -3206,26 +3209,42 @@ switch (buf[0]) {
32063209
}
32073210
}
32083211
case 'n': {
3209-
switch (buf[14]) {
3210-
case '1': {
3212+
switch (buf[11]) {
3213+
case 'u': {
32113214
switch (buf[16]) {
3212-
case '\0':
3213-
if (op == "string.new_wtf16"sv) { return makeStringNew(s, StringNewWTF16); }
3215+
case 'a':
3216+
if (op == "string.new_utf8_array_try"sv) { return makeStringNew(s, StringNewUTF8Array, true); }
32143217
goto parse_error;
3215-
case '_':
3216-
if (op == "string.new_wtf16_array"sv) { return makeStringNew(s, StringNewWTF16Array); }
3218+
case 't':
3219+
if (op == "string.new_utf8_try"sv) { return makeStringNew(s, StringNewUTF8, true); }
32173220
goto parse_error;
32183221
default: goto parse_error;
32193222
}
32203223
}
3221-
case '8': {
3222-
switch (buf[15]) {
3223-
case '\0':
3224-
if (op == "string.new_wtf8"sv) { return makeStringNew(s, StringNewWTF8); }
3225-
goto parse_error;
3226-
case '_':
3227-
if (op == "string.new_wtf8_array"sv) { return makeStringNew(s, StringNewWTF8Array); }
3228-
goto parse_error;
3224+
case 'w': {
3225+
switch (buf[14]) {
3226+
case '1': {
3227+
switch (buf[16]) {
3228+
case '\0':
3229+
if (op == "string.new_wtf16"sv) { return makeStringNew(s, StringNewWTF16, false); }
3230+
goto parse_error;
3231+
case '_':
3232+
if (op == "string.new_wtf16_array"sv) { return makeStringNew(s, StringNewWTF16Array, false); }
3233+
goto parse_error;
3234+
default: goto parse_error;
3235+
}
3236+
}
3237+
case '8': {
3238+
switch (buf[15]) {
3239+
case '\0':
3240+
if (op == "string.new_wtf8"sv) { return makeStringNew(s, StringNewWTF8, false); }
3241+
goto parse_error;
3242+
case '_':
3243+
if (op == "string.new_wtf8_array"sv) { return makeStringNew(s, StringNewWTF8Array, false); }
3244+
goto parse_error;
3245+
default: goto parse_error;
3246+
}
3247+
}
32293248
default: goto parse_error;
32303249
}
32313250
}
@@ -8905,6 +8924,13 @@ switch (buf[0]) {
89058924
default: goto parse_error;
89068925
}
89078926
}
8927+
case 'f':
8928+
if (op == "string.from_code_point"sv) {
8929+
auto ret = makeStringNew(ctx, pos, StringNewFromCodePoint, false);
8930+
CHECK_ERR(ret);
8931+
return *ret;
8932+
}
8933+
goto parse_error;
89088934
case 'i':
89098935
if (op == "string.is_usv_sequence"sv) {
89108936
auto ret = makeStringMeasure(ctx, pos, StringMeasureIsUSV);
@@ -8932,42 +8958,66 @@ switch (buf[0]) {
89328958
}
89338959
}
89348960
case 'n': {
8935-
switch (buf[14]) {
8936-
case '1': {
8961+
switch (buf[11]) {
8962+
case 'u': {
89378963
switch (buf[16]) {
8938-
case '\0':
8939-
if (op == "string.new_wtf16"sv) {
8940-
auto ret = makeStringNew(ctx, pos, StringNewWTF16);
8964+
case 'a':
8965+
if (op == "string.new_utf8_array_try"sv) {
8966+
auto ret = makeStringNew(ctx, pos, StringNewUTF8Array, true);
89418967
CHECK_ERR(ret);
89428968
return *ret;
89438969
}
89448970
goto parse_error;
8945-
case '_':
8946-
if (op == "string.new_wtf16_array"sv) {
8947-
auto ret = makeStringNew(ctx, pos, StringNewWTF16Array);
8971+
case 't':
8972+
if (op == "string.new_utf8_try"sv) {
8973+
auto ret = makeStringNew(ctx, pos, StringNewUTF8, true);
89488974
CHECK_ERR(ret);
89498975
return *ret;
89508976
}
89518977
goto parse_error;
89528978
default: goto parse_error;
89538979
}
89548980
}
8955-
case '8': {
8956-
switch (buf[15]) {
8957-
case '\0':
8958-
if (op == "string.new_wtf8"sv) {
8959-
auto ret = makeStringNew(ctx, pos, StringNewWTF8);
8960-
CHECK_ERR(ret);
8961-
return *ret;
8981+
case 'w': {
8982+
switch (buf[14]) {
8983+
case '1': {
8984+
switch (buf[16]) {
8985+
case '\0':
8986+
if (op == "string.new_wtf16"sv) {
8987+
auto ret = makeStringNew(ctx, pos, StringNewWTF16, false);
8988+
CHECK_ERR(ret);
8989+
return *ret;
8990+
}
8991+
goto parse_error;
8992+
case '_':
8993+
if (op == "string.new_wtf16_array"sv) {
8994+
auto ret = makeStringNew(ctx, pos, StringNewWTF16Array, false);
8995+
CHECK_ERR(ret);
8996+
return *ret;
8997+
}
8998+
goto parse_error;
8999+
default: goto parse_error;
89629000
}
8963-
goto parse_error;
8964-
case '_':
8965-
if (op == "string.new_wtf8_array"sv) {
8966-
auto ret = makeStringNew(ctx, pos, StringNewWTF8Array);
8967-
CHECK_ERR(ret);
8968-
return *ret;
9001+
}
9002+
case '8': {
9003+
switch (buf[15]) {
9004+
case '\0':
9005+
if (op == "string.new_wtf8"sv) {
9006+
auto ret = makeStringNew(ctx, pos, StringNewWTF8, false);
9007+
CHECK_ERR(ret);
9008+
return *ret;
9009+
}
9010+
goto parse_error;
9011+
case '_':
9012+
if (op == "string.new_wtf8_array"sv) {
9013+
auto ret = makeStringNew(ctx, pos, StringNewWTF8Array, false);
9014+
CHECK_ERR(ret);
9015+
return *ret;
9016+
}
9017+
goto parse_error;
9018+
default: goto parse_error;
89699019
}
8970-
goto parse_error;
9020+
}
89719021
default: goto parse_error;
89729022
}
89739023
}

src/passes/Print.cpp

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2345,7 +2345,11 @@ struct PrintExpressionContents
23452345
void visitStringNew(StringNew* curr) {
23462346
switch (curr->op) {
23472347
case StringNewUTF8:
2348-
printMedium(o, "string.new_wtf8 utf8");
2348+
if (!curr->try_) {
2349+
printMedium(o, "string.new_wtf8 utf8");
2350+
} else {
2351+
printMedium(o, "string.new_utf8_try");
2352+
}
23492353
break;
23502354
case StringNewWTF8:
23512355
printMedium(o, "string.new_wtf8 wtf8");
@@ -2357,7 +2361,11 @@ struct PrintExpressionContents
23572361
printMedium(o, "string.new_wtf16");
23582362
break;
23592363
case StringNewUTF8Array:
2360-
printMedium(o, "string.new_wtf8_array utf8");
2364+
if (!curr->try_) {
2365+
printMedium(o, "string.new_wtf8_array utf8");
2366+
} else {
2367+
printMedium(o, "string.new_utf8_array_try");
2368+
}
23612369
break;
23622370
case StringNewWTF8Array:
23632371
printMedium(o, "string.new_wtf8_array wtf8");
@@ -2368,6 +2376,9 @@ struct PrintExpressionContents
23682376
case StringNewWTF16Array:
23692377
printMedium(o, "string.new_wtf16_array");
23702378
break;
2379+
case StringNewFromCodePoint:
2380+
printMedium(o, "string.from_code_point");
2381+
break;
23712382
default:
23722383
WASM_UNREACHABLE("invalid string.new*");
23732384
}

src/wasm-binary.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1150,6 +1150,7 @@ enum ASTNodes {
11501150
StringConcat = 0x88,
11511151
StringEq = 0x89,
11521152
StringIsUSV = 0x8a,
1153+
StringNewUTF8Try = 0x8f,
11531154
StringAsWTF8 = 0x90,
11541155
StringViewWTF8Advance = 0x91,
11551156
StringViewWTF8Slice = 0x93,
@@ -1163,10 +1164,12 @@ enum ASTNodes {
11631164
StringViewIterRewind = 0xa3,
11641165
StringViewIterSlice = 0xa4,
11651166
StringCompare = 0xa8,
1167+
StringFromCodePoint = 0xa9,
11661168
StringNewWTF8Array = 0xb0,
11671169
StringNewWTF16Array = 0xb1,
11681170
StringEncodeWTF8Array = 0xb2,
11691171
StringEncodeWTF16Array = 0xb3,
1172+
StringNewUTF8ArrayTry = 0xb8,
11701173
};
11711174

11721175
enum MemoryAccess {

src/wasm-builder.h

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1004,24 +1004,29 @@ class Builder {
10041004
ret->finalize();
10051005
return ret;
10061006
}
1007-
StringNew*
1008-
makeStringNew(StringNewOp op, Expression* ptr, Expression* length) {
1007+
StringNew* makeStringNew(StringNewOp op,
1008+
Expression* ptr,
1009+
Expression* length,
1010+
bool try_) {
10091011
auto* ret = wasm.allocator.alloc<StringNew>();
10101012
ret->op = op;
10111013
ret->ptr = ptr;
10121014
ret->length = length;
1015+
ret->try_ = try_;
10131016
ret->finalize();
10141017
return ret;
10151018
}
10161019
StringNew* makeStringNew(StringNewOp op,
10171020
Expression* ptr,
10181021
Expression* start,
1019-
Expression* end) {
1022+
Expression* end,
1023+
bool try_) {
10201024
auto* ret = wasm.allocator.alloc<StringNew>();
10211025
ret->op = op;
10221026
ret->ptr = ptr;
10231027
ret->start = start;
10241028
ret->end = end;
1029+
ret->try_ = try_;
10251030
ret->finalize();
10261031
return ret;
10271032
}

src/wasm-delegations-fields.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -719,6 +719,7 @@ switch (DELEGATE_ID) {
719719
case Expression::Id::StringNewId: {
720720
DELEGATE_START(StringNew);
721721
DELEGATE_FIELD_INT(StringNew, op);
722+
DELEGATE_FIELD_INT(StringNew, try_);
722723
DELEGATE_FIELD_OPTIONAL_CHILD(StringNew, end);
723724
DELEGATE_FIELD_OPTIONAL_CHILD(StringNew, start);
724725
DELEGATE_FIELD_OPTIONAL_CHILD(StringNew, length);

src/wasm-s-parser.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -304,7 +304,7 @@ class SExpressionWasmBuilder {
304304
Expression* makeArrayCopy(Element& s);
305305
Expression* makeRefAs(Element& s, RefAsOp op);
306306
Expression* makeRefAsNonNull(Element& s);
307-
Expression* makeStringNew(Element& s, StringNewOp op);
307+
Expression* makeStringNew(Element& s, StringNewOp op, bool try_);
308308
Expression* makeStringConst(Element& s);
309309
Expression* makeStringMeasure(Element& s, StringMeasureOp op);
310310
Expression* makeStringEncode(Element& s, StringEncodeOp op);

src/wasm.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -586,6 +586,8 @@ enum StringNewOp {
586586
StringNewWTF8Array,
587587
StringNewReplaceArray,
588588
StringNewWTF16Array,
589+
// Other
590+
StringNewFromCodePoint,
589591
};
590592

591593
enum StringMeasureOp {
@@ -1684,7 +1686,7 @@ class StringNew : public SpecificExpression<Expression::StringNewId> {
16841686
StringNewOp op;
16851687

16861688
// In linear memory variations this is the pointer in linear memory. In the
1687-
// GC variations this is an Array.
1689+
// GC variations this is an Array. In from_code_point this is the code point.
16881690
Expression* ptr;
16891691

16901692
// Used only in linear memory variations.
@@ -1694,6 +1696,10 @@ class StringNew : public SpecificExpression<Expression::StringNewId> {
16941696
Expression* start = nullptr;
16951697
Expression* end = nullptr;
16961698

1699+
// The "try" variants will return null if an encoding error happens, rather
1700+
// than trap.
1701+
bool try_ = false;
1702+
16971703
void finalize();
16981704
};
16991705

src/wasm/wasm-binary.cpp

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7184,7 +7184,13 @@ bool WasmBinaryBuilder::maybeVisitStringNew(Expression*& out, uint32_t code) {
71847184
Expression* length = nullptr;
71857185
Expression* start = nullptr;
71867186
Expression* end = nullptr;
7187-
if (code == BinaryConsts::StringNewWTF8) {
7187+
bool try_ = false;
7188+
if (code == BinaryConsts::StringNewWTF8 ||
7189+
code == BinaryConsts::StringNewUTF8Try) {
7190+
if (code == BinaryConsts::StringNewUTF8Try) {
7191+
try_ = true;
7192+
}
7193+
// FIXME: the memory index should be an LEB like all other places
71887194
if (getInt8() != 0) {
71897195
throwError("Unexpected nonzero memory index");
71907196
}
@@ -7209,7 +7215,11 @@ bool WasmBinaryBuilder::maybeVisitStringNew(Expression*& out, uint32_t code) {
72097215
}
72107216
op = StringNewWTF16;
72117217
length = popNonVoidExpression();
7212-
} else if (code == BinaryConsts::StringNewWTF8Array) {
7218+
} else if (code == BinaryConsts::StringNewWTF8Array ||
7219+
code == BinaryConsts::StringNewUTF8ArrayTry) {
7220+
if (code == BinaryConsts::StringNewUTF8ArrayTry) {
7221+
try_ = true;
7222+
}
72137223
auto policy = getU32LEB();
72147224
switch (policy) {
72157225
case BinaryConsts::StringPolicy::UTF8:
@@ -7230,14 +7240,16 @@ bool WasmBinaryBuilder::maybeVisitStringNew(Expression*& out, uint32_t code) {
72307240
op = StringNewWTF16Array;
72317241
end = popNonVoidExpression();
72327242
start = popNonVoidExpression();
7243+
} else if (code == BinaryConsts::StringFromCodePoint) {
7244+
op = StringNewFromCodePoint;
72337245
} else {
72347246
return false;
72357247
}
72367248
auto* ptr = popNonVoidExpression();
72377249
if (length) {
7238-
out = Builder(wasm).makeStringNew(op, ptr, length);
7250+
out = Builder(wasm).makeStringNew(op, ptr, length, try_);
72397251
} else {
7240-
out = Builder(wasm).makeStringNew(op, ptr, start, end);
7252+
out = Builder(wasm).makeStringNew(op, ptr, start, end, try_);
72417253
}
72427254
return true;
72437255
}

0 commit comments

Comments
 (0)