Skip to content

Commit d11db68

Browse files
MaxGraey authored and dcodeIO committed
Optimize UTF8 conversion routines (#1022)
1 parent 08c0111 commit d11db68

File tree

3 files changed

+409
-425
lines changed

3 files changed

+409
-425
lines changed

Diff for: std/assembly/string.ts

+56 -47
Original file line number | Diff line number | Diff line change
@@ -641,19 +641,21 @@ export namespace String {
641641
while (strOff < strEnd) {
642642
let c1 = <u32>load<u16>(strOff);
643643
if (c1 < 128) {
644-
if (nullTerminated && !c1) break;
645-
bufLen += 1; strOff += 2;
644+
// @ts-ignore: cast
645+
if (nullTerminated & !c1) break;
646+
bufLen += 1;
646647
} else if (c1 < 2048) {
647-
bufLen += 2; strOff += 2;
648+
bufLen += 2;
648649
} else {
649650
if ((c1 & 0xFC00) == 0xD800 && strOff + 2 < strEnd) {
650651
if ((<u32>load<u16>(strOff, 2) & 0xFC00) == 0xDC00) {
651-
strOff += 4; bufLen += 4;
652+
bufLen += 4; strOff += 4;
652653
continue;
653654
}
654655
}
655-
strOff += 2; bufLen += 3;
656+
bufLen += 3;
656657
}
658+
strOff += 2;
657659
}
658660
return bufLen;
659661
}
@@ -669,29 +671,34 @@ export namespace String {
669671
let c1 = <u32>load<u16>(strOff);
670672
if (c1 < 128) {
671673
store<u8>(bufOff, c1);
672-
bufOff += 1; strOff += 2;
674+
bufOff++;
673675
} else if (c1 < 2048) {
674-
store<u8>(bufOff, c1 >> 6 | 192);
675-
store<u8>(bufOff, c1 & 63 | 128, 1);
676-
bufOff += 2; strOff += 2;
676+
let b0 = c1 >> 6 | 192;
677+
let b1 = c1 & 63 | 128;
678+
store<u16>(bufOff, b1 << 8 | b0);
679+
bufOff += 2;
677680
} else {
678681
if ((c1 & 0xFC00) == 0xD800 && strOff + 2 < strEnd) {
679682
let c2 = <u32>load<u16>(strOff, 2);
680683
if ((c2 & 0xFC00) == 0xDC00) {
681684
c1 = 0x10000 + ((c1 & 0x03FF) << 10) | (c2 & 0x03FF);
682-
store<u8>(bufOff, c1 >> 18 | 240);
683-
store<u8>(bufOff, c1 >> 12 & 63 | 128, 1);
684-
store<u8>(bufOff, c1 >> 6 & 63 | 128, 2);
685-
store<u8>(bufOff, c1 & 63 | 128, 3);
686-
strOff += 4; bufOff += 4;
685+
let b0 = c1 >> 18 | 240;
686+
let b1 = c1 >> 12 & 63 | 128;
687+
let b2 = c1 >> 6 & 63 | 128;
688+
let b3 = c1 & 63 | 128;
689+
store<u32>(bufOff, b3 << 24 | b2 << 16 | b1 << 8 | b0);
690+
bufOff += 4; strOff += 4;
687691
continue;
688692
}
689693
}
690-
store<u8>(bufOff, c1 >> 12 | 224);
691-
store<u8>(bufOff, c1 >> 6 & 63 | 128, 1);
692-
store<u8>(bufOff, c1 & 63 | 128, 2);
693-
strOff += 2; bufOff += 3;
694+
let b0 = c1 >> 12 | 224;
695+
let b1 = c1 >> 6 & 63 | 128;
696+
let b2 = c1 & 63 | 128;
697+
store<u16>(bufOff, b1 << 8 | b0);
698+
store<u8>(bufOff, b2, 2);
699+
bufOff += 3;
694700
}
701+
strOff += 2;
695702
}
696703
assert(strOff <= strEnd);
697704
if (nullTerminated) {
@@ -713,36 +720,38 @@ export namespace String {
713720
var str = __alloc(len << 1, idof<String>()); // max is one u16 char per u8 byte
714721
var strOff = str;
715722
while (bufOff < bufEnd) {
716-
let cp = <u32>load<u8>(bufOff++);
717-
if (cp < 128) {
718-
if (nullTerminated && !cp) break;
719-
store<u16>(strOff, cp);
720-
strOff += 2;
721-
} else if (cp > 191 && cp < 224) {
722-
if (bufEnd - bufOff < 1) break;
723-
store<u16>(strOff, (cp & 31) << 6 | load<u8>(bufOff++) & 63);
724-
strOff += 2;
725-
} else if (cp > 239 && cp < 365) {
726-
if (bufEnd - bufOff < 3) break;
727-
cp = (
728-
(cp & 7) << 18 |
729-
(load<u8>(bufOff) & 63) << 12 |
730-
(load<u8>(bufOff, 1) & 63) << 6 |
731-
load<u8>(bufOff, 2) & 63
732-
) - 0x10000;
733-
bufOff += 3;
734-
store<u16>(strOff, 0xD800 | (cp >> 10));
735-
store<u16>(strOff, 0xDC00 | (cp & 1023), 2);
736-
strOff += 4;
723+
let u0 = <u32>load<u8>(bufOff); ++bufOff;
724+
if (!(u0 & 128)) {
725+
// @ts-ignore: cast
726+
if (nullTerminated & !u0) break;
727+
store<u16>(strOff, u0);
737728
} else {
738-
if (bufEnd - bufOff < 2) break;
739-
store<u16>(strOff,
740-
(cp & 15) << 12 |
741-
(load<u8>(bufOff) & 63) << 6 |
742-
load<u8>(bufOff, 1) & 63
743-
);
744-
bufOff += 2; strOff += 2;
729+
if (bufEnd == bufOff) break;
730+
let u1 = <u32>load<u8>(bufOff) & 63; ++bufOff;
731+
if ((u0 & 224) == 192) {
732+
store<u16>(strOff, (u0 & 31) << 6 | u1);
733+
} else {
734+
if (bufEnd == bufOff) break;
735+
let u2 = <u32>load<u8>(bufOff) & 63; ++bufOff;
736+
if ((u0 & 240) == 224) {
737+
u0 = (u0 & 15) << 12 | u1 << 6 | u2;
738+
} else {
739+
if (bufEnd == bufOff) break;
740+
u0 = (u0 & 7) << 18 | u1 << 12 | u2 << 6 | <u32>load<u8>(bufOff) & 63;
741+
++bufOff;
742+
}
743+
if (u0 < 0x10000) {
744+
store<u16>(strOff, u0);
745+
} else {
746+
u0 -= 0x10000;
747+
let lo = u0 >> 10 | 0xD800;
748+
let hi = (u0 & 0x03FF) | 0xDC00;
749+
store<u32>(strOff, lo | (hi << 16));
750+
strOff += 2;
751+
}
752+
}
745753
}
754+
strOff += 2;
746755
}
747756
return changetype<String>(__realloc(str, strOff - str)); // retains
748757
}
@@ -755,7 +764,7 @@ export namespace String {
755764
}
756765

757766
export function encode(str: string): ArrayBuffer {
758-
var size = changetype<BLOCK>(changetype<usize>(str) - BLOCK_OVERHEAD).rtSize;
767+
var size = UTF16.byteLength(str);
759768
var buf = __alloc(size, idof<ArrayBuffer>());
760769
memory.copy(buf, changetype<usize>(str), <usize>size);
761770
return changetype<ArrayBuffer>(buf); // retains

0 commit comments

Comments
 (0)