Skip to content

Commit c972cb6

Browse files
committed
Merge remote-tracking branch 'origin/master' into release
2 parents a77cbe1 + 395df77 commit c972cb6

File tree

1 file changed

+151
-154
lines changed

1 file changed

+151
-154
lines changed

std/assembly/util/memory.ts

+151-154
Original file line numberDiff line numberDiff line change
@@ -1,150 +1,143 @@
11
export function memcpy(dest: usize, src: usize, n: usize): void { // see: musl/src/string/memcpy.c
2-
if (ASC_SHRINK_LEVEL > 1) {
3-
while (n) {
4-
store<u8>(dest++, load<u8>(src++));
5-
--n;
6-
}
7-
} else {
8-
let w: u32, x: u32;
2+
var w: u32, x: u32;
93

10-
// copy 1 byte each until src is aligned to 4 bytes
11-
while (n && (src & 3)) {
4+
// copy 1 byte each until src is aligned to 4 bytes
5+
while (n && (src & 3)) {
6+
store<u8>(dest++, load<u8>(src++));
7+
n--;
8+
}
9+
10+
// if dst is aligned to 4 bytes as well, copy 4 bytes each
11+
if ((dest & 3) == 0) {
12+
while (n >= 16) {
13+
store<u32>(dest , load<u32>(src ));
14+
store<u32>(dest + 4, load<u32>(src + 4));
15+
store<u32>(dest + 8, load<u32>(src + 8));
16+
store<u32>(dest + 12, load<u32>(src + 12));
17+
src += 16; dest += 16; n -= 16;
18+
}
19+
if (n & 8) {
20+
store<u32>(dest , load<u32>(src ));
21+
store<u32>(dest + 4, load<u32>(src + 4));
22+
dest += 8; src += 8;
23+
}
24+
if (n & 4) {
25+
store<u32>(dest, load<u32>(src));
26+
dest += 4; src += 4;
27+
}
28+
if (n & 2) { // drop to 2 bytes each
29+
store<u16>(dest, load<u16>(src));
30+
dest += 2; src += 2;
31+
}
32+
if (n & 1) { // drop to 1 byte
1233
store<u8>(dest++, load<u8>(src++));
13-
n--;
1434
}
35+
return;
36+
}
1537

16-
// if dst is aligned to 4 bytes as well, copy 4 bytes each
17-
if ((dest & 3) == 0) {
18-
while (n >= 16) {
19-
store<u32>(dest , load<u32>(src ));
20-
store<u32>(dest + 4, load<u32>(src + 4));
21-
store<u32>(dest + 8, load<u32>(src + 8));
22-
store<u32>(dest + 12, load<u32>(src + 12));
23-
src += 16; dest += 16; n -= 16;
24-
}
25-
if (n & 8) {
26-
store<u32>(dest , load<u32>(src ));
27-
store<u32>(dest + 4, load<u32>(src + 4));
28-
dest += 8; src += 8;
29-
}
30-
if (n & 4) {
31-
store<u32>(dest, load<u32>(src));
32-
dest += 4; src += 4;
33-
}
34-
if (n & 2) { // drop to 2 bytes each
35-
store<u16>(dest, load<u16>(src));
36-
dest += 2; src += 2;
37-
}
38-
if (n & 1) { // drop to 1 byte
38+
// if dst is not aligned to 4 bytes, use alternating shifts to copy 4 bytes each
39+
// doing shifts is faster when copying enough bytes (here: 32 or more)
40+
if (n >= 32) {
41+
switch (dest & 3) {
42+
// known to be != 0
43+
case 1: {
44+
w = load<u32>(src);
3945
store<u8>(dest++, load<u8>(src++));
40-
}
41-
return;
42-
}
43-
44-
// if dst is not aligned to 4 bytes, use alternating shifts to copy 4 bytes each
45-
// doing shifts is faster when copying enough bytes (here: 32 or more)
46-
if (n >= 32) {
47-
switch (dest & 3) {
48-
// known to be != 0
49-
case 1: {
50-
w = load<u32>(src);
51-
store<u8>(dest++, load<u8>(src++));
52-
store<u8>(dest++, load<u8>(src++));
53-
store<u8>(dest++, load<u8>(src++));
54-
n -= 3;
55-
while (n >= 17) {
56-
x = load<u32>(src + 1);
57-
store<u32>(dest, w >> 24 | x << 8);
58-
w = load<u32>(src + 5);
59-
store<u32>(dest + 4, x >> 24 | w << 8);
60-
x = load<u32>(src + 9);
61-
store<u32>(dest + 8, w >> 24 | x << 8);
62-
w = load<u32>(src + 13);
63-
store<u32>(dest + 12, x >> 24 | w << 8);
64-
src += 16; dest += 16; n -= 16;
65-
}
66-
break;
46+
store<u8>(dest++, load<u8>(src++));
47+
store<u8>(dest++, load<u8>(src++));
48+
n -= 3;
49+
while (n >= 17) {
50+
x = load<u32>(src + 1);
51+
store<u32>(dest, w >> 24 | x << 8);
52+
w = load<u32>(src + 5);
53+
store<u32>(dest + 4, x >> 24 | w << 8);
54+
x = load<u32>(src + 9);
55+
store<u32>(dest + 8, w >> 24 | x << 8);
56+
w = load<u32>(src + 13);
57+
store<u32>(dest + 12, x >> 24 | w << 8);
58+
src += 16; dest += 16; n -= 16;
6759
}
68-
case 2: {
69-
w = load<u32>(src);
70-
store<u8>(dest++, load<u8>(src++));
71-
store<u8>(dest++, load<u8>(src++));
72-
n -= 2;
73-
while (n >= 18) {
74-
x = load<u32>(src + 2);
75-
store<u32>(dest, w >> 16 | x << 16);
76-
w = load<u32>(src + 6);
77-
store<u32>(dest + 4, x >> 16 | w << 16);
78-
x = load<u32>(src + 10);
79-
store<u32>(dest + 8, w >> 16 | x << 16);
80-
w = load<u32>(src + 14);
81-
store<u32>(dest + 12, x >> 16 | w << 16);
82-
src += 16; dest += 16; n -= 16;
83-
}
84-
break;
60+
break;
61+
}
62+
case 2: {
63+
w = load<u32>(src);
64+
store<u8>(dest++, load<u8>(src++));
65+
store<u8>(dest++, load<u8>(src++));
66+
n -= 2;
67+
while (n >= 18) {
68+
x = load<u32>(src + 2);
69+
store<u32>(dest, w >> 16 | x << 16);
70+
w = load<u32>(src + 6);
71+
store<u32>(dest + 4, x >> 16 | w << 16);
72+
x = load<u32>(src + 10);
73+
store<u32>(dest + 8, w >> 16 | x << 16);
74+
w = load<u32>(src + 14);
75+
store<u32>(dest + 12, x >> 16 | w << 16);
76+
src += 16; dest += 16; n -= 16;
8577
}
86-
case 3: {
87-
w = load<u32>(src);
88-
store<u8>(dest++, load<u8>(src++));
89-
n -= 1;
90-
while (n >= 19) {
91-
x = load<u32>(src + 3);
92-
store<u32>(dest, w >> 8 | x << 24);
93-
w = load<u32>(src + 7);
94-
store<u32>(dest + 4, x >> 8 | w << 24);
95-
x = load<u32>(src + 11);
96-
store<u32>(dest + 8, w >> 8 | x << 24);
97-
w = load<u32>(src + 15);
98-
store<u32>(dest + 12, x >> 8 | w << 24);
99-
src += 16; dest += 16; n -= 16;
100-
}
101-
break;
78+
break;
79+
}
80+
case 3: {
81+
w = load<u32>(src);
82+
store<u8>(dest++, load<u8>(src++));
83+
n -= 1;
84+
while (n >= 19) {
85+
x = load<u32>(src + 3);
86+
store<u32>(dest, w >> 8 | x << 24);
87+
w = load<u32>(src + 7);
88+
store<u32>(dest + 4, x >> 8 | w << 24);
89+
x = load<u32>(src + 11);
90+
store<u32>(dest + 8, w >> 8 | x << 24);
91+
w = load<u32>(src + 15);
92+
store<u32>(dest + 12, x >> 8 | w << 24);
93+
src += 16; dest += 16; n -= 16;
10294
}
95+
break;
10396
}
10497
}
98+
}
10599

106-
// copy remaining bytes one by one
107-
if (n & 16) {
108-
store<u8>(dest++, load<u8>(src++));
109-
store<u8>(dest++, load<u8>(src++));
110-
store<u8>(dest++, load<u8>(src++));
111-
store<u8>(dest++, load<u8>(src++));
112-
store<u8>(dest++, load<u8>(src++));
113-
store<u8>(dest++, load<u8>(src++));
114-
store<u8>(dest++, load<u8>(src++));
115-
store<u8>(dest++, load<u8>(src++));
116-
store<u8>(dest++, load<u8>(src++));
117-
store<u8>(dest++, load<u8>(src++));
118-
store<u8>(dest++, load<u8>(src++));
119-
store<u8>(dest++, load<u8>(src++));
120-
store<u8>(dest++, load<u8>(src++));
121-
store<u8>(dest++, load<u8>(src++));
122-
store<u8>(dest++, load<u8>(src++));
123-
store<u8>(dest++, load<u8>(src++));
124-
}
125-
if (n & 8) {
126-
store<u8>(dest++, load<u8>(src++));
127-
store<u8>(dest++, load<u8>(src++));
128-
store<u8>(dest++, load<u8>(src++));
129-
store<u8>(dest++, load<u8>(src++));
130-
store<u8>(dest++, load<u8>(src++));
131-
store<u8>(dest++, load<u8>(src++));
132-
store<u8>(dest++, load<u8>(src++));
133-
store<u8>(dest++, load<u8>(src++));
134-
}
135-
if (n & 4) {
136-
store<u8>(dest++, load<u8>(src++));
137-
store<u8>(dest++, load<u8>(src++));
138-
store<u8>(dest++, load<u8>(src++));
139-
store<u8>(dest++, load<u8>(src++));
140-
}
141-
if (n & 2) {
142-
store<u8>(dest++, load<u8>(src++));
143-
store<u8>(dest++, load<u8>(src++));
144-
}
145-
if (n & 1) {
146-
store<u8>(dest++, load<u8>(src++));
147-
}
100+
// copy remaining bytes one by one
101+
if (n & 16) {
102+
store<u8>(dest++, load<u8>(src++));
103+
store<u8>(dest++, load<u8>(src++));
104+
store<u8>(dest++, load<u8>(src++));
105+
store<u8>(dest++, load<u8>(src++));
106+
store<u8>(dest++, load<u8>(src++));
107+
store<u8>(dest++, load<u8>(src++));
108+
store<u8>(dest++, load<u8>(src++));
109+
store<u8>(dest++, load<u8>(src++));
110+
store<u8>(dest++, load<u8>(src++));
111+
store<u8>(dest++, load<u8>(src++));
112+
store<u8>(dest++, load<u8>(src++));
113+
store<u8>(dest++, load<u8>(src++));
114+
store<u8>(dest++, load<u8>(src++));
115+
store<u8>(dest++, load<u8>(src++));
116+
store<u8>(dest++, load<u8>(src++));
117+
store<u8>(dest++, load<u8>(src++));
118+
}
119+
if (n & 8) {
120+
store<u8>(dest++, load<u8>(src++));
121+
store<u8>(dest++, load<u8>(src++));
122+
store<u8>(dest++, load<u8>(src++));
123+
store<u8>(dest++, load<u8>(src++));
124+
store<u8>(dest++, load<u8>(src++));
125+
store<u8>(dest++, load<u8>(src++));
126+
store<u8>(dest++, load<u8>(src++));
127+
store<u8>(dest++, load<u8>(src++));
128+
}
129+
if (n & 4) {
130+
store<u8>(dest++, load<u8>(src++));
131+
store<u8>(dest++, load<u8>(src++));
132+
store<u8>(dest++, load<u8>(src++));
133+
store<u8>(dest++, load<u8>(src++));
134+
}
135+
if (n & 2) {
136+
store<u8>(dest++, load<u8>(src++));
137+
store<u8>(dest++, load<u8>(src++));
138+
}
139+
if (n & 1) {
140+
store<u8>(dest++, load<u8>(src++));
148141
}
149142
}
150143

@@ -159,32 +152,36 @@ export function memmove(dest: usize, src: usize, n: usize): void { // see: musl/
159152
}
160153
}
161154
if (dest < src) {
162-
if ((src & 7) == (dest & 7)) {
163-
while (dest & 7) {
164-
if (!n) return;
165-
--n;
166-
store<u8>(dest++, load<u8>(src++));
167-
}
168-
while (n >= 8) {
169-
store<u64>(dest, load<u64>(src));
170-
n -= 8;
171-
dest += 8;
172-
src += 8;
155+
if (ASC_SHRINK_LEVEL < 2) {
156+
if ((src & 7) == (dest & 7)) {
157+
while (dest & 7) {
158+
if (!n) return;
159+
--n;
160+
store<u8>(dest++, load<u8>(src++));
161+
}
162+
while (n >= 8) {
163+
store<u64>(dest, load<u64>(src));
164+
n -= 8;
165+
dest += 8;
166+
src += 8;
167+
}
173168
}
174169
}
175170
while (n) {
176171
store<u8>(dest++, load<u8>(src++));
177172
--n;
178173
}
179174
} else {
180-
if ((src & 7) == (dest & 7)) {
181-
while ((dest + n) & 7) {
182-
if (!n) return;
183-
store<u8>(dest + --n, load<u8>(src + n));
184-
}
185-
while (n >= 8) {
186-
n -= 8;
187-
store<u64>(dest + n, load<u64>(src + n));
175+
if (ASC_SHRINK_LEVEL < 2) {
176+
if ((src & 7) == (dest & 7)) {
177+
while ((dest + n) & 7) {
178+
if (!n) return;
179+
store<u8>(dest + --n, load<u8>(src + n));
180+
}
181+
while (n >= 8) {
182+
n -= 8;
183+
store<u64>(dest + n, load<u64>(src + n));
184+
}
188185
}
189186
}
190187
while (n) {

0 commit comments

Comments
 (0)