
Commit 14f929a

runtime/internal/atomic: improve ARM atomics
This is a follow-up of CL 93637. There, when we redirected sync/atomic
to runtime/internal/atomic, a few good implementations of ARM atomics
were lost. This CL brings most of them back, with some improvements.

- Change atomic Store to a plain store with memory barrier, as we
  already changed atomic Load to a plain load with memory barrier.

- Use native 64-bit atomics on ARMv7; jump to Go implementations on
  older machines, and drop the kernel helper. In particular, for
  Load64, just do loads, without using Cas on the address being
  loaded from, so it also works on read-only memory (since we have
  already fixed 32-bit Load).

Change-Id: I725cd65cf945ae5200db81a35be3f251c9f7af14
Reviewed-on: https://go-review.googlesource.com/111315
Run-TryBot: Cherry Zhang <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
Reviewed-by: Austin Clements <[email protected]>
1 parent 150b728 commit 14f929a
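For readers who want the shape of the change in one place: the new 64-bit entry points pick between a native ARMv7 path and a Go fallback at runtime. Below is a minimal Go sketch of that dispatch, with stand-ins for things the sketch cannot reach: goarm stands in for the runtime's cached GOARM value, nativeCas64 stands in for the armCas64<> assembly (borrowing sync/atomic so the sketch runs anywhere), and a single mutex stands in for the runtime's address-hashed spinlocks.

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

var goarm = 7 // stand-in for the runtime's cached GOARM value

// fallbackLock is one global mutex for illustration only; the real
// goCas64 picks a spinlock by hashing the address.
var fallbackLock sync.Mutex

// nativeCas64 stands in for the armCas64<> LDREXD/STREXD routine.
func nativeCas64(addr *uint64, old, new uint64) bool {
	return atomic.CompareAndSwapUint64(addr, old, new)
}

// goCas64 mirrors the shape of the lock-based pre-ARMv7 fallback.
func goCas64(addr *uint64, old, new uint64) bool {
	fallbackLock.Lock()
	defer fallbackLock.Unlock()
	if *addr != old {
		return false
	}
	*addr = new
	return true
}

// cas64 sketches the dispatch in ·Cas64 below: native atomics on
// ARMv7, the lock-based Go simulation on older machines.
func cas64(addr *uint64, old, new uint64) bool {
	if goarm >= 7 {
		return nativeCas64(addr, old, new)
	}
	return goCas64(addr, old, new)
}

func main() {
	var x uint64 = 1
	fmt.Println(cas64(&x, 1, 2), x) // true 2
}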

4 files changed, +240 −37 lines changed

src/runtime/internal/atomic/asm_arm.s

+170 −4

@@ -27,8 +27,8 @@ casl:
 	CMP	R0, R2
 	BNE	casfail

-	MOVB	runtime·goarm(SB), R11
-	CMP	$7, R11
+	MOVB	runtime·goarm(SB), R8
+	CMP	$7, R8
 	BLT	2(PC)
 	DMB	MB_ISHST

@@ -37,8 +37,7 @@ casl:
 	BNE	casl
 	MOVW	$1, R0

-	MOVB	runtime·goarm(SB), R11
-	CMP	$7, R11
+	CMP	$7, R8
 	BLT	2(PC)
 	DMB	MB_ISH

@@ -49,12 +48,17 @@ casfail:
 	MOVB	R0, ret+12(FP)
 	RET

+// stubs
+
 TEXT runtime∕internal∕atomic·Loadp(SB),NOSPLIT|NOFRAME,$0-8
 	B	runtime∕internal∕atomic·Load(SB)

 TEXT runtime∕internal∕atomic·Casuintptr(SB),NOSPLIT,$0-13
 	B	runtime∕internal∕atomic·Cas(SB)

+TEXT runtime∕internal∕atomic·Casp1(SB),NOSPLIT,$0-13
+	B	runtime∕internal∕atomic·Cas(SB)
+
 TEXT runtime∕internal∕atomic·Loaduintptr(SB),NOSPLIT,$0-8
 	B	runtime∕internal∕atomic·Load(SB)

@@ -64,6 +68,9 @@ TEXT runtime∕internal∕atomic·Loaduint(SB),NOSPLIT,$0-8
 TEXT runtime∕internal∕atomic·Storeuintptr(SB),NOSPLIT,$0-8
 	B	runtime∕internal∕atomic·Store(SB)

+TEXT runtime∕internal∕atomic·StorepNoWB(SB),NOSPLIT,$0-8
+	B	runtime∕internal∕atomic·Store(SB)
+
 TEXT runtime∕internal∕atomic·Xadduintptr(SB),NOSPLIT,$0-12
 	B	runtime∕internal∕atomic·Xadd(SB)

@@ -72,3 +79,162 @@ TEXT runtime∕internal∕atomic·Loadint64(SB),NOSPLIT,$0-12

 TEXT runtime∕internal∕atomic·Xaddint64(SB),NOSPLIT,$0-20
 	B	runtime∕internal∕atomic·Xadd64(SB)
+
+// 64-bit atomics
+// The native ARM implementations use LDREXD/STREXD, which are
+// available on ARMv6k or later. We use them only on ARMv7.
+// On older ARM, we use Go implementations which simulate 64-bit
+// atomics with locks.
+
+TEXT armCas64<>(SB),NOSPLIT,$0-21
+	MOVW	addr+0(FP), R1
+	// make unaligned atomic access panic
+	AND.S	$7, R1, R2
+	BEQ	2(PC)
+	MOVW	R2, (R2)	// crash. AND.S above left only low 3 bits in R2.
+	MOVW	old_lo+4(FP), R2
+	MOVW	old_hi+8(FP), R3
+	MOVW	new_lo+12(FP), R4
+	MOVW	new_hi+16(FP), R5
+cas64loop:
+	LDREXD	(R1), R6	// loads R6 and R7
+	CMP	R2, R6
+	BNE	cas64fail
+	CMP	R3, R7
+	BNE	cas64fail
+
+	DMB	MB_ISHST
+
+	STREXD	R4, (R1), R0	// stores R4 and R5
+	CMP	$0, R0
+	BNE	cas64loop
+	MOVW	$1, R0
+
+	DMB	MB_ISH
+
+	MOVBU	R0, swapped+20(FP)
+	RET
+cas64fail:
+	MOVW	$0, R0
+	MOVBU	R0, swapped+20(FP)
+	RET
+
+TEXT armXadd64<>(SB),NOSPLIT,$0-20
+	MOVW	addr+0(FP), R1
+	// make unaligned atomic access panic
+	AND.S	$7, R1, R2
+	BEQ	2(PC)
+	MOVW	R2, (R2)	// crash. AND.S above left only low 3 bits in R2.
+	MOVW	delta_lo+4(FP), R2
+	MOVW	delta_hi+8(FP), R3
+
+add64loop:
+	LDREXD	(R1), R4	// loads R4 and R5
+	ADD.S	R2, R4
+	ADC	R3, R5
+
+	DMB	MB_ISHST
+
+	STREXD	R4, (R1), R0	// stores R4 and R5
+	CMP	$0, R0
+	BNE	add64loop
+
+	DMB	MB_ISH
+
+	MOVW	R4, new_lo+12(FP)
+	MOVW	R5, new_hi+16(FP)
+	RET
+
+TEXT armXchg64<>(SB),NOSPLIT,$0-20
+	MOVW	addr+0(FP), R1
+	// make unaligned atomic access panic
+	AND.S	$7, R1, R2
+	BEQ	2(PC)
+	MOVW	R2, (R2)	// crash. AND.S above left only low 3 bits in R2.
+	MOVW	new_lo+4(FP), R2
+	MOVW	new_hi+8(FP), R3
+
+swap64loop:
+	LDREXD	(R1), R4	// loads R4 and R5
+
+	DMB	MB_ISHST
+
+	STREXD	R2, (R1), R0	// stores R2 and R3
+	CMP	$0, R0
+	BNE	swap64loop
+
+	DMB	MB_ISH
+
+	MOVW	R4, old_lo+12(FP)
+	MOVW	R5, old_hi+16(FP)
+	RET
+
+TEXT armLoad64<>(SB),NOSPLIT,$0-12
+	MOVW	addr+0(FP), R1
+	// make unaligned atomic access panic
+	AND.S	$7, R1, R2
+	BEQ	2(PC)
+	MOVW	R2, (R2)	// crash. AND.S above left only low 3 bits in R2.
+
+	LDREXD	(R1), R2	// loads R2 and R3
+	DMB	MB_ISH
+
+	MOVW	R2, val_lo+4(FP)
+	MOVW	R3, val_hi+8(FP)
+	RET
+
+TEXT armStore64<>(SB),NOSPLIT,$0-12
+	MOVW	addr+0(FP), R1
+	// make unaligned atomic access panic
+	AND.S	$7, R1, R2
+	BEQ	2(PC)
+	MOVW	R2, (R2)	// crash. AND.S above left only low 3 bits in R2.
+	MOVW	val_lo+4(FP), R2
+	MOVW	val_hi+8(FP), R3
+
+store64loop:
+	LDREXD	(R1), R4	// loads R4 and R5
+
+	DMB	MB_ISHST
+
+	STREXD	R2, (R1), R0	// stores R2 and R3
+	CMP	$0, R0
+	BNE	store64loop
+
+	DMB	MB_ISH
+	RET
+
+TEXT ·Cas64(SB),NOSPLIT,$0-21
+	MOVB	runtime·goarm(SB), R11
+	CMP	$7, R11
+	BLT	2(PC)
+	JMP	armCas64<>(SB)
+	JMP	·goCas64(SB)
+
+TEXT ·Xadd64(SB),NOSPLIT,$0-20
+	MOVB	runtime·goarm(SB), R11
+	CMP	$7, R11
+	BLT	2(PC)
+	JMP	armXadd64<>(SB)
+	JMP	·goXadd64(SB)
+
+TEXT ·Xchg64(SB),NOSPLIT,$0-20
+	MOVB	runtime·goarm(SB), R11
+	CMP	$7, R11
+	BLT	2(PC)
+	JMP	armXchg64<>(SB)
+	JMP	·goXchg64(SB)
+
+TEXT ·Load64(SB),NOSPLIT,$0-12
+	MOVB	runtime·goarm(SB), R11
+	CMP	$7, R11
+	BLT	2(PC)
+	JMP	armLoad64<>(SB)
+	JMP	·goLoad64(SB)
+
+TEXT ·Store64(SB),NOSPLIT,$0-12
+	MOVB	runtime·goarm(SB), R11
+	CMP	$7, R11
+	BLT	2(PC)
+	JMP	armStore64<>(SB)
+	JMP	·goStore64(SB)
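A note on the LDREXD/STREXD loops above, for readers new to load-linked/store-conditional: LDREXD loads a doubleword and arms an exclusive monitor on it; STREXD writes only while the monitor is still held, reporting failure otherwise, so each routine retries until its store lands atomically. In Go-flavored pseudocode (loadExclusive and storeExclusive are hypothetical intrinsics standing in for the instructions; they do not exist as Go functions):

// Pseudocode for the armCas64<> loop. loadExclusive/storeExclusive
// are hypothetical stand-ins for LDREXD/STREXD.
func armCas64(addr *uint64, old, new uint64) bool {
	for {
		cur := loadExclusive(addr) // LDREXD: load and arm the monitor
		if cur != old {
			return false // mismatch: fail without storing
		}
		if storeExclusive(addr, new) { // STREXD: store iff monitor still held
			return true // the store landed atomically
		}
		// monitor lost to another CPU: retry the whole sequence
	}
}

The real routine additionally brackets the store with DMB barriers and crashes on misaligned addresses, as the assembly above shows.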

src/runtime/internal/atomic/atomic_arm.go

+24 −23

@@ -68,28 +68,14 @@ func Xchguintptr(addr *uintptr, v uintptr) uintptr {
 	return uintptr(Xchg((*uint32)(unsafe.Pointer(addr)), uint32(v)))
 }

-//go:nosplit
-func StorepNoWB(addr unsafe.Pointer, v unsafe.Pointer) {
-	for {
-		old := *(*unsafe.Pointer)(addr)
-		if Casp1((*unsafe.Pointer)(addr), old, v) {
-			return
-		}
-	}
-}
+// Not noescape -- it installs a pointer to addr.
+func StorepNoWB(addr unsafe.Pointer, v unsafe.Pointer)

-//go:nosplit
-func Store(addr *uint32, v uint32) {
-	for {
-		old := *addr
-		if Cas(addr, old, v) {
-			return
-		}
-	}
-}
+//go:noescape
+func Store(addr *uint32, v uint32)

 //go:nosplit
-func Cas64(addr *uint64, old, new uint64) bool {
+func goCas64(addr *uint64, old, new uint64) bool {
 	if uintptr(unsafe.Pointer(addr))&7 != 0 {
 		*(*int)(nil) = 0 // crash on unaligned uint64
 	}
@@ -105,7 +91,7 @@ func Cas64(addr *uint64, old, new uint64) bool {
 }

 //go:nosplit
-func Xadd64(addr *uint64, delta int64) uint64 {
+func goXadd64(addr *uint64, delta int64) uint64 {
 	if uintptr(unsafe.Pointer(addr))&7 != 0 {
 		*(*int)(nil) = 0 // crash on unaligned uint64
 	}
@@ -119,7 +105,7 @@ func Xadd64(addr *uint64, delta int64) uint64 {
 }

 //go:nosplit
-func Xchg64(addr *uint64, v uint64) uint64 {
+func goXchg64(addr *uint64, v uint64) uint64 {
 	if uintptr(unsafe.Pointer(addr))&7 != 0 {
 		*(*int)(nil) = 0 // crash on unaligned uint64
 	}
@@ -133,7 +119,7 @@ func Xchg64(addr *uint64, v uint64) uint64 {
 }

 //go:nosplit
-func Load64(addr *uint64) uint64 {
+func goLoad64(addr *uint64) uint64 {
 	if uintptr(unsafe.Pointer(addr))&7 != 0 {
 		*(*int)(nil) = 0 // crash on unaligned uint64
 	}
@@ -146,7 +132,7 @@ func Load64(addr *uint64) uint64 {
 }

 //go:nosplit
-func Store64(addr *uint64, v uint64) {
+func goStore64(addr *uint64, v uint64) {
 	if uintptr(unsafe.Pointer(addr))&7 != 0 {
 		*(*int)(nil) = 0 // crash on unaligned uint64
 	}
@@ -194,3 +180,18 @@ func Load(addr *uint32) uint32

 //go:noescape
 func Loadp(addr unsafe.Pointer) unsafe.Pointer
+
+//go:noescape
+func Cas64(addr *uint64, old, new uint64) bool
+
+//go:noescape
+func Xadd64(addr *uint64, delta int64) uint64
+
+//go:noescape
+func Xchg64(addr *uint64, v uint64) uint64
+
+//go:noescape
+func Load64(addr *uint64) uint64
+
+//go:noescape
+func Store64(addr *uint64, v uint64)
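One detail shared by every 64-bit routine, Go and assembly alike: ARM requires 8-byte alignment for 64-bit atomics, so each entry crashes deliberately on a misaligned address instead of silently doing a non-atomic access. Pulled out as a standalone function for illustration (checkAlign64 is a hypothetical name; the real fallbacks inline the check):

package atomicsketch

import "unsafe"

// checkAlign64 mirrors the guard at the top of each go* fallback:
// fault loudly rather than perform a non-atomic 64-bit access.
func checkAlign64(addr *uint64) {
	if uintptr(unsafe.Pointer(addr))&7 != 0 {
		*(*int)(nil) = 0 // crash on unaligned uint64
	}
}

The assembly equivalent is the AND.S $7 / MOVW R2, (R2) sequence: the low three bits of the address form a tiny pointer into the unmapped zero page, so the store faults.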

src/runtime/internal/atomic/sys_linux_arm.s

+26 −7

@@ -55,9 +55,6 @@ check:
 	MOVB	R0, ret+12(FP)
 	RET

-TEXT runtime∕internal∕atomic·Casp1(SB),NOSPLIT,$0
-	B	runtime∕internal∕atomic·Cas(SB)
-
 // As for cas, memory barriers are complicated on ARM, but the kernel
 // provides a user helper. ARMv5 does not support SMP and has no
 // memory barrier instruction at all. ARMv6 added SMP support and has
@@ -70,18 +67,40 @@ TEXT runtime∕internal∕atomic·Casp1(SB),NOSPLIT,$0
 TEXT memory_barrier<>(SB),NOSPLIT|NOFRAME,$0
 	MOVW	$0xffff0fa0, R15 // R15 is hardware PC.

-TEXT runtime∕internal∕atomic·Load(SB),NOSPLIT,$0-8
+TEXT ·Load(SB),NOSPLIT,$0-8
 	MOVW	addr+0(FP), R0
 	MOVW	(R0), R1

 	MOVB	runtime·goarm(SB), R11
 	CMP	$7, R11
 	BGE	native_barrier
 	BL	memory_barrier<>(SB)
-	B	prolog
+	B	end
 native_barrier:
 	DMB	MB_ISH
-
-prolog:
+end:
 	MOVW	R1, ret+4(FP)
 	RET
+
+TEXT ·Store(SB),NOSPLIT,$0-8
+	MOVW	addr+0(FP), R1
+	MOVW	v+4(FP), R2
+
+	MOVB	runtime·goarm(SB), R8
+	CMP	$7, R8
+	BGE	native_barrier
+	BL	memory_barrier<>(SB)
+	B	store
+native_barrier:
+	DMB	MB_ISH
+
+store:
+	MOVW	R2, (R1)
+
+	CMP	$7, R8
+	BGE	native_barrier2
+	BL	memory_barrier<>(SB)
+	RET
+native_barrier2:
+	DMB	MB_ISH
+	RET
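The Store added here follows the same pattern Load already uses: pick a barrier mechanism once (DMB MB_ISH when goarm >= 7, otherwise the kernel's memory-barrier user helper at 0xffff0fa0) and bracket a plain store with it. A Go-flavored sketch of the sequence, with barrier as a hypothetical stand-in for whichever mechanism is selected:

// Sketch of the ·Store sequence above. barrier stands in for either
// DMB MB_ISH (goarm >= 7) or the kernel helper at 0xffff0fa0.
func store(addr *uint32, v uint32, barrier func()) {
	barrier() // order all earlier accesses before the store
	*addr = v // the store itself is a plain 32-bit write
	barrier() // publish the store before later accesses proceed
}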

src/runtime/internal/atomic/sys_nonlinux_arm.s

+20 −3

@@ -17,10 +17,11 @@
 TEXT ·Cas(SB),NOSPLIT,$0
 	JMP	·armcas(SB)

-TEXT ·Casp1(SB),NOSPLIT,$0
-	JMP	·Cas(SB)
+// Non-Linux OSes support only single-processor machines before ARMv7.
+// So we don't need memory barriers if goarm < 7, and we fail loudly at
+// startup (runtime.checkgoarm) on a multi-processor machine with goarm < 7.

-TEXT runtime∕internal∕atomic·Load(SB),NOSPLIT|NOFRAME,$0-8
+TEXT ·Load(SB),NOSPLIT|NOFRAME,$0-8
 	MOVW	addr+0(FP), R0
 	MOVW	(R0), R1

@@ -31,3 +32,19 @@ TEXT runtime∕internal∕atomic·Load(SB),NOSPLIT|NOFRAME,$0-8

 	MOVW	R1, ret+4(FP)
 	RET
+
+TEXT ·Store(SB),NOSPLIT,$0-8
+	MOVW	addr+0(FP), R1
+	MOVW	v+4(FP), R2
+
+	MOVB	runtime·goarm(SB), R8
+	CMP	$7, R8
+	BLT	2(PC)
+	DMB	MB_ISH
+
+	MOVW	R2, (R1)
+
+	CMP	$7, R8
+	BLT	2(PC)
+	DMB	MB_ISH
+	RET
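The loud startup failure that comment refers to is runtime.checkgoarm; on the non-Linux ARM ports it has roughly this shape (a paraphrase for illustration, not the exact source):

// Paraphrase of runtime.checkgoarm on non-Linux ARM ports: refuse to
// run on a multi-processor machine without ARMv7 barrier instructions.
func checkgoarm() {
	if getncpu() > 1 && goarm < 7 {
		print("runtime: this system has multiple CPUs and must use\n")
		print("atomic synchronization instructions. Recompile using GOARM=7.\n")
		exit(1)
	}
}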
