Skip to content

Commit 374c284

Browse files
committed
runtime: add async preemption support on PPC64
This CL adds support for call injection and async preemption on PPC64. For the injected call to return to the preempted PC, we have to clobber either LR or CTR. For reasons mentioned in previous CLs, we choose CTR. Previous CLs have marked code sequences that use CTR as async-nonpreemptible. Change-Id: Ia642b5f06a890dd52476f45023b2a830c522eee0 Reviewed-on: https://go-review.googlesource.com/c/go/+/203824 Run-TryBot: Cherry Zhang <[email protected]> Reviewed-by: Keith Randall <[email protected]>
1 parent 7f574e4 commit 374c284

File tree

4 files changed

+219
-5
lines changed

4 files changed

+219
-5
lines changed

src/cmd/compile/internal/ssa/gen/PPC64Ops.go

+2
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,8 @@ var regNamesPPC64 = []string{
8383
"F30",
8484
"F31",
8585

86+
// If you add registers, update asyncPreempt in runtime.
87+
8688
// "CR0",
8789
// "CR1",
8890
// "CR2",

src/runtime/mkpreempt.go

+56-1
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ var arches = map[string]func(){
8282
"arm64": genARM64,
8383
"mips64x": func() { genMIPS(true) },
8484
"mipsx": func() { genMIPS(false) },
85-
"ppc64x": notImplemented,
85+
"ppc64x": genPPC64,
8686
"s390x": genS390X,
8787
"wasm": genWasm,
8888
}
@@ -417,6 +417,61 @@ func genMIPS(_64bit bool) {
417417
p("JMP (R23)")
418418
}
419419

420+
// genPPC64 emits the PPC64 implementation of asyncPreempt. The emitted code
// spills R3-R29 (minus R12 and R13), CR, XER, F0-F31 and FPSCR into a frame
// addressed off R1, calls ·asyncPreempt2, restores the saved state, and
// resumes the interrupted code with an indirect jump through CTR.
func genPPC64() {
421+
// Add integer registers R3-R29
422+
// R0 (zero), R1 (SP), R30 (g) are special and not saved here.
423+
// R2 (TOC pointer in PIC mode), R12 (function entry address in PIC mode) have been saved in sigctxt.pushCall.
424+
// R31 (REGTMP) will be saved manually.
425+
var l = layout{sp: "R1", stack: 32 + 8} // MinFrameSize on PPC64, plus one word for saving R31
426+
for i := 3; i <= 29; i++ {
427+
if i == 12 || i == 13 {
428+
// R12 has been saved in sigctxt.pushCall.
429+
// R13 is the TLS pointer, not used by Go code. We must NOT
430+
// restore it, otherwise if we parked and resumed on a
431+
// different thread we'll mess up TLS addresses.
432+
continue
433+
}
434+
reg := fmt.Sprintf("R%d", i)
435+
l.add("MOVD", reg, 8)
436+
}
// CR and XER cannot be stored directly here; both are moved through R31,
// whose own value is saved separately below.
437+
l.addSpecial(
438+
"MOVW CR, R31\nMOVW R31, %d(R1)",
439+
"MOVW %d(R1), R31\nMOVFL R31, $0xff", // this is MOVW R31, CR
440+
8) // CR is 4-byte wide, but just keep the alignment
441+
l.addSpecial(
442+
"MOVD XER, R31\nMOVD R31, %d(R1)",
443+
"MOVD %d(R1), R31\nMOVD R31, XER",
444+
8)
445+
// Add floating point registers F0-F31.
446+
for i := 0; i <= 31; i++ {
447+
reg := fmt.Sprintf("F%d", i)
448+
l.add("FMOVD", reg, 8)
449+
}
450+
// Add floating point control/status register FPSCR.
// FPSCR is moved through F0, which has already been added to the layout above.
451+
l.addSpecial(
452+
"MOVFL FPSCR, F0\nFMOVD F0, %d(R1)",
453+
"FMOVD %d(R1), F0\nMOVFL F0, FPSCR",
454+
8)
455+
456+
// Once the MOVDU below lowers R1 by l.stack, this slot is 32(R1), the
// address R31 is reloaded from just before the final JMP.
p("MOVD R31, -%d(R1)", l.stack-32) // save R31 first, we'll use R31 for saving LR
457+
p("MOVD LR, R31")
458+
p("MOVDU R31, -%d(R1)", l.stack) // allocate frame, save PC of interrupted instruction (in LR)
459+
460+
l.save()
461+
p("CALL ·asyncPreempt2(SB)")
462+
l.restore()
463+
464+
// The three words at l.stack, l.stack+8 and l.stack+16 lie just above this
// frame, i.e. in the area reserved by sigctxt.pushCall.
p("MOVD %d(R1), R31", l.stack) // sigctxt.pushCall has pushed LR, R2, R12 (at interrupt) on stack, restore them
465+
p("MOVD R31, LR")
466+
p("MOVD %d(R1), R2", l.stack+8)
467+
p("MOVD %d(R1), R12", l.stack+16)
468+
p("MOVD (R1), R31") // load PC to CTR
469+
p("MOVD R31, CTR")
470+
p("MOVD 32(R1), R31") // restore R31
471+
p("ADD $%d, R1", l.stack+32) // pop frame (including the space pushed by sigctxt.pushCall)
472+
p("JMP (CTR)")
473+
}
474+
420475
func genS390X() {
421476
// Add integer registers R0-R12
422477
// R13 (g), R14 (LR), R15 (SP) are special, and not saved here.

src/runtime/preempt_ppc64x.s

+139-2
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,142 @@
66
#include "textflag.h"
77

88
// asyncPreempt saves the register state of the interrupted code in a
// 520-byte frame, calls ·asyncPreempt2, restores that state, and jumps back
// to the interrupted PC through CTR. The instruction sequence corresponds to
// what genPPC64 in mkpreempt.go emits.
TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
9-
// Not implemented yet
10-
JMP ·abort(SB)
9+
// Stash R31 at what becomes 32(R1) once the frame is allocated (-488+520).
MOVD R31, -488(R1)
10+
MOVD LR, R31
11+
// Allocate the frame and store the incoming LR (the interrupted PC) at 0(R1).
MOVDU R31, -520(R1)
12+
// Spill integer registers R3-R11 and R14-R29.
MOVD R3, 40(R1)
13+
MOVD R4, 48(R1)
14+
MOVD R5, 56(R1)
15+
MOVD R6, 64(R1)
16+
MOVD R7, 72(R1)
17+
MOVD R8, 80(R1)
18+
MOVD R9, 88(R1)
19+
MOVD R10, 96(R1)
20+
MOVD R11, 104(R1)
21+
MOVD R14, 112(R1)
22+
MOVD R15, 120(R1)
23+
MOVD R16, 128(R1)
24+
MOVD R17, 136(R1)
25+
MOVD R18, 144(R1)
26+
MOVD R19, 152(R1)
27+
MOVD R20, 160(R1)
28+
MOVD R21, 168(R1)
29+
MOVD R22, 176(R1)
30+
MOVD R23, 184(R1)
31+
MOVD R24, 192(R1)
32+
MOVD R25, 200(R1)
33+
MOVD R26, 208(R1)
34+
MOVD R27, 216(R1)
35+
MOVD R28, 224(R1)
36+
MOVD R29, 232(R1)
37+
// Spill CR and XER, using R31 as scratch.
MOVW CR, R31
38+
MOVW R31, 240(R1)
39+
MOVD XER, R31
40+
MOVD R31, 248(R1)
41+
// Spill floating point registers F0-F31.
FMOVD F0, 256(R1)
42+
FMOVD F1, 264(R1)
43+
FMOVD F2, 272(R1)
44+
FMOVD F3, 280(R1)
45+
FMOVD F4, 288(R1)
46+
FMOVD F5, 296(R1)
47+
FMOVD F6, 304(R1)
48+
FMOVD F7, 312(R1)
49+
FMOVD F8, 320(R1)
50+
FMOVD F9, 328(R1)
51+
FMOVD F10, 336(R1)
52+
FMOVD F11, 344(R1)
53+
FMOVD F12, 352(R1)
54+
FMOVD F13, 360(R1)
55+
FMOVD F14, 368(R1)
56+
FMOVD F15, 376(R1)
57+
FMOVD F16, 384(R1)
58+
FMOVD F17, 392(R1)
59+
FMOVD F18, 400(R1)
60+
FMOVD F19, 408(R1)
61+
FMOVD F20, 416(R1)
62+
FMOVD F21, 424(R1)
63+
FMOVD F22, 432(R1)
64+
FMOVD F23, 440(R1)
65+
FMOVD F24, 448(R1)
66+
FMOVD F25, 456(R1)
67+
FMOVD F26, 464(R1)
68+
FMOVD F27, 472(R1)
69+
FMOVD F28, 480(R1)
70+
FMOVD F29, 488(R1)
71+
FMOVD F30, 496(R1)
72+
FMOVD F31, 504(R1)
73+
// Spill FPSCR through F0; F0 itself was already stored at 256(R1).
MOVFL FPSCR, F0
74+
FMOVD F0, 512(R1)
75+
CALL ·asyncPreempt2(SB)
76+
// Restore everything in the reverse order of the saves above.
FMOVD 512(R1), F0
77+
MOVFL F0, FPSCR
78+
FMOVD 504(R1), F31
79+
FMOVD 496(R1), F30
80+
FMOVD 488(R1), F29
81+
FMOVD 480(R1), F28
82+
FMOVD 472(R1), F27
83+
FMOVD 464(R1), F26
84+
FMOVD 456(R1), F25
85+
FMOVD 448(R1), F24
86+
FMOVD 440(R1), F23
87+
FMOVD 432(R1), F22
88+
FMOVD 424(R1), F21
89+
FMOVD 416(R1), F20
90+
FMOVD 408(R1), F19
91+
FMOVD 400(R1), F18
92+
FMOVD 392(R1), F17
93+
FMOVD 384(R1), F16
94+
FMOVD 376(R1), F15
95+
FMOVD 368(R1), F14
96+
FMOVD 360(R1), F13
97+
FMOVD 352(R1), F12
98+
FMOVD 344(R1), F11
99+
FMOVD 336(R1), F10
100+
FMOVD 328(R1), F9
101+
FMOVD 320(R1), F8
102+
FMOVD 312(R1), F7
103+
FMOVD 304(R1), F6
104+
FMOVD 296(R1), F5
105+
FMOVD 288(R1), F4
106+
FMOVD 280(R1), F3
107+
FMOVD 272(R1), F2
108+
FMOVD 264(R1), F1
109+
FMOVD 256(R1), F0
110+
MOVD 248(R1), R31
111+
MOVD R31, XER
112+
MOVW 240(R1), R31
113+
MOVFL R31, $0xff
114+
MOVD 232(R1), R29
115+
MOVD 224(R1), R28
116+
MOVD 216(R1), R27
117+
MOVD 208(R1), R26
118+
MOVD 200(R1), R25
119+
MOVD 192(R1), R24
120+
MOVD 184(R1), R23
121+
MOVD 176(R1), R22
122+
MOVD 168(R1), R21
123+
MOVD 160(R1), R20
124+
MOVD 152(R1), R19
125+
MOVD 144(R1), R18
126+
MOVD 136(R1), R17
127+
MOVD 128(R1), R16
128+
MOVD 120(R1), R15
129+
MOVD 112(R1), R14
130+
MOVD 104(R1), R11
131+
MOVD 96(R1), R10
132+
MOVD 88(R1), R9
133+
MOVD 80(R1), R8
134+
MOVD 72(R1), R7
135+
MOVD 64(R1), R6
136+
MOVD 56(R1), R5
137+
MOVD 48(R1), R4
138+
MOVD 40(R1), R3
139+
// Reload LR, R2 and R12 from the three words just above this frame
// (520, 528 and 536 off R1).
MOVD 520(R1), R31
140+
MOVD R31, LR
141+
MOVD 528(R1), R2
142+
MOVD 536(R1), R12
143+
// 0(R1) holds the value stored by the MOVDU at entry; route it through CTR.
MOVD (R1), R31
144+
MOVD R31, CTR
145+
// Restore R31 itself, then pop 552 bytes: the 520-byte frame plus 32 more.
MOVD 32(R1), R31
146+
ADD $552, R1
147+
JMP (CTR)

src/runtime/signal_ppc64x.go

+22-2
Original file line numberDiff line numberDiff line change
@@ -86,8 +86,28 @@ func (c *sigctxt) preparePanic(sig uint32, gp *g) {
8686
c.set_pc(uint64(funcPC(sigpanic)))
8787
}
8888

89-
const pushCallSupported = false
89+
const pushCallSupported = true
9090

9191
// pushCall rewrites the signal context so that the signaled code appears to
// have called targetPC at the interrupted PC. It lowers SP by
// sys.MinFrameSize, stores the old LR, R2 and R12 at 0, 8 and 16 off the new
// SP, then sets LR to the old PC and both R12 and PC to targetPC.
func (c *sigctxt) pushCall(targetPC uintptr) {
92-
throw("not implemented")
92+
// Push the LR to stack, as we'll clobber it in order to
93+
// push the call. The function being pushed is responsible
94+
// for restoring the LR and setting the SP back.
95+
// This extra space is known to gentraceback.
96+
sp := c.sp() - sys.MinFrameSize
97+
c.set_sp(sp)
98+
*(*uint64)(unsafe.Pointer(uintptr(sp))) = c.link()
99+
// In PIC mode, we'll set up (i.e. clobber) R2 on function
100+
// entry. Save it ahead of time.
101+
// In PIC mode it requires R12 points to the function entry,
102+
// so we'll set it up when pushing the call. Save it ahead
103+
// of time as well.
104+
// 8(SP) and 16(SP) are unused space in the reserved
105+
// MinFrameSize (32) bytes.
106+
*(*uint64)(unsafe.Pointer(uintptr(sp) + 8)) = c.r2()
107+
*(*uint64)(unsafe.Pointer(uintptr(sp) + 16)) = c.r12()
108+
// Set up PC and LR to pretend the function being signaled
109+
// calls targetPC at the faulting PC.
110+
c.set_link(c.pc())
111+
c.set_r12(uint64(targetPC))
112+
c.set_pc(uint64(targetPC))
93113
}

0 commit comments

Comments
 (0)