Skip to content

Commit f66581e

Browse files
committed
cmd/internal/obj/x86: use push/pop instead of mov to store/load FP
This CL changes how the x86 compiler stores and loads the frame pointer in each function prologue and epilogue, with the goal of reducing the final binary size without affecting performance. The compiler currently uses MOV instructions to load and store BP, which can take from 5 to 8 bytes each. This CL emits PUSH/POP instructions instead, which always take only 1 byte each (when operating on BP). It also avoids the SUBQ/ADDQ pair used to grow and shrink the stack in functions that have a frame pointer but do not have local variables. On Windows, this CL reduces the go toolchain size from 15,697,920 bytes to 15,584,768 bytes, a reduction of 0.7%. Example of prologue and epilogue for a function with 0x10 bytes of local variables: Before === SUBQ $0x18, SP MOVQ BP, 0x10(SP) LEAQ 0x10(SP), BP ... function body ... MOVQ 0x10(SP), BP ADDQ $0x18, SP RET === After === PUSHQ BP LEAQ 0(SP), BP SUBQ $0x10, SP ... function body ... ADDQ $0x10, SP POPQ BP RET === Updates #6853 Change-Id: Ice9e14bbf8dff083c5f69feb97e9a764c3ca7785 Reviewed-on: https://go-review.googlesource.com/c/go/+/462300 Reviewed-by: Keith Randall <[email protected]> Reviewed-by: Cherry Mui <[email protected]> TryBot-Result: Gopher Robot <[email protected]> Reviewed-by: Keith Randall <[email protected]> Run-TryBot: Quim Muntal <[email protected]>
1 parent 0a27a45 commit f66581e

File tree

1 file changed

+33
-37
lines changed

1 file changed

+33
-37
lines changed

src/cmd/internal/obj/x86/obj6.go

Lines changed: 33 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -684,37 +684,13 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
684684
p, regg = loadG(ctxt, cursym, p, newprog)
685685
}
686686

687-
// Delve debugger would like the next instruction to be noted as the end of the function prologue.
688-
// TODO: are there other cases (e.g., wrapper functions) that need marking?
689-
markedPrologue := false
690-
691-
if autoffset != 0 {
692-
if autoffset%int32(ctxt.Arch.RegSize) != 0 {
693-
ctxt.Diag("unaligned stack size %d", autoffset)
694-
}
695-
p = obj.Appendp(p, newprog)
696-
p.As = AADJSP
697-
p.From.Type = obj.TYPE_CONST
698-
p.From.Offset = int64(autoffset)
699-
p.Spadj = autoffset
700-
p.Pos = p.Pos.WithXlogue(src.PosPrologueEnd)
701-
markedPrologue = true
702-
}
703-
704687
if bpsize > 0 {
705688
// Save caller's BP
706689
p = obj.Appendp(p, newprog)
707690

708-
p.As = AMOVQ
691+
p.As = APUSHQ
709692
p.From.Type = obj.TYPE_REG
710693
p.From.Reg = REG_BP
711-
p.To.Type = obj.TYPE_MEM
712-
p.To.Reg = REG_SP
713-
p.To.Scale = 1
714-
p.To.Offset = int64(autoffset) - int64(bpsize)
715-
if !markedPrologue {
716-
p.Pos = p.Pos.WithXlogue(src.PosPrologueEnd)
717-
}
718694

719695
// Move current frame to BP
720696
p = obj.Appendp(p, newprog)
@@ -723,11 +699,32 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
723699
p.From.Type = obj.TYPE_MEM
724700
p.From.Reg = REG_SP
725701
p.From.Scale = 1
726-
p.From.Offset = int64(autoffset) - int64(bpsize)
702+
p.From.Offset = 0
727703
p.To.Type = obj.TYPE_REG
728704
p.To.Reg = REG_BP
729705
}
730706

707+
if autoffset%int32(ctxt.Arch.RegSize) != 0 {
708+
ctxt.Diag("unaligned stack size %d", autoffset)
709+
}
710+
711+
// localoffset is autoffset discounting the frame pointer,
712+
// which has already been allocated in the stack.
713+
localoffset := autoffset - int32(bpsize)
714+
if localoffset != 0 {
715+
p = obj.Appendp(p, newprog)
716+
p.As = AADJSP
717+
p.From.Type = obj.TYPE_CONST
718+
p.From.Offset = int64(localoffset)
719+
p.Spadj = localoffset
720+
}
721+
722+
// Delve debugger would like the next instruction to be noted as the end of the function prologue.
723+
// TODO: are there other cases (e.g., wrapper functions) that need marking?
724+
if autoffset != 0 {
725+
p.Pos = p.Pos.WithXlogue(src.PosPrologueEnd)
726+
}
727+
731728
if cursym.Func().Text.From.Sym.Wrapper() {
732729
// if g._panic != nil && g._panic.argp == FP {
733730
// g._panic.argp = bottom-of-frame
@@ -933,24 +930,23 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
933930
if autoffset != 0 {
934931
to := p.To // Keep To attached to RET for retjmp below
935932
p.To = obj.Addr{}
933+
if localoffset != 0 {
934+
p.As = AADJSP
935+
p.From.Type = obj.TYPE_CONST
936+
p.From.Offset = int64(-localoffset)
937+
p.Spadj = -localoffset
938+
p = obj.Appendp(p, newprog)
939+
}
940+
936941
if bpsize > 0 {
937942
// Restore caller's BP
938-
p.As = AMOVQ
939-
940-
p.From.Type = obj.TYPE_MEM
941-
p.From.Reg = REG_SP
942-
p.From.Scale = 1
943-
p.From.Offset = int64(autoffset) - int64(bpsize)
943+
p.As = APOPQ
944944
p.To.Type = obj.TYPE_REG
945945
p.To.Reg = REG_BP
946+
p.Spadj = -int32(bpsize)
946947
p = obj.Appendp(p, newprog)
947948
}
948949

949-
p.As = AADJSP
950-
p.From.Type = obj.TYPE_CONST
951-
p.From.Offset = int64(-autoffset)
952-
p.Spadj = -autoffset
953-
p = obj.Appendp(p, newprog)
954950
p.As = obj.ARET
955951
p.To = to
956952

0 commit comments

Comments
 (0)