Skip to content

Commit 83c2f91

Browse files
committed
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (39 commits)
  perf tools: Fix compile error on x86_64 Ubuntu
  perf report: Fix --stdio output alignment when --showcpuutilization used
  perf annotate: Get rid of field_sep check
  perf annotate: Fix usage string
  perf kmem: Fix a memory leak
  perf kmem: Add missing closedir() calls
  perf top: Add error message for EMFILE
  perf test: Change type of '-v' option to INCR
  perf script: Add missing closedir() calls
  tracing: Fix compile error when static ftrace is enabled
  recordmcount: Fix handling of elf64 big-endian objects.
  perf tools: Add const.h to MANIFEST to make perf-tar-src-pkg work again
  perf tools: Add support for guest/host-only profiling
  perf kvm: Do guest-only counting by default
  perf top: Don't update total_period on process_sample
  perf hists: Stop using 'self' for struct hist_entry
  perf hists: Rename total_session to total_period
  x86: Add counter when debug stack is used with interrupts enabled
  x86: Allow NMIs to hit breakpoints in i386
  x86: Keep current stack in NMI breakpoints
  ...
2 parents f0ed5b9 + 172d1b0 commit 83c2f91

29 files changed

+1262
-468
lines changed

Documentation/kernel-parameters.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2475,6 +2475,14 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
24752475
stacktrace [FTRACE]
24762476
Enable the stack tracer on boot up.
24772477

2478+
stacktrace_filter=[function-list]
2479+
[FTRACE] Limit the functions that the stack tracer
2480+
will trace at boot up. function-list is a comma separated
2481+
list of functions. This list can be changed at run
2482+
time by the stack_trace_filter file in the debugfs
2483+
tracing directory. Note, this enables stack tracing
2484+
and the stacktrace above is not needed.
2485+
24782486
sti= [PARISC,HW]
24792487
Format: <num>
24802488
Set the STI (builtin display/keyboard on the HP-PARISC

arch/x86/include/asm/debugreg.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,28 @@ extern void aout_dump_debugregs(struct user *dump);
101101

102102
extern void hw_breakpoint_restore(void);
103103

104+
#ifdef CONFIG_X86_64
105+
DECLARE_PER_CPU(int, debug_stack_usage);
106+
/* Increment the current CPU's debug_stack_usage counter (X86_64 builds only). */
static inline void debug_stack_usage_inc(void)
107+
{
108+
__get_cpu_var(debug_stack_usage)++;
109+
}
110+
/* Decrement the current CPU's debug_stack_usage counter (X86_64 builds only). */
static inline void debug_stack_usage_dec(void)
111+
{
112+
__get_cpu_var(debug_stack_usage)--;
113+
}
114+
int is_debug_stack(unsigned long addr);
115+
void debug_stack_set_zero(void);
116+
void debug_stack_reset(void);
117+
#else /* !X86_64 */
118+
/*
 * Stubs used when CONFIG_X86_64 is not set: is_debug_stack() always
 * returns 0 and the remaining helpers are empty no-ops.
 */
static inline int is_debug_stack(unsigned long addr) { return 0; }
118+
static inline void debug_stack_set_zero(void) { }
119+
static inline void debug_stack_reset(void) { }
120+
static inline void debug_stack_usage_inc(void) { }
121+
static inline void debug_stack_usage_dec(void) { }
123+
#endif /* X86_64 */
124+
125+
104126
#endif /* __KERNEL__ */
105127

106128
#endif /* _ASM_X86_DEBUGREG_H */

arch/x86/include/asm/desc.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in
3535

3636
extern struct desc_ptr idt_descr;
3737
extern gate_desc idt_table[];
38+
extern struct desc_ptr nmi_idt_descr;
39+
extern gate_desc nmi_idt_table[];
3840

3941
struct gdt_page {
4042
struct desc_struct gdt[GDT_ENTRIES];
@@ -307,6 +309,16 @@ static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit)
307309
desc->limit = (limit >> 16) & 0xf;
308310
}
309311

312+
#ifdef CONFIG_X86_64
313+
/*
 * Build an interrupt gate (GATE_INTERRUPT, __KERNEL_CS) for handler @addr
 * and write it into slot @gate of nmi_idt_table rather than idt_table.
 */
static inline void set_nmi_gate(int gate, void *addr)
314+
{
315+
gate_desc s;
316+
317+
/* NOTE(review): the two 0 arguments are presumably dpl and ist, mirroring _set_gate()'s parameter order - confirm against pack_gate() */
pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS);
318+
write_idt_entry(nmi_idt_table, gate, &s);
319+
}
320+
#endif
321+
310322
static inline void _set_gate(int gate, unsigned type, void *addr,
311323
unsigned dpl, unsigned ist, unsigned seg)
312324
{

arch/x86/kernel/cpu/common.c

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1021,6 +1021,8 @@ __setup("clearcpuid=", setup_disablecpuid);
10211021

10221022
#ifdef CONFIG_X86_64
10231023
struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
1024+
struct desc_ptr nmi_idt_descr = { NR_VECTORS * 16 - 1,
1025+
(unsigned long) nmi_idt_table };
10241026

10251027
DEFINE_PER_CPU_FIRST(union irq_stack_union,
10261028
irq_stack_union) __aligned(PAGE_SIZE);
@@ -1085,6 +1087,26 @@ unsigned long kernel_eflags;
10851087
*/
10861088
DEFINE_PER_CPU(struct orig_ist, orig_ist);
10871089

1090+
static DEFINE_PER_CPU(unsigned long, debug_stack_addr);
1091+
DEFINE_PER_CPU(int, debug_stack_usage);
1092+
1093+
/*
 * Report whether the debug stack is involved on the current CPU.
 *
 * Returns nonzero when this CPU's debug_stack_usage counter is nonzero,
 * or when @addr lies in the range
 * (debug_stack_addr - DEBUG_STKSZ, debug_stack_addr], i.e. the upper
 * bound is inclusive and the lower bound is exclusive.
 * Returns 0 otherwise.
 */
int is_debug_stack(unsigned long addr)
1094+
{
1095+
return __get_cpu_var(debug_stack_usage) ||
1096+
(addr <= __get_cpu_var(debug_stack_addr) &&
1097+
addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ));
1098+
}
1099+
1100+
/*
 * Load the IDT described by nmi_idt_descr (backed by nmi_idt_table).
 * NOTE(review): the "set_zero" name presumably refers to the debug
 * vector's IST index being zero in that table - confirm against the
 * code that populates nmi_idt_table.
 */
void debug_stack_set_zero(void)
1101+
{
1102+
load_idt((const struct desc_ptr *)&nmi_idt_descr);
1103+
}
1104+
1105+
/*
 * Load the IDT described by idt_descr (backed by idt_table), undoing
 * the switch to nmi_idt_descr made by debug_stack_set_zero().
 */
void debug_stack_reset(void)
1106+
{
1107+
load_idt((const struct desc_ptr *)&idt_descr);
1108+
}
1109+
10881110
#else /* CONFIG_X86_64 */
10891111

10901112
DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
@@ -1212,6 +1234,8 @@ void __cpuinit cpu_init(void)
12121234
estacks += exception_stack_sizes[v];
12131235
oist->ist[v] = t->x86_tss.ist[v] =
12141236
(unsigned long)estacks;
1237+
if (v == DEBUG_STACK-1)
1238+
per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks;
12151239
}
12161240
}
12171241

arch/x86/kernel/entry_64.S

Lines changed: 185 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1480,62 +1480,214 @@ ENTRY(error_exit)
14801480
CFI_ENDPROC
14811481
END(error_exit)
14821482

1483+
/*
1484+
* Test if a given stack is an NMI stack or not.
1485+
*/
1486+
.macro test_in_nmi reg stack nmi_ret normal_ret
/*
 * Arguments:
 *   reg        - register name (without '%') holding the upper bound
 *   stack      - operand holding the stack pointer value to classify
 *   nmi_ret    - label jumped to when \stack is inside the range
 *   normal_ret - label jumped to when \stack is outside the range
 *
 * Jumps to \nmi_ret when
 *   %\reg - EXCEPTION_STKSZ <= \stack <= %\reg   (unsigned compare),
 * otherwise to \normal_ret.  %\reg is clobbered: EXCEPTION_STKSZ is
 * subtracted from it once the first comparison falls through.
 */
1487+
/* \stack above the upper bound: not on this stack */
cmpq %\reg, \stack
1488+
ja \normal_ret
1489+
/* turn %\reg into the lower bound of the range */
subq $EXCEPTION_STKSZ, %\reg
1490+
/* \stack below the lower bound: not on this stack */
cmpq %\reg, \stack
1491+
jb \normal_ret
1492+
jmp \nmi_ret
1493+
.endm
14831494

14841495
/* runs on exception stack */
14851496
ENTRY(nmi)
14861497
INTR_FRAME
14871498
PARAVIRT_ADJUST_EXCEPTION_FRAME
1488-
pushq_cfi $-1
1499+
/*
1500+
* We allow breakpoints in NMIs. If a breakpoint occurs, then
1501+
* the iretq it performs will take us out of NMI context.
1502+
* This means that we can have nested NMIs where the next
1503+
* NMI is using the top of the stack of the previous NMI. We
1504+
* can't let it execute because the nested NMI will corrupt the
1505+
* stack of the previous NMI. NMI handlers are not re-entrant
1506+
* anyway.
1507+
*
1508+
* To handle this case we do the following:
1509+
* Check a special location on the stack that contains
1510+
* a variable that is set when NMIs are executing.
1511+
* The interrupted task's stack is also checked to see if it
1512+
* is an NMI stack.
1513+
* If the variable is not set and the stack is not the NMI
1514+
* stack then:
1515+
* o Set the special variable on the stack
1516+
* o Copy the interrupt frame into a "saved" location on the stack
1517+
* o Copy the interrupt frame into a "copy" location on the stack
1518+
* o Continue processing the NMI
1519+
* If the variable is set or the previous stack is the NMI stack:
1520+
* o Modify the "copy" location to jump to repeat_nmi
1521+
* o return back to the first NMI
1522+
*
1523+
* Now on exit of the first NMI, we first clear the stack variable
1524+
* The NMI stack will tell any nested NMIs at that point that it is
1525+
* nested. Then we pop the stack normally with iret, and if there was
1526+
* a nested NMI that updated the copy interrupt stack frame, a
1527+
* jump will be made to the repeat_nmi code that will handle the second
1528+
* NMI.
1529+
*/
1530+
1531+
/* Use %rdx as our temp variable throughout */
1532+
pushq_cfi %rdx
1533+
1534+
/*
1535+
* Check the special variable on the stack to see if NMIs are
1536+
* executing.
1537+
*/
1538+
cmp $1, -8(%rsp)
1539+
je nested_nmi
1540+
1541+
/*
1542+
* Now test if the previous stack was an NMI stack.
1543+
* We need the double check. We check the NMI stack to satisfy the
1544+
* race when the first NMI clears the variable before returning.
1545+
* We check the variable because the first NMI could be in a
1546+
* breakpoint routine using a breakpoint stack.
1547+
*/
1548+
lea 6*8(%rsp), %rdx
1549+
test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi
1550+
1551+
nested_nmi:
1552+
/*
1553+
* Do nothing if we interrupted the fixup in repeat_nmi.
1554+
* It's about to repeat the NMI handler, so we are fine
1555+
* with ignoring this one.
1556+
*/
1557+
movq $repeat_nmi, %rdx
1558+
cmpq 8(%rsp), %rdx
1559+
ja 1f
1560+
movq $end_repeat_nmi, %rdx
1561+
cmpq 8(%rsp), %rdx
1562+
ja nested_nmi_out
1563+
1564+
1:
1565+
/* Set up the interrupted NMI's stack to jump to repeat_nmi */
1566+
leaq -6*8(%rsp), %rdx
1567+
movq %rdx, %rsp
1568+
CFI_ADJUST_CFA_OFFSET 6*8
1569+
pushq_cfi $__KERNEL_DS
1570+
pushq_cfi %rdx
1571+
pushfq_cfi
1572+
pushq_cfi $__KERNEL_CS
1573+
pushq_cfi $repeat_nmi
1574+
1575+
/* Put stack back */
1576+
addq $(11*8), %rsp
1577+
CFI_ADJUST_CFA_OFFSET -11*8
1578+
1579+
nested_nmi_out:
1580+
popq_cfi %rdx
1581+
1582+
/* No need to check faults here */
1583+
INTERRUPT_RETURN
1584+
1585+
first_nmi:
1586+
/*
1587+
* Because nested NMIs will use the pushed location that we
1588+
* stored in rdx, we must keep that space available.
1589+
* Here's what our stack frame will look like:
1590+
* +-------------------------+
1591+
* | original SS |
1592+
* | original Return RSP |
1593+
* | original RFLAGS |
1594+
* | original CS |
1595+
* | original RIP |
1596+
* +-------------------------+
1597+
* | temp storage for rdx |
1598+
* +-------------------------+
1599+
* | NMI executing variable |
1600+
* +-------------------------+
1601+
* | Saved SS |
1602+
* | Saved Return RSP |
1603+
* | Saved RFLAGS |
1604+
* | Saved CS |
1605+
* | Saved RIP |
1606+
* +-------------------------+
1607+
* | copied SS |
1608+
* | copied Return RSP |
1609+
* | copied RFLAGS |
1610+
* | copied CS |
1611+
* | copied RIP |
1612+
* +-------------------------+
1613+
* | pt_regs |
1614+
* +-------------------------+
1615+
*
1616+
* The saved RIP is used to fix up the copied RIP that a nested
1617+
* NMI may zero out. The original stack frame and the temp storage
1618+
* are also used by nested NMIs and can not be trusted on exit.
1619+
*/
1620+
/* Set the NMI executing variable on the stack. */
1621+
pushq_cfi $1
1622+
1623+
/* Copy the stack frame to the Saved frame */
1624+
.rept 5
1625+
pushq_cfi 6*8(%rsp)
1626+
.endr
1627+
1628+
/* Make another copy, this one may be modified by nested NMIs */
1629+
.rept 5
1630+
pushq_cfi 4*8(%rsp)
1631+
.endr
1632+
1633+
/* Do not pop rdx, nested NMIs will corrupt it */
1634+
movq 11*8(%rsp), %rdx
1635+
1636+
/*
1637+
* Everything below this point can be preempted by a nested
1638+
* NMI if the first NMI took an exception. Repeated NMIs
1639+
* caused by an exception and nested NMI will start here, and
1640+
* can still be preempted by another NMI.
1641+
*/
1642+
restart_nmi:
1643+
pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
14891644
subq $ORIG_RAX-R15, %rsp
14901645
CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1646+
/*
1647+
* Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit
1648+
* as we should not be calling schedule in NMI context.
1649+
* Even with normal interrupts enabled. An NMI should not be
1650+
* setting NEED_RESCHED or anything that normal interrupts and
1651+
* exceptions might do.
1652+
*/
14911653
call save_paranoid
14921654
DEFAULT_FRAME 0
14931655
/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
14941656
movq %rsp,%rdi
14951657
movq $-1,%rsi
14961658
call do_nmi
1497-
#ifdef CONFIG_TRACE_IRQFLAGS
1498-
/* paranoidexit; without TRACE_IRQS_OFF */
1499-
/* ebx: no swapgs flag */
1500-
DISABLE_INTERRUPTS(CLBR_NONE)
15011659
testl %ebx,%ebx /* swapgs needed? */
15021660
jnz nmi_restore
1503-
testl $3,CS(%rsp)
1504-
jnz nmi_userspace
15051661
nmi_swapgs:
15061662
SWAPGS_UNSAFE_STACK
15071663
nmi_restore:
15081664
RESTORE_ALL 8
1665+
/* Clear the NMI executing stack variable */
1666+
movq $0, 10*8(%rsp)
15091667
jmp irq_return
1510-
nmi_userspace:
1511-
GET_THREAD_INFO(%rcx)
1512-
movl TI_flags(%rcx),%ebx
1513-
andl $_TIF_WORK_MASK,%ebx
1514-
jz nmi_swapgs
1515-
movq %rsp,%rdi /* &pt_regs */
1516-
call sync_regs
1517-
movq %rax,%rsp /* switch stack for scheduling */
1518-
testl $_TIF_NEED_RESCHED,%ebx
1519-
jnz nmi_schedule
1520-
movl %ebx,%edx /* arg3: thread flags */
1521-
ENABLE_INTERRUPTS(CLBR_NONE)
1522-
xorl %esi,%esi /* arg2: oldset */
1523-
movq %rsp,%rdi /* arg1: &pt_regs */
1524-
call do_notify_resume
1525-
DISABLE_INTERRUPTS(CLBR_NONE)
1526-
jmp nmi_userspace
1527-
nmi_schedule:
1528-
ENABLE_INTERRUPTS(CLBR_ANY)
1529-
call schedule
1530-
DISABLE_INTERRUPTS(CLBR_ANY)
1531-
jmp nmi_userspace
1532-
CFI_ENDPROC
1533-
#else
1534-
jmp paranoid_exit
15351668
CFI_ENDPROC
1536-
#endif
15371669
END(nmi)
15381670

1671+
/*
1672+
* If an NMI hit an iret because of an exception or breakpoint,
1673+
* it can lose its NMI context, and a nested NMI may come in.
1674+
* In that case, the nested NMI will change the preempted NMI's
1675+
* stack to jump to here when it does the final iret.
1676+
*/
1677+
repeat_nmi:
1678+
INTR_FRAME
1679+
/* Update the stack variable to say we are still in NMI */
1680+
movq $1, 5*8(%rsp)
1681+
1682+
/* copy the saved stack back to copy stack */
1683+
.rept 5
1684+
pushq_cfi 4*8(%rsp)
1685+
.endr
1686+
1687+
jmp restart_nmi
1688+
CFI_ENDPROC
1689+
end_repeat_nmi:
1690+
15391691
ENTRY(ignore_sysret)
15401692
CFI_STARTPROC
15411693
mov $-ENOSYS,%eax

arch/x86/kernel/head_64.S

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -417,6 +417,10 @@ ENTRY(phys_base)
417417
ENTRY(idt_table)
418418
.skip IDT_ENTRIES * 16
419419

420+
.align L1_CACHE_BYTES
421+
ENTRY(nmi_idt_table)
422+
.skip IDT_ENTRIES * 16
423+
420424
__PAGE_ALIGNED_BSS
421425
.align PAGE_SIZE
422426
ENTRY(empty_zero_page)

0 commit comments

Comments
 (0)