@@ -1480,62 +1480,214 @@ ENTRY(error_exit)
1480
1480
CFI_ENDPROC
1481
1481
END(error_exit)
1482
1482
1483
+ /*
1484
+ * Test if a given stack is an NMI stack or not.
1485
+ */
1486
+ .macro test_in_nmi reg stack nmi_ret normal_ret
1487
+ cmpq %\reg, \stack
1488
+ ja \normal_ret
1489
+ subq $EXCEPTION_STKSZ, %\reg
1490
+ cmpq %\reg, \stack
1491
+ jb \normal_ret
1492
+ jmp \nmi_ret
1493
+ .endm
1483
1494
1484
1495
/* runs on exception stack */
1485
1496
ENTRY(nmi)
1486
1497
INTR_FRAME
1487
1498
PARAVIRT_ADJUST_EXCEPTION_FRAME
1488
- pushq_cfi $-1
1499
+ /*
1500
+ * We allow breakpoints in NMIs. If a breakpoint occurs, then
1501
+ * the iretq it performs will take us out of NMI context.
1502
+ * This means that we can have nested NMIs where the next
1503
+ * NMI is using the top of the stack of the previous NMI. We
1504
+ * can't let it execute because the nested NMI will corrupt the
1505
+ * stack of the previous NMI. NMI handlers are not re-entrant
1506
+ * anyway.
1507
+ *
1508
+ * To handle this case we do the following:
1509
+ * Check a special location on the stack that contains
1510
+ * a variable that is set when NMIs are executing.
1511
+ * The interrupted task's stack is also checked to see if it
1512
+ * is an NMI stack.
1513
+ * If the variable is not set and the stack is not the NMI
1514
+ * stack then:
1515
+ * o Set the special variable on the stack
1516
+ * o Copy the interrupt frame into a "saved" location on the stack
1517
+ * o Copy the interrupt frame into a "copy" location on the stack
1518
+ * o Continue processing the NMI
1519
+ * If the variable is set or the previous stack is the NMI stack:
1520
+ * o Modify the "copy" location to jump to repeat_nmi
1521
+ * o return back to the first NMI
1522
+ *
1523
+ * Now on exit of the first NMI, we first clear the stack variable.
1524
+ * The NMI stack will tell any nested NMIs at that point that it is
1525
+ * nested. Then we pop the stack normally with iret, and if there was
1526
+ * a nested NMI that updated the copy interrupt stack frame, a
1527
+ * jump will be made to the repeat_nmi code that will handle the second
1528
+ * NMI.
1529
+ */
1530
+
1531
+ /* Use %rdx as our temp variable throughout */
1532
+ pushq_cfi %rdx
1533
+
1534
+ /*
1535
+ * Check the special variable on the stack to see if NMIs are
1536
+ * executing.
1537
+ */
1538
+ cmp $1 , -8 (%rsp )
1539
+ je nested_nmi
1540
+
1541
+ /*
1542
+ * Now test if the previous stack was an NMI stack.
1543
+ * We need the double check. We check the NMI stack to satisfy the
1544
+ * race when the first NMI clears the variable before returning.
1545
+ * We check the variable because the first NMI could be in a
1546
+ * breakpoint routine using a breakpoint stack.
1547
+ */
1548
+ lea 6*8 (%rsp ), %rdx
1549
+ test_in_nmi rdx, 4*8 (%rsp ), nested_nmi, first_nmi
1550
+
1551
+ nested_nmi:
1552
+ /*
1553
+ * Do nothing if we interrupted the fixup in repeat_nmi.
1554
+ * It's about to repeat the NMI handler, so we are fine
1555
+ * with ignoring this one.
1556
+ */
1557
+ movq $repeat_nmi, %rdx
1558
+ cmpq 8 (%rsp ), %rdx
1559
+ ja 1f
1560
+ movq $end_repeat_nmi, %rdx
1561
+ cmpq 8 (%rsp ), %rdx
1562
+ ja nested_nmi_out
1563
+
1564
+ 1:
1565
+ /* Set up the interrupted NMI's stack to jump to repeat_nmi */
1566
+ leaq -6*8 (%rsp ), %rdx
1567
+ movq %rdx , %rsp
1568
+ CFI_ADJUST_CFA_OFFSET 6*8
1569
+ pushq_cfi $__KERNEL_DS
1570
+ pushq_cfi %rdx
1571
+ pushfq_cfi
1572
+ pushq_cfi $__KERNEL_CS
1573
+ pushq_cfi $repeat_nmi
1574
+
1575
+ /* Put stack back */
1576
+ addq $(11*8 ), %rsp
1577
+ CFI_ADJUST_CFA_OFFSET -11*8
1578
+
1579
+ nested_nmi_out:
1580
+ popq_cfi %rdx
1581
+
1582
+ /* No need to check faults here */
1583
+ INTERRUPT_RETURN
1584
+
1585
+ first_nmi:
1586
+ /*
1587
+ * Because nested NMIs will use the pushed location that we
1588
+ * stored in rdx, we must keep that space available.
1589
+ * Here's what our stack frame will look like:
1590
+ * +-------------------------+
1591
+ * | original SS |
1592
+ * | original Return RSP |
1593
+ * | original RFLAGS |
1594
+ * | original CS |
1595
+ * | original RIP |
1596
+ * +-------------------------+
1597
+ * | temp storage for rdx |
1598
+ * +-------------------------+
1599
+ * | NMI executing variable |
1600
+ * +-------------------------+
1601
+ * | Saved SS |
1602
+ * | Saved Return RSP |
1603
+ * | Saved RFLAGS |
1604
+ * | Saved CS |
1605
+ * | Saved RIP |
1606
+ * +-------------------------+
1607
+ * | copied SS |
1608
+ * | copied Return RSP |
1609
+ * | copied RFLAGS |
1610
+ * | copied CS |
1611
+ * | copied RIP |
1612
+ * +-------------------------+
1613
+ * | pt_regs |
1614
+ * +-------------------------+
1615
+ *
1616
+ * The saved RIP is used to fix up the copied RIP that a nested
1617
+ * NMI may zero out. The original stack frame and the temp storage
1618
+ * are also used by nested NMIs and cannot be trusted on exit.
1619
+ */
1620
+ /* Set the NMI executing variable on the stack. */
1621
+ pushq_cfi $1
1622
+
1623
+ /* Copy the stack frame to the Saved frame */
1624
+ .rept 5
1625
+ pushq_cfi 6*8 (%rsp )
1626
+ .endr
1627
+
1628
+ /* Make another copy, this one may be modified by nested NMIs */
1629
+ .rept 5
1630
+ pushq_cfi 4*8 (%rsp )
1631
+ .endr
1632
+
1633
+ /* Do not pop rdx, nested NMIs will corrupt it */
1634
+ movq 11*8 (%rsp ), %rdx
1635
+
1636
+ /*
1637
+ * Everything below this point can be preempted by a nested
1638
+ * NMI if the first NMI took an exception. Repeated NMIs
1639
+ * caused by an exception and nested NMI will start here, and
1640
+ * can still be preempted by another NMI.
1641
+ */
1642
+ restart_nmi:
1643
+ pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
1489
1644
subq $ORIG_RAX-R15, %rsp
1490
1645
CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1646
+ /*
1647
+ * Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit
1648
+ * as we should not be calling schedule in NMI context.
1649
+ * Even with normal interrupts enabled. An NMI should not be
1650
+ * setting NEED_RESCHED or anything that normal interrupts and
1651
+ * exceptions might do.
1652
+ */
1491
1653
call save_paranoid
1492
1654
DEFAULT_FRAME 0
1493
1655
/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
1494
1656
movq %rsp ,%rdi
1495
1657
movq $-1 ,%rsi
1496
1658
call do_nmi
1497
- #ifdef CONFIG_TRACE_IRQFLAGS
1498
- /* paranoidexit; without TRACE_IRQS_OFF */
1499
- /* ebx: no swapgs flag */
1500
- DISABLE_INTERRUPTS(CLBR_NONE)
1501
1659
testl %ebx ,%ebx /* swapgs needed? */
1502
1660
jnz nmi_restore
1503
- testl $3 ,CS (%rsp )
1504
- jnz nmi_userspace
1505
1661
nmi_swapgs:
1506
1662
SWAPGS_UNSAFE_STACK
1507
1663
nmi_restore:
1508
1664
RESTORE_ALL 8
1665
+ /* Clear the NMI executing stack variable */
1666
+ movq $0 , 10*8 (%rsp )
1509
1667
jmp irq_return
1510
- nmi_userspace:
1511
- GET_THREAD_INFO(%rcx )
1512
- movl TI_flags(%rcx ),%ebx
1513
- andl $_TIF_WORK_MASK,%ebx
1514
- jz nmi_swapgs
1515
- movq %rsp ,%rdi /* &pt_regs */
1516
- call sync_regs
1517
- movq %rax ,%rsp /* switch stack for scheduling */
1518
- testl $_TIF_NEED_RESCHED,%ebx
1519
- jnz nmi_schedule
1520
- movl %ebx ,%edx /* arg3: thread flags */
1521
- ENABLE_INTERRUPTS(CLBR_NONE)
1522
- xorl %esi ,%esi /* arg2: oldset */
1523
- movq %rsp ,%rdi /* arg1: &pt_regs */
1524
- call do_notify_resume
1525
- DISABLE_INTERRUPTS(CLBR_NONE)
1526
- jmp nmi_userspace
1527
- nmi_schedule:
1528
- ENABLE_INTERRUPTS(CLBR_ANY)
1529
- call schedule
1530
- DISABLE_INTERRUPTS(CLBR_ANY)
1531
- jmp nmi_userspace
1532
- CFI_ENDPROC
1533
- #else
1534
- jmp paranoid_exit
1535
1668
CFI_ENDPROC
1536
- #endif
1537
1669
END(nmi)
1538
1670
1671
+ /*
1672
+ * If an NMI hit an iret because of an exception or breakpoint,
1673
+ * it can lose its NMI context, and a nested NMI may come in.
1674
+ * In that case, the nested NMI will change the preempted NMI's
1675
+ * stack to jump to here when it does the final iret.
1676
+ */
1677
+ repeat_nmi:
1678
+ INTR_FRAME
1679
+ /* Update the stack variable to say we are still in NMI */
1680
+ movq $1 , 5*8 (%rsp )
1681
+
1682
+ /* copy the saved stack back to copy stack */
1683
+ .rept 5
1684
+ pushq_cfi 4*8 (%rsp )
1685
+ .endr
1686
+
1687
+ jmp restart_nmi
1688
+ CFI_ENDPROC
1689
+ end_repeat_nmi:
1690
+
1539
1691
ENTRY(ignore_sysret)
1540
1692
CFI_STARTPROC
1541
1693
mov $-ENOSYS,%eax
0 commit comments