Skip to content

Commit 9da3f2b

Browse files
thejh authored and KAGA-KOKO committed
x86/fault: BUG() when uaccess helpers fault on kernel addresses
There have been multiple kernel vulnerabilities that permitted userspace to pass completely unchecked pointers through to userspace accessors:

- the waitid() bug - commit 96ca579 ("waitid(): Add missing access_ok() checks")
- the sg/bsg read/write APIs
- the infiniband read/write APIs

These don't happen all that often, but when they do happen, it is hard to test for them properly; and it is probably also hard to discover them with fuzzing. Even when an unmapped kernel address is supplied to such buggy code, it just returns -EFAULT instead of doing a proper BUG() or at least WARN().

Try to make such misbehaving code a bit more visible by refusing to do a fixup in the pagefault handler code when a userspace accessor causes a #PF on a kernel address and the current context isn't whitelisted.

Signed-off-by: Jann Horn <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Tested-by: Kees Cook <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: Masami Hiramatsu <[email protected]>
Cc: "Naveen N. Rao" <[email protected]>
Cc: Anil S Keshavamurthy <[email protected]>
Cc: "David S. Miller" <[email protected]>
Cc: Alexander Viro <[email protected]>
Cc: [email protected]
Cc: Borislav Petkov <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
1 parent 81fd9c1 commit 9da3f2b

File tree

4 files changed

+72
-0
lines changed

4 files changed

+72
-0
lines changed

arch/x86/mm/extable.c

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,11 +117,67 @@ __visible bool ex_handler_fprestore(const struct exception_table_entry *fixup,
117117
}
118118
EXPORT_SYMBOL_GPL(ex_handler_fprestore);
119119

120+
/*
 * Helper to check whether a uaccess fault indicates a kernel bug.
 *
 * @regs:       register state at the fault (not referenced by this helper)
 * @trapnr:     trap number of the exception being handled
 * @fault_addr: faulting address; compared against TASK_SIZE_MAX for #PF
 *
 * Returns true when the fixup must be refused: a #PF on an address at or
 * above TASK_SIZE_MAX, or a #GP, taken while
 * current->kernel_uaccess_faults_ok is zero. An emergency message is
 * printed in that case. Returns false in every other case, including
 * unexpected trap numbers (which only trigger a WARN).
 */
121+
static bool bogus_uaccess(struct pt_regs *regs, int trapnr,
122+
unsigned long fault_addr)
123+
{
124+
/* This is the normal case: #PF with a fault address in userspace. */
125+
if (trapnr == X86_TRAP_PF && fault_addr < TASK_SIZE_MAX)
126+
return false;
127+
128+
/*
129+
* This code can be reached for machine checks, but only if the #MC
130+
* handler has already decided that it looks like a candidate for fixup.
131+
* This e.g. happens when attempting to access userspace memory which
132+
* the CPU can't access because of uncorrectable bad memory.
133+
*/
134+
if (trapnr == X86_TRAP_MC)
135+
return false;
136+
137+
/*
138+
* There are two remaining exception types we might encounter here:
139+
* - #PF for faulting accesses to kernel addresses
140+
* - #GP for faulting accesses to noncanonical addresses
141+
* Complain about anything else.
142+
*/
143+
if (trapnr != X86_TRAP_PF && trapnr != X86_TRAP_GP) {
144+
WARN(1, "unexpected trap %d in uaccess\n", trapnr);
145+
return false;
146+
}
147+
148+
/*
149+
* This is a faulting memory access in kernel space, on a kernel
150+
* address, in a usercopy function. This can e.g. be caused by improper
151+
* use of helpers like __put_user and by improper attempts to access
152+
* userspace addresses in KERNEL_DS regions.
153+
* The only (semi-)legitimate exceptions are probe_kernel_{read,write}(),
154+
* which can be invoked from places like kgdb, /dev/mem (for reading)
155+
* and privileged BPF code (for reading).
156+
* The probe_kernel_*() functions set the kernel_uaccess_faults_ok flag
157+
* to tell us that faulting on kernel addresses, and even noncanonical
158+
* addresses, in a userspace accessor does not necessarily imply a
159+
* kernel bug; root might just be doing weird stuff.
160+
*/
161+
if (current->kernel_uaccess_faults_ok)
162+
return false;
163+
164+
/* This is bad. Refuse the fixup so that we go into die(). */
165+
if (trapnr == X86_TRAP_PF) {
166+
pr_emerg("BUG: pagefault on kernel address 0x%lx in non-whitelisted uaccess\n",
167+
fault_addr);
168+
} else {
169+
pr_emerg("BUG: GPF in non-whitelisted uaccess (non-canonical address?)\n");
170+
}
171+
return true;
172+
}
173+
120174
/*
 * Exception fixup handler for userspace accessors.
 *
 * Returns false without modifying regs when bogus_uaccess() classifies the
 * fault as a kernel bug, i.e. the fixup is refused. Otherwise sets
 * regs->ip to ex_fixup_addr(fixup) and returns true.
 * error_code is not used in this handler.
 */
__visible bool ex_handler_uaccess(const struct exception_table_entry *fixup,
121175
struct pt_regs *regs, int trapnr,
122176
unsigned long error_code,
123177
unsigned long fault_addr)
124178
{
179+
if (bogus_uaccess(regs, trapnr, fault_addr))
180+
return false;
125181
regs->ip = ex_fixup_addr(fixup);
126182
return true;
127183
}
@@ -132,6 +188,8 @@ __visible bool ex_handler_ext(const struct exception_table_entry *fixup,
132188
unsigned long error_code,
133189
unsigned long fault_addr)
134190
{
191+
if (bogus_uaccess(regs, trapnr, fault_addr))
192+
return false;
135193
/* Special hack for uaccess_err */
136194
current->thread.uaccess_err = 1;
137195
regs->ip = ex_fixup_addr(fixup);

fs/namespace.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2642,6 +2642,7 @@ static long exact_copy_from_user(void *to, const void __user * from,
26422642
if (!access_ok(VERIFY_READ, from, n))
26432643
return n;
26442644

2645+
current->kernel_uaccess_faults_ok++;
26452646
while (n) {
26462647
if (__get_user(c, f)) {
26472648
memset(t, 0, n);
@@ -2651,6 +2652,7 @@ static long exact_copy_from_user(void *to, const void __user * from,
26512652
f++;
26522653
n--;
26532654
}
2655+
current->kernel_uaccess_faults_ok--;
26542656
return n;
26552657
}
26562658

include/linux/sched.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -739,6 +739,12 @@ struct task_struct {
739739
unsigned use_memdelay:1;
740740
#endif
741741

742+
/*
743+
* May usercopy functions fault on kernel addresses?
744+
* This is not just a single bit because this can potentially nest.
745+
*/
746+
unsigned int kernel_uaccess_faults_ok;
747+
742748
unsigned long atomic_flags; /* Flags requiring atomic access. */
743749

744750
struct restart_block restart_block;

mm/maccess.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,10 @@ long __probe_kernel_read(void *dst, const void *src, size_t size)
3030

3131
set_fs(KERNEL_DS);
3232
pagefault_disable();
33+
current->kernel_uaccess_faults_ok++;
3334
ret = __copy_from_user_inatomic(dst,
3435
(__force const void __user *)src, size);
36+
current->kernel_uaccess_faults_ok--;
3537
pagefault_enable();
3638
set_fs(old_fs);
3739

@@ -58,7 +60,9 @@ long __probe_kernel_write(void *dst, const void *src, size_t size)
5860

5961
set_fs(KERNEL_DS);
6062
pagefault_disable();
63+
current->kernel_uaccess_faults_ok++;
6164
ret = __copy_to_user_inatomic((__force void __user *)dst, src, size);
65+
current->kernel_uaccess_faults_ok--;
6266
pagefault_enable();
6367
set_fs(old_fs);
6468

@@ -94,11 +98,13 @@ long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count)
9498

9599
set_fs(KERNEL_DS);
96100
pagefault_disable();
101+
current->kernel_uaccess_faults_ok++;
97102

98103
do {
99104
ret = __get_user(*dst++, (const char __user __force *)src++);
100105
} while (dst[-1] && ret == 0 && src - unsafe_addr < count);
101106

107+
current->kernel_uaccess_faults_ok--;
102108
dst[-1] = '\0';
103109
pagefault_enable();
104110
set_fs(old_fs);

0 commit comments

Comments
 (0)