Bugtraq mailing list archives

Re: 2.2.0 SECURITY (fwd)


From: andrea () E-MIND COM (Andrea Arcangeli)
Date: Wed, 27 Jan 1999 21:38:37 +0100


On Wed, 27 Jan 1999, //Stany wrote:

Later on down the thread there are other people besides Dan Burcaw who say
that the bug crashes their boxes too.

There's another way to cause 2.2.0 to generate many Oopses and processes in D
state; once it also locked up while I was in X.

Tonight I was able to write a simple script to reproduce the bad
behavior:

#!/bin/sh
# Copyright (C) 1999  Andrea Arcangeli
# Crash-2.2.0.sh
#
# Reproducer: forks `free` and `ps` in a tight background loop to race
# mmget()/mmput() on Linux 2.2.0.  WARNING: intentionally hammers the box.

# Fix 1: ':' must be a separate word -- "while:" is parsed as a command name
#        (command not found), not the start of a loop.
# Fix 2: "&>/dev/null" is a bash-ism; under a strict POSIX /bin/sh use the
#        portable ">/dev/null 2>&1" form instead.
while :; do
        free >/dev/null 2>&1 &
        ps >/dev/null 2>&1 &
done

And here is my race-fix against linux-2.2.0 (which I wrote today and
last night). I am no longer able to cause processes to go into D state, or
Oopses, or lockups (once the other munmap bugfix by Ingo Molnar has also
been applied, of course ;).

Index: arch/i386/kernel/ldt.c
===================================================================
RCS file: /var/cvs/linux/arch/i386/kernel/ldt.c,v
retrieving revision 1.1.1.1
diff -u -r1.1.1.1 ldt.c
--- ldt.c       1999/01/18 01:28:57     1.1.1.1
+++ linux/arch/i386/kernel/ldt.c        1999/01/27 19:36:10
@@ -83,7 +83,7 @@
                        set_ldt_desc(i, ldt, LDT_ENTRIES);
                        current->tss.ldt = _LDT(i);
                        load_ldt(i);
-                       if (atomic_read(&mm->count) > 1)
+                       if (mm->count > 1)
                                printk(KERN_WARNING
                                        "LDT allocated for cloned task!\n");
                } else {
Index: fs/binfmt_aout.c
===================================================================
RCS file: /var/cvs/linux/fs/binfmt_aout.c,v
retrieving revision 1.1.1.1
diff -u -r1.1.1.1 binfmt_aout.c
--- binfmt_aout.c       1999/01/18 01:26:49     1.1.1.1
+++ linux/fs/binfmt_aout.c      1999/01/27 20:06:16
@@ -101,7 +101,7 @@
 #       define START_STACK(u)   (u.start_stack)
 #endif

-       if (!current->dumpable || atomic_read(&current->mm->count) != 1)
+       if (!current->dumpable || current->mm->count != 1)
                return 0;
        current->dumpable = 0;

Index: fs/binfmt_elf.c
===================================================================
RCS file: /var/cvs/linux/fs/binfmt_elf.c,v
retrieving revision 1.1.1.1
diff -u -r1.1.1.1 binfmt_elf.c
--- binfmt_elf.c        1999/01/18 01:26:49     1.1.1.1
+++ linux/fs/binfmt_elf.c       1999/01/27 19:36:10
@@ -1067,7 +1067,7 @@

        if (!current->dumpable ||
            limit < ELF_EXEC_PAGESIZE ||
-           atomic_read(&current->mm->count) != 1)
+           current->mm->count != 1)
                return 0;
        current->dumpable = 0;

Index: fs/exec.c
===================================================================
RCS file: /var/cvs/linux/fs/exec.c,v
retrieving revision 1.1.1.2
diff -u -r1.1.1.2 exec.c
--- exec.c      1999/01/23 16:28:07     1.1.1.2
+++ linux/fs/exec.c     1999/01/27 19:36:10
@@ -378,7 +378,7 @@
        struct mm_struct * mm, * old_mm;
        int retval, nr;

-       if (atomic_read(&current->mm->count) == 1) {
+       if (current->mm->count == 1) {
                flush_cache_mm(current->mm);
                mm_release();
                release_segments(current->mm);
@@ -409,7 +409,9 @@
        activate_context(current);
        up(&mm->mmap_sem);
        mm_release();
+       spin_lock(&mm_lock);
        mmput(old_mm);
+       spin_unlock(&mm_lock);
        return 0;

        /*
Index: fs/proc/array.c
===================================================================
RCS file: /var/cvs/linux/fs/proc/array.c,v
retrieving revision 1.1.1.4
diff -u -r1.1.1.4 array.c
--- array.c     1999/01/26 18:30:44     1.1.1.4
+++ linux/fs/proc/array.c       1999/01/27 19:59:22
@@ -43,6 +43,8 @@
  *                     <Alan.Cox () linux org>
  *
  * Andi Kleen       :  Race Fixes.     
+ *
+ * Andrea Arcangeli  :  do_exit/mmput/mmget race fix on the Race Fixes ;).
  *
  */

@@ -65,6 +67,7 @@
 #include <linux/slab.h>
 #include <linux/smp.h>
 #include <linux/signal.h>
+#include <linux/init.h>

 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -77,6 +80,7 @@
 int get_malloc(char * buffer);
 #endif

+static kmem_cache_t * task_struct_head_cache;

 static ssize_t read_core(struct file * file, char * buf,
                         size_t count, loff_t *ppos)
@@ -399,8 +403,11 @@

        read_lock(&tasklist_lock);
        tsk = find_task_by_pid(pid);
+       spin_lock(&mm_lock);
        if (tsk && tsk->mm && tsk->mm != &init_mm)
-               mmget(mm = tsk->mm);
+               if (!mmget(mm = tsk->mm))
+                       mm = NULL;
+       spin_unlock(&mm_lock);
        read_unlock(&tasklist_lock);
        if (mm != NULL)
                down(&mm->mmap_sem);
@@ -410,7 +417,9 @@
 static void release_mm(struct mm_struct *mm)
 {
        up(&mm->mmap_sem);
+       spin_lock(&mm_lock);
        mmput(mm);
+       spin_unlock(&mm_lock);
 }

 static unsigned long get_phys_addr(struct mm_struct *mm, unsigned long ptr)
@@ -504,17 +513,9 @@
        return res;
 }

-/*
- * These bracket the sleeping functions..
- */
-extern void scheduling_functions_start_here(void);
-extern void scheduling_functions_end_here(void);
-#define first_sched    ((unsigned long) scheduling_functions_start_here)
-#define last_sched     ((unsigned long) scheduling_functions_end_here)
-
 static unsigned long get_wchan(struct task_struct *p)
 {
-       if (!p || p == current || p->state == TASK_RUNNING)
+       if (!p || real_tsk(p) == current || p->state == TASK_RUNNING)
                return 0;
 #if defined(__i386__)
        {
@@ -522,7 +523,7 @@
                unsigned long stack_page;
                int count = 0;

-               stack_page = (unsigned long)p;
+               stack_page = (unsigned long)real_tsk(p);
                esp = p->tss.esp;
                if (!stack_page || esp < stack_page || esp >= 8188+stack_page)
                        return 0;
@@ -564,7 +565,7 @@
            unsigned long stack_page;
            int count = 0;

-           stack_page = (unsigned long)p;
+           stack_page = (unsigned long)real_tsk(p);
            fp = ((struct switch_stack *)p->tss.ksp)->a6;
            do {
                    if (fp < stack_page+sizeof(struct task_struct) ||
@@ -585,7 +586,7 @@
                unsigned long stack_page;
                int count = 0;

-               stack_page = 4096 + (unsigned long)p;
+               stack_page = 4096 + (unsigned long)real_tsk(p);
                fp = get_css_fp (&p->tss);
                do {
                        if (fp < stack_page || fp > 4092+stack_page)
@@ -599,7 +600,7 @@
 #elif defined (__sparc__)
        {
                unsigned long pc, fp, bias = 0;
-               unsigned long task_base = (unsigned long) p;
+               unsigned long task_base = (unsigned long) real_tsk(p);
                struct reg_window *rw;
                int count = 0;

@@ -624,8 +625,8 @@
 }

 #if defined(__i386__)
-# define KSTK_EIP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1019])
-# define KSTK_ESP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1022])
+# define KSTK_EIP(tsk) (((unsigned long *)(4096+(unsigned long)real_tsk(tsk)))[1019])
+# define KSTK_ESP(tsk) (((unsigned long *)(4096+(unsigned long)real_tsk(tsk)))[1022])
 #elif defined(__alpha__)
   /*
    * See arch/alpha/kernel/ptrace.c for details.
@@ -633,11 +634,11 @@
 # define PT_REG(reg)           (PAGE_SIZE - sizeof(struct pt_regs)     \
                                 + (long)&((struct pt_regs *)0)->reg)
 # define KSTK_EIP(tsk) \
-    (*(unsigned long *)(PT_REG(pc) + PAGE_SIZE + (unsigned long)(tsk)))
+    (*(unsigned long *)(PT_REG(pc) + PAGE_SIZE + (unsigned long)real_tsk(tsk)))
 # define KSTK_ESP(tsk) ((tsk) == current ? rdusp() : (tsk)->tss.usp)
 #elif defined(CONFIG_ARM)
-# define KSTK_EIP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1022])
-# define KSTK_ESP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1020])
+# define KSTK_EIP(tsk) (((unsigned long *)(4096+(unsigned long)real_tsk(tsk)))[1022])
+# define KSTK_ESP(tsk) (((unsigned long *)(4096+(unsigned long)real_tsk(tsk)))[1020])
 #elif defined(__mc68000__)
 #define        KSTK_EIP(tsk)   \
     ({                 \
@@ -646,7 +647,7 @@
            MAP_NR((tsk)->tss.esp0) < max_mapnr) \
              eip = ((struct pt_regs *) (tsk)->tss.esp0)->pc;    \
        eip; })
-#define        KSTK_ESP(tsk)   ((tsk) == current ? rdusp() : (tsk)->tss.usp)
+#define        KSTK_ESP(tsk)   (real_tsk(tsk) == current ? rdusp() : (tsk)->tss.usp)
 #elif defined(__powerpc__)
 #define KSTK_EIP(tsk)  ((tsk)->tss.regs->nip)
 #define KSTK_ESP(tsk)  ((tsk)->tss.regs->gpr[1])
@@ -851,21 +852,42 @@

 static struct task_struct *grab_task(int pid)
 {
-       struct task_struct *tsk = current;
+       struct task_struct *tsk = current, *dst;
        if (pid != tsk->pid) {
+               dst = kmem_cache_alloc(task_struct_head_cache, SLAB_KERNEL);
+               if (!dst)
+                       return NULL;
                read_lock(&tasklist_lock);
                tsk = find_task_by_pid(pid);
-               if (tsk && tsk->mm && tsk->mm != &init_mm)
-                       mmget(tsk->mm);
+               if (tsk) {
+                       spin_lock(&mm_lock);
+                       if (tsk->mm && tsk->mm != &init_mm)
+                               if (!mmget(tsk->mm))
+                                       tsk = NULL;
+                       spin_unlock(&mm_lock);
+                       if (tsk)
+                       {
+                               memcpy(dst, tsk, sizeof(struct task_struct));
+                               tsk = dst;
+                       }
+               }
                read_unlock(&tasklist_lock);
-       }       
+               if (!tsk)
+                       kmem_cache_free(task_struct_head_cache, dst);
+       }
        return tsk;
 }

 static void release_task(struct task_struct *tsk)
 {
-       if (tsk != current && tsk->mm && tsk->mm != &init_mm)
-               mmput(tsk->mm);
+       if (tsk != current)
+       {
+               spin_lock(&mm_lock);
+               if (tsk->mm && tsk->mm != &init_mm)
+                       mmput(tsk->mm);
+               spin_unlock(&mm_lock);
+               kmem_cache_free(task_struct_head_cache, tsk);
+       }
 }

 static int get_status(int pid, char * buffer)
@@ -1149,7 +1171,7 @@
                goto getlen_out;

        /* Check whether the mmaps could change if we sleep */
-       volatile_task = (p != current || atomic_read(&p->mm->count) > 1);
+       volatile_task = (p != current || p->mm->count > 1);

        /* decode f_pos */
        lineno = *ppos >> MAPS_LINE_SHIFT;
@@ -1600,3 +1622,14 @@
        NULL,                   /* truncate */
        NULL                    /* permission */
 };
+
+void __init proc_array_init(void)
+{
+       task_struct_head_cache = kmem_cache_create("task_struct",
+                                                  sizeof(struct task_struct),
+                                                  0,
+                                                  SLAB_HWCACHE_ALIGN,
+                                                  NULL, NULL);
+       if (!task_struct_head_cache)
+               panic("cannot create task_struct cache");
+}
Index: fs/proc/root.c
===================================================================
RCS file: /var/cvs/linux/fs/proc/root.c,v
retrieving revision 1.1.1.2
diff -u -r1.1.1.2 root.c
--- root.c      1999/01/18 13:39:15     1.1.1.2
+++ linux/fs/proc/root.c        1999/01/27 19:49:40
@@ -671,6 +671,7 @@

 __initfunc(void proc_root_init(void))
 {
+       proc_array_init();
        proc_base_init();
        proc_register(&proc_root, &proc_root_loadavg);
        proc_register(&proc_root, &proc_root_uptime);
Index: include/asm-i386/pgtable.h
===================================================================
RCS file: /var/cvs/linux/include/asm-i386/pgtable.h,v
retrieving revision 1.1.1.1
diff -u -r1.1.1.1 pgtable.h
--- pgtable.h   1999/01/18 01:27:15     1.1.1.1
+++ linux/include/asm-i386/pgtable.h    1999/01/27 19:45:56
@@ -101,7 +101,7 @@
 static inline void flush_tlb_current_task(void)
 {
        /* just one copy of this mm? */
-       if (atomic_read(&current->mm->count) == 1)
+       if (current->mm->count == 1)
                local_flush_tlb();      /* and that's us, so.. */
        else
                smp_flush_tlb();
@@ -113,7 +113,7 @@

 static inline void flush_tlb_mm(struct mm_struct * mm)
 {
-       if (mm == current->mm && atomic_read(&mm->count) == 1)
+       if (mm == current->mm && mm->count == 1)
                local_flush_tlb();
        else
                smp_flush_tlb();
@@ -122,7 +122,7 @@
 static inline void flush_tlb_page(struct vm_area_struct * vma,
        unsigned long va)
 {
-       if (vma->vm_mm == current->mm && atomic_read(&current->mm->count) == 1)
+       if (vma->vm_mm == current->mm && current->mm->count == 1)
                __flush_tlb_one(va);
        else
                smp_flush_tlb();
Index: include/linux/proc_fs.h
===================================================================
RCS file: /var/cvs/linux/include/linux/proc_fs.h,v
retrieving revision 1.1.1.1
diff -u -r1.1.1.1 proc_fs.h
--- proc_fs.h   1999/01/18 01:27:09     1.1.1.1
+++ linux/include/linux/proc_fs.h       1999/01/27 19:48:49
@@ -312,6 +312,7 @@
 extern struct inode_operations proc_scsi_inode_operations;

 extern void proc_root_init(void);
+extern void proc_array_init(void);
 extern void proc_base_init(void);

 extern int proc_register(struct proc_dir_entry *, struct proc_dir_entry *);
Index: include/linux/sched.h
===================================================================
RCS file: /var/cvs/linux/include/linux/sched.h,v
retrieving revision 1.1.1.3
diff -u -r1.1.1.3 sched.h
--- sched.h     1999/01/23 16:29:58     1.1.1.3
+++ linux/include/linux/sched.h 1999/01/27 19:45:56
@@ -114,6 +114,15 @@
 extern rwlock_t tasklist_lock;
 extern spinlock_t scheduler_lock;

+/*
+ * These bracket the sleeping functions..
+ */
+extern void scheduling_functions_start_here(void);
+extern void scheduling_functions_end_here(void);
+#define first_sched    ((unsigned long) scheduling_functions_start_here)
+#define last_sched     ((unsigned long) scheduling_functions_end_here)
+#define real_tsk(tsk)  (*(tsk)->tarray_ptr)
+
 extern void sched_init(void);
 extern void show_state(void);
 extern void trap_init(void);
@@ -164,7 +173,7 @@
        struct vm_area_struct *mmap_avl;        /* tree of VMAs */
        struct vm_area_struct *mmap_cache;      /* last find_vma result */
        pgd_t * pgd;
-       atomic_t count;
+       int count;
        int map_count;                          /* number of VMAs */
        struct semaphore mmap_sem;
        unsigned long context;
@@ -184,7 +193,7 @@
 #define INIT_MM {                                      \
                &init_mmap, NULL, NULL,                 \
                swapper_pg_dir,                         \
-               ATOMIC_INIT(1), 1,                      \
+               1, 1,                                   \
                MUTEX,                                  \
                0,                                      \
                0, 0, 0, 0,                             \
@@ -391,6 +400,7 @@

 extern struct task_struct **tarray_freelist;
 extern spinlock_t taskslot_lock;
+extern spinlock_t mm_lock;

 extern __inline__ void add_free_taskslot(struct task_struct **t)
 {
@@ -609,11 +619,8 @@
  * Routines for handling mm_structs
  */
 extern struct mm_struct * mm_alloc(void);
-static inline void mmget(struct mm_struct * mm)
-{
-       atomic_inc(&mm->count);
-}
-extern void mmput(struct mm_struct *);
+extern int FASTCALL(mmget(struct mm_struct *));
+extern void FASTCALL(mmput(struct mm_struct *));
 /* Remove the current tasks stale references to the old mm_struct */
 extern void mm_release(void);

Index: kernel/exit.c
===================================================================
RCS file: /var/cvs/linux/kernel/exit.c,v
retrieving revision 1.1.1.2
diff -u -r1.1.1.2 exit.c
--- exit.c      1999/01/23 16:30:27     1.1.1.2
+++ linux/kernel/exit.c 1999/01/27 19:36:10
@@ -248,8 +248,10 @@

 static inline void __exit_mm(struct task_struct * tsk)
 {
-       struct mm_struct * mm = tsk->mm;
+       struct mm_struct * mm;

+       spin_lock(&mm_lock);
+       mm = tsk->mm;
        /* Set us up to use the kernel mm state */
        if (mm != &init_mm) {
                flush_cache_mm(mm);
@@ -261,6 +263,7 @@
                mm_release();
                mmput(mm);
        }
+       spin_unlock(&mm_lock);
 }

 void exit_mm(struct task_struct *tsk)
Index: kernel/fork.c
===================================================================
RCS file: /var/cvs/linux/kernel/fork.c,v
retrieving revision 1.1.1.3
diff -u -r1.1.1.3 fork.c
--- fork.c      1999/01/23 16:30:27     1.1.1.3
+++ linux/kernel/fork.c 1999/01/27 19:36:23
@@ -10,6 +10,11 @@
  * Fork is rather simple, once you get the hang of it, but the memory
  * management can be a bitch. See 'mm/mm.c': 'copy_page_tables()'
  */
+/*
+ * Fixed a race between mmget() and mmput(): added the mm_lock spinlock
+ * to serialize accesses to the tsk->mm field.
+ * Copyright (C) 1999  Andrea Arcangeli
+ */

 #include <linux/malloc.h>
 #include <linux/init.h>
@@ -39,6 +44,7 @@

 struct task_struct **tarray_freelist = NULL;
 spinlock_t taskslot_lock = SPIN_LOCK_UNLOCKED;
+spinlock_t mm_lock = SPIN_LOCK_UNLOCKED;

 /* UID task count cache, to prevent walking entire process list every
  * single fork() operation.
@@ -261,7 +267,7 @@
        if (mm) {
                *mm = *current->mm;
                init_new_context(mm);
-               atomic_set(&mm->count, 1);
+               mm->count = 1;
                mm->map_count = 0;
                mm->def_flags = 0;
                mm->mmap_sem = MUTEX_LOCKED;
@@ -303,12 +309,25 @@
        }
 }

+int mmget(struct mm_struct * mm)
+{
+       int retval = 0;
+
+       if (mm->count)
+       {
+               mm->count++;
+               retval = 1;
+       }
+
+       return retval;
+}
+
 /*
  * Decrement the use count and release all resources for an mm.
  */
 void mmput(struct mm_struct *mm)
 {
-       if (atomic_dec_and_test(&mm->count)) {
+       if (!--mm->count) {
                release_segments(mm);
                exit_mmap(mm);
                free_page_tables(mm);
@@ -322,7 +341,9 @@
        int retval;

        if (clone_flags & CLONE_VM) {
+               spin_lock(&mm_lock);
                mmget(current->mm);
+               spin_unlock(&mm_lock);
                /*
                 * Set up the LDT descriptor for the clone task.
                 */



Current thread: