tcpdump mailing list archives

optimizing large filters without blowing out stack


From: Alexander Dupuy <dupuy () cs columbia edu>
Date: Thu, 29 Dec 2005 18:03:11 -0500

The current libpcap optimization functions rely heavily on recursive functions; when optimizing very large filters, this can cause core dumps due to excessive stack usage in some worst-case scenarios.

The attached patch replaces a few of the "deepest" recursive functions with implementations that use iteration (or in cases where there are two branches that must be followed, a combination of iteration and recursion) that dramatically reduces the stack usage and allows much larger filters to be constructed without segmentation violations.

First, a bit of shell code to construct some large filters for testing:

$ cat genfilter.sh
#!/bin/sh
if [ $# -ne 1 ]; then
  echo 1>&2 "Usage: $0 filter-size"; exit 1
fi
echo -n "ip and ("
N=$1
while [ 1 -lt "$N" ]
do
  # for some shells, you may need to remove the \ for correct parsing
  A4=$(( 2 \* \( $N % 128 \) ))
  A3=$(( \( $N / 128 \) % 256 ))
  A2=$(( \( $N / \( 128 \* 256 \) \) % 256 ))
  A1=$(( 10 + \( $N / 8388608 \) ))
  echo -n "host $A1.$A2.$A3.$A4 or "
  N=$(( $N - 1 ))
done
echo "host 10.0.0.1)"

(These filters actually don't illustrate the worst-case stack recursion that I was seeing when I ran into this problem originally, perhaps because of length of JT vs. JF chains, but they do allow for some illustrative scaling.)

Then a patch to tcpdump to allow it to read a filter from stdin (these large filters exceed the maximum argument length on a BSD system):

RCS file: /tcpdump/master/tcpdump/util.c,v
retrieving revision 1.104
diff -u -r1.104 util.c
--- util.c      13 Dec 2005 08:47:10 -0000      1.104
+++ util.c      29 Dec 2005 21:13:42 -0000
@@ -495,31 +495,50 @@
        register char *cp;
        struct stat buf;

-       fd = open(fname, O_RDONLY|O_BINARY);
-       if (fd < 0)
-               error("can't open %s: %s", fname, pcap_strerror(errno));
-
-       if (fstat(fd, &buf) < 0)
-               error("can't stat %s: %s", fname, pcap_strerror(errno));
-
-       cp = malloc((u_int)buf.st_size + 1);
-       if (cp == NULL)
-               error("malloc(%d) for %s: %s", (u_int)buf.st_size + 1,
-                       fname, pcap_strerror(errno));
-       cc = read(fd, cp, (u_int)buf.st_size);
-       if (cc < 0)
-               error("read %s: %s", fname, pcap_strerror(errno));
-       if (cc != buf.st_size)
-               error("short read %s (%d != %d)", fname, cc, (int)buf.st_size);
+       if (!strcmp(fname, "-")) {
+               i = 0;
+               fd = BUFSIZ + 1;
+               cp = malloc(fd);
+               if (cp == NULL)
+                       error("malloc(%d) for stdin: %s", (u_int)fd,
+                               pcap_strerror(errno));
+               while ((cc = read(0, &cp[i], fd - i - 1)) > 0) {
+                       i += cc;
+                       fd += cc;
+                       cp = realloc(cp, fd);
+                       if (cp == NULL)
+                               error("realloc(%d) for stdin: %s", (u_int)fd,
+                                       pcap_strerror(errno));
+                       cp[i] = '\0';
+               }
+       }
+       else {
+               fd = open(fname, O_RDONLY|O_BINARY);
+               if (fd < 0)
+                       error("can't open %s: %s", fname, pcap_strerror(errno));
+
+               if (fstat(fd, &buf) < 0)
+                       error("can't stat %s: %s", fname, pcap_strerror(errno));
+
+               cp = malloc((u_int)buf.st_size + 1);
+               if (cp == NULL)
+                       error("malloc(%d) for %s: %s", (u_int)buf.st_size + 1,
+                               fname, pcap_strerror(errno));
+               cc = read(fd, cp, (u_int)buf.st_size);
+               if (cc < 0)
+                       error("read %s: %s", fname, pcap_strerror(errno));
+               if (cc != buf.st_size)
+ error("short read %s (%d != %d)", fname, cc, (int)buf.st_size);

-       close(fd);
+               close(fd);
+               cp[cc] = '\0';
+       }
        /* replace "# comment" with spaces */
        for (i = 0; i < cc; i++) {
                if (cp[i] == '#')
                        while (i < cc && cp[i] != '\n')
                                cp[i++] = ' ';
        }
-       cp[cc] = '\0';
        return (cp);
 }


Here are the "before" stats:

bash-2.05b$ echo `./genfilter.sh 200 | timing ./tcpdump -d -F - | wc -l` BPF instructions
        0.49 real         0.36 user         0.02 sys
       133  average unshared stack size
552 BPF instructions

bash-2.05b$ echo `./genfilter.sh 2000 | timing ./tcpdump -d -F - | wc -l` BPF instructions
      101.74 real        90.56 user         1.77 sys
       769  average unshared stack size
7751 BPF instructions

And "after":

bash-2.05b$ echo `./genfilter.sh 200 | timing ./tcpdump.new -d -F - | wc -l` BPF instructions
        0.49 real         0.39 user         0.00 sys
       120  average unshared stack size
552 BPF instructions

bash-2.05b$ echo `./genfilter.sh 2000 | timing ./tcpdump.new -d -F - | wc -l` BPF instructions
       154  average unshared stack size
7752 BPF instructions

(The one additional BPF instruction is due to a different order of the final ret #0 and ret #96 instructions, and one additional ja extended branch instruction needed as a result).

The actual patch to optimize.c is attached.

@alex
Index: optimize.c
===================================================================
RCS file: /tcpdump/master/libpcap/optimize.c,v
retrieving revision 1.86
diff -u -w -r1.86 optimize.c
--- optimize.c  31 Jul 2005 17:58:24 -0000      1.86
+++ optimize.c  29 Dec 2005 22:58:21 -0000
@@ -217,23 +217,31 @@
 find_levels_r(b)
        struct block *b;
 {
+       struct block *q;
+       int i, j;
        int level;
 
-       if (isMarked(b))
-               return;
-
-       Mark(b);
-       b->link = 0;
+       /* iterate first, marking as we go, then re-iterate with recursion */
+       for (q = b, i = 0; q != NULL && !isMarked(q); i++) {
+               Mark(q);
+               q->link = 0;
+               q = JF(q);
+       }
 
-       if (JT(b)) {
-               find_levels_r(JT(b));
-               find_levels_r(JF(b));
-               level = MAX(JT(b)->level, JF(b)->level) + 1;
+       for (;i > 0; i--) {
+               q = b;
+               for (j = i - 1; j > 0; j--) {
+                       q = JF(q);
+               }
+               if (JT(q)) {
+                       find_levels_r(JT(q));
+                       level = MAX(JT(q)->level, JF(q)->level) + 1;
        } else
                level = 0;
-       b->level = level;
-       b->link = levels[level];
-       levels[level] = b;
+               q->level = level;
+               q->link = levels[level];
+               levels[level] = q;
+       }
 }
 
 /*
@@ -1906,32 +1914,47 @@
 count_blocks(p)
        struct block *p;
 {
-       if (p == 0 || isMarked(p))
-               return 0;
-       Mark(p);
-       return count_blocks(JT(p)) + count_blocks(JF(p)) + 1;
+       struct block *q;
+       int i;
+       int n;
+
+       /* iterate first, marking as we go, then re-iterate with recursion */
+       for (q = p, i = 0; q != NULL && !isMarked(q); i++) {
+               Mark(q);
+               q = JF(q);
+       }
+       n = 0;
+       for (q = p; i > 0; i--) {
+               n += count_blocks(JT(q)) + 1;
+               q = JF(q);
+       }
+       return n;
 }
 
 /*
- * Do a depth first search on the flow graph, numbering the
- * the basic blocks, and entering them into the 'blocks' array.`
+ * Do a search on the flow graph, numbering the basic blocks,
+ * and entering them into the 'blocks' array.`
  */
 static void
 number_blks_r(p)
        struct block *p;
 {
+       struct block *q;
+       int i;
        int n;
 
-       if (p == 0 || isMarked(p))
-               return;
-
-       Mark(p);
+       /* iterate first, marking as we go, then re-iterate with recursion */
+       for (q = p, i = 0; q != NULL && !isMarked(q); i++) {
        n = n_blocks++;
-       p->id = n;
-       blocks[n] = p;
-
-       number_blks_r(JT(p));
-       number_blks_r(JF(p));
+               q->id = n;
+               blocks[n] = q;
+               Mark(q);
+               q = JF(q);
+       }
+       for (q = p; i > 0; i--) {
+               number_blks_r(JT(q));
+               q = JF(q);
+       }
 }
 
 /*
@@ -2235,6 +2258,25 @@
 {
        int n;
        struct bpf_insn *fp;
+       int iter;
+
+       /*
+        * Recursion is simpler (and faster) than iteration but tends to blow
+        * out the stack if the depth is excessive; if the depth exceeds some
+        * threshold (empirically set to 500), we will use iteration instead.
+        */
+       iter = (n_blocks != 0 && root->level > 500);
+
+       if (iter) {
+           /*
+            * The number of levels is bounded by the number of nodes.
+            */
+           levels = (struct block **)malloc(n_blocks * sizeof(*levels));
+           if (levels == NULL)
+               root->level = 0;
+           else
+               find_levels(root);
+       }
 
        /*
         * Loop doing convert_code_r() until no branches remain
@@ -2252,11 +2294,33 @@
            ftail = fp + n;
 
            unMarkAll();
+           if (iter)
+           {
+               int i;
+               int x;
+               struct block *b;
+
+               for (i = 0; i < root->level; i++) {
+                   x = 0;
+                   for (b = levels[i]; b; b = b->link) {
+                       if (!convert_code_r(b))
+                           x = 1;
+                   }
+                   if (x)                      /* some offset too large */
+                       break;
+               }
+
+               if (i != root->level)           /* didn't reach root level */
+                   continue;
+           }
            if (convert_code_r(root))
                break;
            free(fp);
        }
 
+       if (iter)
+           free((void *)levels);
+
        return fp;
 }
 
-
This is the tcpdump-workers list.
Visit https://lists.sandelman.ca/ to unsubscribe.

Current thread: