tcpdump mailing list archives
optimizing large filters without blowing out stack
From: Alexander Dupuy <dupuy () cs columbia edu>
Date: Thu, 29 Dec 2005 18:03:11 -0500
The current libpcap optimization functions rely heavily on recursive functions; when optimizing very large filters, this can cause core dumps due to excessive stack usage in some worst-case scenarios.
The attached patch replaces a few of the "deepest" recursive functions with implementations that use iteration (or in cases where there are two branches that must be followed, a combination of iteration and recursion) that dramatically reduces the stack usage and allows much larger filters to be constructed without segmentation violations.
First, a bit of shell code to construct some large filters for testing: $ cat genfilter.sh #!/bin/sh if [ $# -ne 1 ]; then echo 1>&2 "Usage: $0 filter-size"; exit 1 fi echo -n "ip and (" N=$1 while [ 1 -lt "$N" ] do # for some shells, you may need to remove the \ for correct parsing A4=$(( 2 \* \( $N % 128 \) )) A3=$(( \( $N / 128 \) % 256 )) A2=$(( \( $N / \( 128 \* 256 \) \) % 256 )) A1=$(( 10 + \( $N / 8388608 \) )) echo -n "host $A1.$A2.$A3.$A4 or " N=$(( $N - 1 )) done echo "host 10.0.0.1)" (These filters actually don't illustrate the worst-case stack recursion that I was seeing when I ran into this problem originally, perhaps because of length of JT vs. JF chains, but they do allow for some illustrative scaling.)
Then a patch to tcpdump to allow it to read a filter from stdin (these large filters exceed the maximum argument length on a BSD system):
RCS file: /tcpdump/master/tcpdump/util.c,v retrieving revision 1.104 diff -u -r1.104 util.c --- util.c 13 Dec 2005 08:47:10 -0000 1.104 +++ util.c 29 Dec 2005 21:13:42 -0000 @@ -495,31 +495,50 @@ register char *cp; struct stat buf; - fd = open(fname, O_RDONLY|O_BINARY); - if (fd < 0) - error("can't open %s: %s", fname, pcap_strerror(errno)); - - if (fstat(fd, &buf) < 0) - error("can't stat %s: %s", fname, pcap_strerror(errno)); - - cp = malloc((u_int)buf.st_size + 1); - if (cp == NULL) - error("malloc(%d) for %s: %s", (u_int)buf.st_size + 1, - fname, pcap_strerror(errno)); - cc = read(fd, cp, (u_int)buf.st_size); - if (cc < 0) - error("read %s: %s", fname, pcap_strerror(errno)); - if (cc != buf.st_size) - error("short read %s (%d != %d)", fname, cc, (int)buf.st_size); + if (!strcmp(fname, "-")) { + i = 0; + fd = BUFSIZ + 1; + cp = malloc(fd); + if (cp == NULL) + error("malloc(%d) for stdin: %s", (u_int)fd, + pcap_strerror(errno)); + while ((cc = read(0, &cp[i], fd - i - 1)) > 0) { + i += cc; + fd += cc; + cp = realloc(cp, fd); + if (cp == NULL) + error("realloc(%d) for stdin: %s", (u_int)fd, + pcap_strerror(errno)); + cp[i] = '\0'; + } + } + else { + fd = open(fname, O_RDONLY|O_BINARY); + if (fd < 0) + error("can't open %s: %s", fname, pcap_strerror(errno)); + + if (fstat(fd, &buf) < 0) + error("can't stat %s: %s", fname, pcap_strerror(errno)); + + cp = malloc((u_int)buf.st_size + 1); + if (cp == NULL) + error("malloc(%d) for %s: %s", (u_int)buf.st_size + 1, + fname, pcap_strerror(errno)); + cc = read(fd, cp, (u_int)buf.st_size); + if (cc < 0) + error("read %s: %s", fname, pcap_strerror(errno)); + if (cc != buf.st_size)+ error("short read %s (%d != %d)", fname, cc, (int)buf.st_size);
- close(fd); + close(fd); + cp[cc] = '\0'; + } /* replace "# comment" with spaces */ for (i = 0; i < cc; i++) { if (cp[i] == '#') while (i < cc && cp[i] != '\n') cp[i++] = ' '; } - cp[cc] = '\0'; return (cp); } Here are the "before" stats: bash-2.05b$ echo `./genfilter.sh 200 | timing ./tcpdump -d -F - | wc -l` BPF instructions
0.49 real 0.36 user 0.02 sys 133 average unshared stack size 552 BPF instructions bash-2.05b$ echo `./genfilter.sh 2000 | timing ./tcpdump -d -F - | wc -l` BPF instructions
101.74 real 90.56 user 1.77 sys 769 average unshared stack size 7751 BPF instructions And "after": bash-2.05b$ echo `./genfilter.sh 200 | timing ./tcpdump.new -d -F - | wc -l` BPF instructions
0.49 real 0.39 user 0.00 sys 120 average unshared stack size 552 BPF instructions bash-2.05b$ echo `./genfilter.sh 2000 | timing ./tcpdump.new -d -F - | wc -l` BPF instructions
154 average unshared stack size 7752 BPF instructions (The one additional BPF instruction is due to a different order of the final ret #0 and ret #96 instructions, and one additional ja extended branch instruction needed as a result).
The actual patch to optimize.c is attached. @alex
Index: optimize.c =================================================================== RCS file: /tcpdump/master/libpcap/optimize.c,v retrieving revision 1.86 diff -u -w -r1.86 optimize.c --- optimize.c 31 Jul 2005 17:58:24 -0000 1.86 +++ optimize.c 29 Dec 2005 22:58:21 -0000 @@ -217,23 +217,31 @@ find_levels_r(b) struct block *b; { + struct block *q; + int i, j; int level; - if (isMarked(b)) - return; - - Mark(b); - b->link = 0; + /* iterate first, marking as we go, then re-iterate with recursion */ + for (q = b, i = 0; q != NULL && !isMarked(q); i++) { + Mark(q); + q->link = 0; + q = JF(q); + } - if (JT(b)) { - find_levels_r(JT(b)); - find_levels_r(JF(b)); - level = MAX(JT(b)->level, JF(b)->level) + 1; + for (;i > 0; i--) { + q = b; + for (j = i - 1; j > 0; j--) { + q = JF(q); + } + if (JT(q)) { + find_levels_r(JT(q)); + level = MAX(JT(q)->level, JF(q)->level) + 1; } else level = 0; - b->level = level; - b->link = levels[level]; - levels[level] = b; + q->level = level; + q->link = levels[level]; + levels[level] = q; + } } /* @@ -1906,32 +1914,47 @@ count_blocks(p) struct block *p; { - if (p == 0 || isMarked(p)) - return 0; - Mark(p); - return count_blocks(JT(p)) + count_blocks(JF(p)) + 1; + struct block *q; + int i; + int n; + + /* iterate first, marking as we go, then re-iterate with recursion */ + for (q = p, i = 0; q != NULL && !isMarked(q); i++) { + Mark(q); + q = JF(q); + } + n = 0; + for (q = p; i > 0; i--) { + n += count_blocks(JT(q)) + 1; + q = JF(q); + } + return n; } /* - * Do a depth first search on the flow graph, numbering the - * the basic blocks, and entering them into the 'blocks' array.` + * Do a search on the flow graph, numbering the basic blocks, + * and entering them into the 'blocks' array.` */ static void number_blks_r(p) struct block *p; { + struct block *q; + int i; int n; - if (p == 0 || isMarked(p)) - return; - - Mark(p); + /* iterate first, marking as we go, then re-iterate with recursion */ + for (q = p, i = 0; q != NULL && 
!isMarked(q); i++) { n = n_blocks++; - p->id = n; - blocks[n] = p; - - number_blks_r(JT(p)); - number_blks_r(JF(p)); + q->id = n; + blocks[n] = q; + Mark(q); + q = JF(q); + } + for (q = p; i > 0; i--) { + number_blks_r(JT(q)); + q = JF(q); + } } /* @@ -2235,6 +2258,25 @@ { int n; struct bpf_insn *fp; + int iter; + + /* + * Recursion is simpler (and faster) than iteration but tends to blow + * out the stack if the depth is excessive; if the depth exceeds some + * threshold (empirically set to 500), we will use iteration instead. + */ + iter = (n_blocks != 0 && root->level > 500); + + if (iter) { + /* + * The number of levels is bounded by the number of nodes. + */ + levels = (struct block **)malloc(n_blocks * sizeof(*levels)); + if (levels == NULL) + root->level = 0; + else + find_levels(root); + } /* * Loop doing convert_code_r() until no branches remain @@ -2252,11 +2294,33 @@ ftail = fp + n; unMarkAll(); + if (iter) + { + int i; + int x; + struct block *b; + + for (i = 0; i < root->level; i++) { + x = 0; + for (b = levels[i]; b; b = b->link) { + if (!convert_code_r(b)) + x = 1; + } + if (x) /* some offset too large */ + break; + } + + if (i != root->level) /* didn't reach root level */ + continue; + } if (convert_code_r(root)) break; free(fp); } + if (iter) + free((void *)levels); + return fp; }
- This is the tcpdump-workers list. Visit https://lists.sandelman.ca/ to unsubscribe.
Current thread:
- optimizing large filters without blowing out stack Alexander Dupuy (Jan 10)