tcpdump mailing list archives

[Patch] tcpdump probabilistic sampling


From: Jesse Kempf <jkempf () davisvision com>
Date: Tue, 01 Apr 2008 20:19:05 -0400

Hi,
So tcpdump tends to jam up the terminal a bit when you try to dump on a saturated gigabit link. I've added a -P option to tcpdump that lets you specify the probability with which tcpdump prints each packet. It uses drand48() to decide whether each captured packet should be printed. Obviously this isn't the same thing as saying "print every Nth packet": each packet is an independent Bernoulli trial, so only the expected number of printed packets is fixed, while the actual count and the spacing between printed packets vary from run to run.

Also, I hacked up the print_packet function, so this only works in parse-and-print mode.

Cheers,
-Jesse Kempf



------------------------------------------------------------------------
The information contained in this communication is intended
only for the use of the recipient(s) named above. It may
contain information that is privileged or confidential, and
may be protected by State and/or Federal Regulations. If
the reader of this message is not the intended recipient,
you are hereby notified that any dissemination,
distribution, or copying of this communication, or any of
its contents, is strictly prohibited. If you have received
this communication in error, please return it to the sender
immediately and delete the original message and any copy
of it from your computer system. If you have any questions
concerning this message, please contact the sender.
------------------------------------------------------------------------

Index: tcpdump.1
===================================================================
--- tcpdump.1   (.../vendor/tcpdump)    (revision 375)
+++ tcpdump.1   (.../utils/tcpdump)     (revision 382)
@@ -83,6 +83,10 @@
 .B \-W
 .I filecount
 ]
+[
+.B \-P
+.I probability
+]
 .br
 .ti +8
 [
@@ -555,6 +559,12 @@
 the files with enough leading 0s to support the maximum number of
 files, allowing them to sort correctly.
 .TP
+.B \-P
+When parsing and printing, set probabilistic dump mode. For each packet
+captured, the specified
+\fIprobability\fR is used to determine if it is printed.
+The value for \fIprobability\fR must lie in the range (0.0, 1.0].
+.TP
 .B \-x
 When parsing and printing,
 in addition to printing the headers of each packet, print the data of
Index: tcpdump.c
===================================================================
--- tcpdump.c   (.../vendor/tcpdump)    (revision 375)
+++ tcpdump.c   (.../utils/tcpdump)     (revision 382)
@@ -62,6 +62,7 @@
 #include <smi.h>
 #endif
 
+#include <sys/time.h>
 #include <pcap.h>
 #include <signal.h>
 #include <stdio.h>
@@ -294,6 +295,7 @@
 
 struct print_info {
        if_printer printer;
+       double print_probability;
 };
 
 struct dump_info {
@@ -501,7 +503,7 @@
 
        opterr = 0;
        while (
-           (op = getopt(argc, argv, "aA" B_FLAG "c:C:d" D_FLAG "eE:fF:i:lLm:M:nNOpqr:Rs:StT:u" U_FLAG "vw:W:xXy:YZ:")) 
!= -1)
+           (op = getopt(argc, argv, "aA" B_FLAG "c:C:d" D_FLAG "eE:fF:i:lLm:M:nNOpP:qr:Rs:StT:u" U_FLAG 
"vw:W:xXy:YZ:")) != -1)
                switch (op) {
 
                case 'a':
@@ -668,6 +670,24 @@
                        ++pflag;
                        break;
 
+               case 'P': {
+                       struct timeval tv;
+                       double pp;
+                       char *end;
+
+                       pp = strtod(optarg, &end);
+                       if (optarg == end || *end != '\0'
+                           || pp <= 0.0 || pp > 1.0)
+                               error("Invalid sample probability %s", optarg);
+                       else
+                               printinfo.print_probability = pp;
+
+                       /* Seed the RNG from the current time */
+                       gettimeofday(&tv, NULL);
+                       srand48(tv.tv_sec);
+
+                       break;
+               }
                case 'q':
                        ++qflag;
                        ++suppress_default_print;
@@ -1227,14 +1247,21 @@
 {
        struct print_info *print_info;
        u_int hdrlen;
+       double randval;
 
        ++packets_captured;
 
        ++infodelay;
-       ts_print(&h->ts);
 
        print_info = (struct print_info *)user;
 
+       if (print_info->print_probability > 0) {
+               randval = drand48();
+               if (randval > print_info->print_probability)
+                       return;
+       }
+
+       ts_print(&h->ts);
        /*
         * Some printers want to check that they're not walking off the
         * end of the packet.
@@ -1416,6 +1443,8 @@
        (void)fprintf(stderr,
 "\t\t[ -E algo:secret ] [ -F file ] [ -i interface ] [ -M secret ]\n");
        (void)fprintf(stderr,
+"\t\t[ -P probability ]\n");
+       (void)fprintf(stderr,
 "\t\t[ -r file ] [ -s snaplen ] [ -T type ] [ -w file ]\n");
        (void)fprintf(stderr,
 "\t\t[ -W filecount ] [ -y datalinktype ] [ -Z user ]\n");
-
This is the tcpdump-workers list.
Visit https://cod.sandelman.ca/ to unsubscribe.

Current thread: