tcpdump mailing list archives

[PATCH][RFC] fix BPF filter for VLAN tagged packets


From: Ani Sinha <ani () aristanetworks com>
Date: Sun, 6 Jan 2013 11:45:21 -0800 (PST)

Hi folks :

This is the first cut of my patch that tries to fix the BPF filter code
generation issue. Please note that this is the first time I have hacked
libpcap, so I might be off in terms of code philosophy, organization of
code, etc. This change has been tested by me and independently by another
grad student at UC Berkeley (CC'd here), although neither of us has yet
tested all of its aspects thoroughly. I am sending it out to the
community so that others too can give it a try and report fixes. I will
also give it more testing in the coming weeks.

The description below summarizes the patch. I'd appreciate all the
comments that I can get on this issue.

Thanks,
ani


Fix BPF filter for VLAN tagged packets

 - Linux kernel no longer puts (outermost) vlan tags within the packet but in packet metadata.
   Use special ANC negative offsets when generating the filter code to check if it's a vlan tagged packet
   and whether the vlan ID matches that of the one specified in the filter expression.
 - When executing the filter code in userland, take care of these special ANC values - use the vlan tag extracted from
   the packet metadata for comparison.
 - For saving the captured packets, re-insert the vlan tags back into the packet. That way, both old and new pcap files
   will have vlan tags inserted into the packet itself in the pcap files. The behaviour would not change between old
   and new kernels.
 - For older Linux kernels that had the vlan tags as a part of the packet and those kernels that did not have the
   special ANC opcodes for vlan tag access, use the old mechanism - generate filter code that looks into the packet
   offset for vlan tags.
 - For all platforms other than Linux, do not change the current behaviour.

What this patch does not handle :

When the 'vlan' keyword is not specified in the filter expression, the filter code generator does not by default add a
'not filter' expression so that only untagged packets pass the filter. So for example, an expression like 'tcp port 80'
will also include vlan tagged packets. We need to handle this in a future patch.

Signed-off-by: Ani Sinha <ani () aristanetworks com>

diff --git a/Makefile.in b/Makefile.in
index 772cc7d..c030a7e 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -82,7 +82,7 @@ YACC = @V_YACC@
        @rm -f $@
        $(CC) $(FULL_CFLAGS) -c $(srcdir)/$*.c

-PSRC = pcap-@V_PCAP@.c @USB_SRC@ @BT_SRC@ @CAN_SRC@ @NETFILTER_SRC@ @CANUSB_SRC@
+PSRC = pcap-@V_PCAP@.c @USB_SRC@ @BT_SRC@ @CAN_SRC@ @NETFILTER_SRC@ @CANUSB_SRC@ @BPF_FILTER_P@
 FSRC =  fad-@V_FINDALLDEVS@.c
 SSRC =  @SSRC@
 CSRC = pcap.c inet.c gencode.c optimize.c nametoaddr.c etherent.c \
@@ -301,6 +301,7 @@ EXTRA_DIST = \
        pcap-int.h \
        pcap-libdlpi.c \
        pcap-linux.c \
+       bpf_filter_linux.c \
        pcap-namedb.h \
        pcap-netfilter-linux.c \
        pcap-netfilter-linux.h \
diff --git a/bpf_filter_linux.c b/bpf_filter_linux.c
new file mode 100644
index 0000000..1d5d70f
--- /dev/null
+++ b/bpf_filter_linux.c
@@ -0,0 +1,571 @@
+/*-
+ * Copyright (c) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from the Stanford/CMU enet packet filter,
+ * (net/enet.c) distributed as part of 4.3BSD, and code contributed
+ * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
+ * Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *     @(#)bpf.c       7.5 (Berkeley) 7/15/91
+ */
+
+#if !(defined(lint) || defined(KERNEL) || defined(_KERNEL))
+static const char rcsid[] _U_ =
+    "@(#) $Header: /tcpdump/master/libpcap/bpf_filter_linux.c,v 1.46 2008-01-02 04:16:46 guy Exp $ (LBL)";
+#endif
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#if HAVE_INTTYPES_H
+#include <inttypes.h>
+#elif HAVE_STDINT_H
+#include <stdint.h>
+#endif
+#ifdef HAVE_SYS_BITYPES_H
+#include <sys/bitypes.h>
+#endif
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/time.h>
+
+#define        SOLARIS (defined(sun) && (defined(__SVR4) || defined(__svr4__)))
+#if defined(__hpux) || SOLARIS
+# include <sys/sysmacros.h>
+# include <sys/stream.h>
+# define       mbuf    msgb
+# define       m_next  b_cont
+# define       MLEN(m) ((m)->b_wptr - (m)->b_rptr)
+# define       mtod(m,t)       ((t)(m)->b_rptr)
+#else /* defined(__hpux) || SOLARIS */
+# define       MLEN(m) ((m)->m_len)
+#endif /* defined(__hpux) || SOLARIS */
+
+#include <pcap/bpf.h>
+#include <linux/filter.h>
+#include <linux/if_packet.h>
+
+#if !defined(KERNEL) && !defined(_KERNEL)
+#include <stdlib.h>
+#endif
+
+#define int32 bpf_int32
+#define u_int32 bpf_u_int32
+
+#ifndef LBL_ALIGN
+/*
+ * XXX - IA-64?  If not, this probably won't work on Win64 IA-64
+ * systems, unless LBL_ALIGN is defined elsewhere for them.
+ * XXX - SuperH?  If not, this probably won't work on WinCE SuperH
+ * systems, unless LBL_ALIGN is defined elsewhere for them.
+ */
+#if defined(sparc) || defined(__sparc__) || defined(mips) || \
+    defined(ibm032) || defined(__alpha) || defined(__hpux) || \
+    defined(__arm__)
+#define LBL_ALIGN
+#endif
+#endif
+
+#ifndef LBL_ALIGN
+#include <netinet/in.h>
+
+#define EXTRACT_SHORT(p)       ((u_short)ntohs(*(u_short *)p))
+#define EXTRACT_LONG(p)                (ntohl(*(u_int32 *)p))
+#else
+#define EXTRACT_SHORT(p)\
+       ((u_short)\
+               ((u_short)*((u_char *)p+0)<<8|\
+                (u_short)*((u_char *)p+1)<<0))
+#define EXTRACT_LONG(p)\
+               ((u_int32)*((u_char *)p+0)<<24|\
+                (u_int32)*((u_char *)p+1)<<16|\
+                (u_int32)*((u_char *)p+2)<<8|\
+                (u_int32)*((u_char *)p+3)<<0)
+#endif
+
+#if defined(KERNEL) || defined(_KERNEL)
+# if !defined(__hpux) && !SOLARIS
+#include <sys/mbuf.h>
+# endif
+#define MINDEX(len, _m, _k) /* step _m/_k to the mbuf that holds offset _k */ \
+{ \
+       len = MLEN(_m); \
+       while ((_k) >= len) { \
+               (_k) -= len; \
+               (_m) = (_m)->m_next; \
+               if ((_m) == 0) /* chain exhausted: the *enclosing function* returns 0 */ \
+                       return 0; \
+               len = MLEN(_m); /* length of the mbuf just stepped to, not of the chain head */ \
+       } \
+}
+
+static int
+m_xword(m, k, err)     /* big-endian 32-bit load at byte offset k of mbuf chain m */
+       register struct mbuf *m;
+       register int k, *err;   /* *err is set to 0 on success, 1 on failure */
+{
+       register int len;
+       register u_char *cp, *np;
+       register struct mbuf *m0;
+       *err = 1;       /* MINDEX returns 0 by itself when the chain is too short */
+       MINDEX(len, m, k);
+       cp = mtod(m, u_char *) + k;
+       if (len - k >= 4) {     /* the whole word lies inside this mbuf */
+               *err = 0;
+               return EXTRACT_LONG(cp);
+       }
+       m0 = m->m_next;
+       if (m0 == 0 || MLEN(m0) + len - k < 4)
+               goto bad;
+       *err = 0;
+       np = mtod(m0, u_char *);
+       switch (len - k) {      /* number of bytes of the word available in m */
+
+       case 1:
+               return (cp[0] << 24) | (np[0] << 16) | (np[1] << 8) | np[2];
+
+       case 2:
+               return (cp[0] << 24) | (cp[1] << 16) | (np[0] << 8) | np[1];
+
+       default:
+               return (cp[0] << 24) | (cp[1] << 16) | (cp[2] << 8) | np[0];
+       }
+    bad:
+       *err = 1;
+       return 0;
+}
+
+static int
+m_xhalf(m, k, err)     /* big-endian 16-bit load at byte offset k of mbuf chain m */
+       register struct mbuf *m;
+       register int k, *err;   /* *err is set to 0 on success, 1 on failure */
+{
+       register int len;
+       register u_char *cp;
+       register struct mbuf *m0;
+       *err = 1;       /* MINDEX returns 0 by itself when the chain is too short */
+       MINDEX(len, m, k);
+       cp = mtod(m, u_char *) + k;
+       if (len - k >= 2) {     /* both bytes lie inside this mbuf */
+               *err = 0;
+               return EXTRACT_SHORT(cp);
+       }
+       m0 = m->m_next;
+       if (m0 == 0)
+               goto bad;
+       *err = 0;
+       return (cp[0] << 8) | mtod(m0, u_char *)[0];    /* low byte comes from the next mbuf */
+ bad:
+       *err = 1;
+       return 0;
+}
+#endif /* KERNEL or _KERNEL */
+
+enum {
+       /* Internal selectors for ancillary (SKF_AD_*) loads; BPF_S_ANC_NONE marks an ordinary packet load */
+       BPF_S_ANC_NONE,
+       BPF_S_ANC_PROTOCOL,
+       BPF_S_ANC_PKTTYPE,
+       BPF_S_ANC_IFINDEX,
+       BPF_S_ANC_NLATTR,
+       BPF_S_ANC_NLATTR_NEST,
+       BPF_S_ANC_MARK,
+       BPF_S_ANC_QUEUE,
+       BPF_S_ANC_HATYPE,
+       BPF_S_ANC_RXHASH,
+       BPF_S_ANC_CPU,
+       BPF_S_ANC_ALU_XOR_X,
+       BPF_S_ANC_SECCOMP_LD_W,
+       BPF_S_ANC_VLAN_TAG,             /* load the vlan tag value derived from tp_vlan_tci */
+       BPF_S_ANC_VLAN_TAG_PRESENT,     /* load 1 or 0: whether a vlan tag is flagged as present */
+};
+
+/*
+ * Execute the filter program starting at pc on the packet p
+ * tp_vlan_tci is the vlan tag information supplied by the caller; it is
+ * what the SKF_AD_VLAN_TAG and SKF_AD_VLAN_TAG_PRESENT loads operate on
+ * wirelen is the length of the original packet
+ * buflen is the amount of data present
+ * For the kernel, p is assumed to be a pointer to an mbuf if buflen is 0,
+ * in all other cases, p is a pointer to a buffer and buflen is its size.
+ * Returns the value of the RET instruction reached, (u_int)-1 if pc is
+ * NULL, or 0 if a load is out of range or BPF_DIV|BPF_X divides by zero.
+ */
+u_int
+bpf_filter_linux(const struct bpf_insn *pc, const u_char *p,
+       u_int16_t tp_vlan_tci, u_int wirelen, u_int buflen)
+{
+       register u_int32 A, X;
+       register int k;
+       int32 mem[BPF_MEMWORDS];
+#if defined(KERNEL) || defined(_KERNEL)
+       struct mbuf *m, *n;
+       int merr, len;
+
+       if (buflen == 0) {
+               m = (struct mbuf *)p;
+               p = mtod(m, u_char *);
+               buflen = MLEN(m);
+       } else
+               m = NULL;
+#endif
+
+       if (pc == 0)
+               /*
+                * No filter means accept all.
+                */
+               return (u_int)-1;
+       A = 0;
+       X = 0;
+       --pc;   /* the loop pre-increments pc before every fetch */
+       while (1) {
+               ++pc;
+               switch (pc->code) {
+
+               default:        /* unknown opcode */
+#if defined(KERNEL) || defined(_KERNEL)
+                       return 0;
+#else
+                       abort();
+#endif
+               case BPF_RET|BPF_K:
+                       return (u_int)pc->k;
+
+               case BPF_RET|BPF_A:
+                       return (u_int)A;
+
+               case BPF_LD|BPF_W|BPF_ABS:
+                       k = pc->k;
+                       /* k < 0: k + sizeof() would wrap for small negative k */
+                       if (k < 0 || k + sizeof(int32) > buflen) {
+#if defined(KERNEL) || defined(_KERNEL)
+                               if (m == NULL)
+                                       return 0;
+                               A = m_xword(m, k, &merr);
+                               if (merr != 0)
+                                       return 0;
+                               continue;
+#else
+                               return 0;
+#endif
+                       }
+                       A = EXTRACT_LONG(&p[k]);
+                       continue;
+
+               case BPF_LD|BPF_H|BPF_ABS:
+                       k = pc->k;
+                       if (k < 0 || k + sizeof(short) > buflen) {
+#if defined(KERNEL) || defined(_KERNEL)
+                               if (m == NULL)
+                                       return 0;
+                               A = m_xhalf(m, k, &merr);
+                               if (merr != 0)
+                                       return 0;
+                               continue;
+#else
+                               return 0;
+#endif
+                       }
+                       A = EXTRACT_SHORT(&p[k]);
+                       continue;
+
+               case BPF_LD|BPF_B|BPF_ABS:
+               {
+#if defined(SKF_AD_VLAN_TAG) && defined(SKF_AD_VLAN_TAG_PRESENT)
+                       int code = BPF_S_ANC_NONE;
+#define ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE:       \
+                               code = BPF_S_ANC_##CODE;        \
+                               break
+                       switch (pc->k) {
+/*                     ANCILLARY(PROTOCOL);
+                       ANCILLARY(PKTTYPE);
+                       ANCILLARY(IFINDEX);
+                       ANCILLARY(NLATTR);
+                       ANCILLARY(NLATTR_NEST);
+                       ANCILLARY(MARK);
+                       ANCILLARY(QUEUE);
+                       ANCILLARY(HATYPE);
+                       ANCILLARY(RXHASH);
+                       ANCILLARY(CPU);
+                       ANCILLARY(ALU_XOR_X); */
+                       ANCILLARY(VLAN_TAG);
+                       ANCILLARY(VLAN_TAG_PRESENT);
+                        default :      /* not an ancillary offset: ordinary byte load */
+#endif
+                          k = pc->k;
+                          if (k >= buflen) {
+#if defined(KERNEL) || defined(_KERNEL)
+                             if (m == NULL)
+                                return 0;
+                             n = m;
+                             MINDEX(len, n, k);
+                             A = mtod(n, u_char *)[k];
+                             continue;
+#else
+                             return 0;
+#endif
+                          }
+                          A = p[k];
+#if defined(SKF_AD_VLAN_TAG) && defined(SKF_AD_VLAN_TAG_PRESENT)
+                       }
+                       switch (code) { /* BPF_S_ANC_NONE falls through untouched */
+                       case BPF_S_ANC_VLAN_TAG:
+                               A = tp_vlan_tci & ~TP_STATUS_VLAN_VALID; /* NOTE(review): TP_STATUS_VLAN_VALID looks like a tp_status flag, not a TCI bit - confirm against the caller */
+                               break;
+
+                       case BPF_S_ANC_VLAN_TAG_PRESENT:
+                               A = !!(tp_vlan_tci & TP_STATUS_VLAN_VALID); /* NOTE(review): same mask question as above */
+                               break;
+                       }
+#endif
+                       continue;
+               }
+               case BPF_LD|BPF_W|BPF_LEN:
+                       A = wirelen;
+                       continue;
+
+               case BPF_LDX|BPF_W|BPF_LEN:
+                       X = wirelen;
+                       continue;
+
+               case BPF_LD|BPF_W|BPF_IND:
+                       k = X + pc->k;
+                       if (k < 0 || k + sizeof(int32) > buflen) {
+#if defined(KERNEL) || defined(_KERNEL)
+                               if (m == NULL)
+                                       return 0;
+                               A = m_xword(m, k, &merr);
+                               if (merr != 0)
+                                       return 0;
+                               continue;
+#else
+                               return 0;
+#endif
+                       }
+                       A = EXTRACT_LONG(&p[k]);
+                       continue;
+
+               case BPF_LD|BPF_H|BPF_IND:
+                       k = X + pc->k;
+                       if (k < 0 || k + sizeof(short) > buflen) {
+#if defined(KERNEL) || defined(_KERNEL)
+                               if (m == NULL)
+                                       return 0;
+                               A = m_xhalf(m, k, &merr);
+                               if (merr != 0)
+                                       return 0;
+                               continue;
+#else
+                               return 0;
+#endif
+                       }
+                       A = EXTRACT_SHORT(&p[k]);
+                       continue;
+
+               case BPF_LD|BPF_B|BPF_IND:
+                       k = X + pc->k;
+                       if (k >= buflen) {
+#if defined(KERNEL) || defined(_KERNEL)
+                               if (m == NULL)
+                                       return 0;
+                               n = m;
+                               MINDEX(len, n, k);
+                               A = mtod(n, u_char *)[k];
+                               continue;
+#else
+                               return 0;
+#endif
+                       }
+                       A = p[k];
+                       continue;
+
+               case BPF_LDX|BPF_MSH|BPF_B:
+                       k = pc->k;
+                       if (k >= buflen) {
+#if defined(KERNEL) || defined(_KERNEL)
+                               if (m == NULL)
+                                       return 0;
+                               n = m;
+                               MINDEX(len, n, k);
+                               X = (mtod(n, char *)[k] & 0xf) << 2;
+                               continue;
+#else
+                               return 0;
+#endif
+                       }
+                       X = (p[pc->k] & 0xf) << 2;      /* low nibble times 4 */
+                       continue;
+
+               case BPF_LD|BPF_IMM:
+                       A = pc->k;
+                       continue;
+
+               case BPF_LDX|BPF_IMM:
+                       X = pc->k;
+                       continue;
+
+               case BPF_LD|BPF_MEM:
+                       A = mem[pc->k];
+                       continue;
+
+               case BPF_LDX|BPF_MEM:
+                       X = mem[pc->k];
+                       continue;
+
+               case BPF_ST:
+                       mem[pc->k] = A;
+                       continue;
+
+               case BPF_STX:
+                       mem[pc->k] = X;
+                       continue;
+
+               case BPF_JMP|BPF_JA:
+                       pc += pc->k;
+                       continue;
+
+               case BPF_JMP|BPF_JGT|BPF_K:
+                       pc += (A > pc->k) ? pc->jt : pc->jf;
+                       continue;
+
+               case BPF_JMP|BPF_JGE|BPF_K:
+                       pc += (A >= pc->k) ? pc->jt : pc->jf;
+                       continue;
+
+               case BPF_JMP|BPF_JEQ|BPF_K:
+                       pc += (A == pc->k) ? pc->jt : pc->jf;
+                       continue;
+
+               case BPF_JMP|BPF_JSET|BPF_K:
+                       pc += (A & pc->k) ? pc->jt : pc->jf;
+                       continue;
+
+               case BPF_JMP|BPF_JGT|BPF_X:
+                       pc += (A > X) ? pc->jt : pc->jf;
+                       continue;
+
+               case BPF_JMP|BPF_JGE|BPF_X:
+                       pc += (A >= X) ? pc->jt : pc->jf;
+                       continue;
+
+               case BPF_JMP|BPF_JEQ|BPF_X:
+                       pc += (A == X) ? pc->jt : pc->jf;
+                       continue;
+
+               case BPF_JMP|BPF_JSET|BPF_X:
+                       pc += (A & X) ? pc->jt : pc->jf;
+                       continue;
+
+               case BPF_ALU|BPF_ADD|BPF_X:
+                       A += X;
+                       continue;
+
+               case BPF_ALU|BPF_SUB|BPF_X:
+                       A -= X;
+                       continue;
+
+               case BPF_ALU|BPF_MUL|BPF_X:
+                       A *= X;
+                       continue;
+
+               case BPF_ALU|BPF_DIV|BPF_X:
+                       if (X == 0)
+                               return 0;
+                       A /= X;
+                       continue;
+
+               case BPF_ALU|BPF_AND|BPF_X:
+                       A &= X;
+                       continue;
+
+               case BPF_ALU|BPF_OR|BPF_X:
+                       A |= X;
+                       continue;
+
+               case BPF_ALU|BPF_LSH|BPF_X:
+                       A <<= X;
+                       continue;
+
+               case BPF_ALU|BPF_RSH|BPF_X:
+                       A >>= X;
+                       continue;
+
+               case BPF_ALU|BPF_ADD|BPF_K:
+                       A += pc->k;
+                       continue;
+
+               case BPF_ALU|BPF_SUB|BPF_K:
+                       A -= pc->k;
+                       continue;
+
+               case BPF_ALU|BPF_MUL|BPF_K:
+                       A *= pc->k;
+                       continue;
+
+               case BPF_ALU|BPF_DIV|BPF_K:
+                       A /= pc->k;     /* no zero check here, unlike BPF_DIV|BPF_X */
+                       continue;
+
+               case BPF_ALU|BPF_AND|BPF_K:
+                       A &= pc->k;
+                       continue;
+
+               case BPF_ALU|BPF_OR|BPF_K:
+                       A |= pc->k;
+                       continue;
+
+               case BPF_ALU|BPF_LSH|BPF_K:
+                       A <<= pc->k;
+                       continue;
+
+               case BPF_ALU|BPF_RSH|BPF_K:
+                       A >>= pc->k;
+                       continue;
+
+               case BPF_ALU|BPF_NEG:
+                       A = -A;
+                       continue;
+
+               case BPF_MISC|BPF_TAX:
+                       X = A;
+                       continue;
+
+               case BPF_MISC|BPF_TXA:
+                       A = X;
+                       continue;
+
+               }
+       }
+}
+
+
diff --git a/configure b/configure
index af70be8..60f1f27 100755
--- a/configure
+++ b/configure
@@ -685,6 +685,7 @@ V_DEFS
 V_FINDALLDEVS
 V_INCLS
 V_PCAP
+BPF_FILTER_P
 V_SHLIB_CMD
 V_SHLIB_OPT
 V_SONAME_OPT
@@ -7267,6 +7268,20 @@ fi
 { echo "$as_me:$LINENO: result: $V_PCAP" >&5
 echo "${ECHO_T}$V_PCAP" >&6; }

+
+#
+# add custom filter execution modules
+#
+        { echo "$as_me:$LINENO: adding custom $V_PCAP filter execution module if it exists ..." >&5
+echo "$as_me: adding custom $V_PCAP filter execution module if it exists ..." >&6;}
+case "$V_PCAP" in
+linux)
+       BPF_FILTER_P=bpf_filter_linux.c
+{ echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6; }
+       ;;
+esac
+
 #
 # Do capture-mechanism-dependent tests.
 #
@@ -7913,7 +7928,7 @@ cat >>conftest.$ac_ext <<_ACEOF
 int
 main ()
 {
-u_int i = sizeof(((struct tpacket_auxdata *)0)->tp_vlan_tci)
+__u16 i = sizeof(((struct tpacket_auxdata *)0)->tp_vlan_tci)
   ;
   return 0;
 }
@@ -12575,6 +12590,7 @@ V_DEFS!$V_DEFS$ac_delim
 V_FINDALLDEVS!$V_FINDALLDEVS$ac_delim
 V_INCLS!$V_INCLS$ac_delim
 V_PCAP!$V_PCAP$ac_delim
+BPF_FILTER_P!$BPF_FILTER_P$ac_delim
 V_SHLIB_CMD!$V_SHLIB_CMD$ac_delim
 V_SHLIB_OPT!$V_SHLIB_OPT$ac_delim
 V_SONAME_OPT!$V_SONAME_OPT$ac_delim
@@ -12601,7 +12617,7 @@ INSTALL_DATA!$INSTALL_DATA$ac_delim
 LTLIBOBJS!$LTLIBOBJS$ac_delim
 _ACEOF

-  if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 95; then
+  if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 96; then
     break
   elif $ac_last_try; then
     { { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
diff --git a/configure.in b/configure.in
index 1a92983..b8c26bb 100644
--- a/configure.in
+++ b/configure.in
@@ -341,6 +341,18 @@ fi
 AC_MSG_RESULT($V_PCAP)

 #
+# add custom filter execution modules
+#
+AC_MSG_NOTICE(adding custom $V_PCAP filter execution module if it exists ...)
+case "$V_PCAP" in
+linux)
+       BPF_FILTER_P=bpf_filter_linux.c
+       AC_MSG_RESULT(yes)
+       ;;
+esac
+AC_SUBST(BPF_FILTER_P)
+
+#
 # Do capture-mechanism-dependent tests.
 #
 case "$V_PCAP" in
diff --git a/gencode.c b/gencode.c
index cdb0d34..aa50c0f 100644
--- a/gencode.c
+++ b/gencode.c
@@ -56,6 +56,11 @@ static const char rcsid[] _U_ =
 #include <sys/param.h>
 #endif

+#ifdef __linux__
+#include <linux/filter.h>
+#undef BPF_MAJOR_VERSION
+#endif
+
 #include <netinet/in.h>
 #include <arpa/inet.h>

@@ -135,9 +140,9 @@ static pcap_t *bpf_pcap;

 /* Hack for updating VLAN, MPLS, and PPPoE offsets. */
 #ifdef WIN32
-static u_int   orig_linktype = (u_int)-1, orig_nl = (u_int)-1, label_stack_depth = (u_int)-1;
+static u_int   orig_linktype = (u_int)-1, orig_nl = (u_int)-1, label_stack_depth = (u_int)-1, vlan_stack_depth = 
(u_int)-1;
 #else
-static u_int   orig_linktype = -1U, orig_nl = -1U, label_stack_depth = -1U;
+static u_int   orig_linktype = -1U, orig_nl = -1U, label_stack_depth = -1U, vlan_stack_depth = -1U;
 #endif

 /* XXX */
@@ -964,6 +969,7 @@ init_linktype(p)
        orig_linktype = -1;
        orig_nl = -1;
         label_stack_depth = 0;
+       vlan_stack_depth = 0;

        reg_off_ll = -1;
        reg_off_macpl = -1;
@@ -7904,28 +7910,59 @@ gen_vlan(vlan_num)
        case DLT_EN10MB:
        case DLT_NETANALYZER:
        case DLT_NETANALYZER_TRANSPARENT:
-               /* check for VLAN, including QinQ */
-               b0 = gen_cmp(OR_LINK, off_linktype, BPF_H,
-                   (bpf_int32)ETHERTYPE_8021Q);
-               b1 = gen_cmp(OR_LINK, off_linktype, BPF_H,
-                   (bpf_int32)ETHERTYPE_8021QINQ);
-               gen_or(b0,b1);
-               b0 = b1;
-
-               /* If a specific VLAN is requested, check VLAN id */
-               if (vlan_num >= 0) {
-                       b1 = gen_mcmp(OR_MACPL, 0, BPF_H,
-                           (bpf_int32)vlan_num, 0x0fff);
-                       gen_and(b0, b1);
+#if defined(SKF_AD_VLAN_TAG) && defined(SKF_AD_VLAN_TAG_PRESENT)
+               /* vlan_stack_depth keeps track of the first tag which is in the
+                * packet metadata. The second tag onwards is within the packet data
+                * itself and should work exactly like before
+                */
+               if (bpf_pcap->vlan_tag_in_pkt_meta_op &&
+                   bpf_pcap->vlan_tag_in_pkt_meta_op(bpf_pcap) &&
+                   !vlan_stack_depth)
+               {
+                       /* generate new filter code based on extracting packet
+                        * metadata */
+                       struct slist *s;
+                       s = new_stmt(BPF_LD|BPF_B|BPF_ABS);
+                       s->s.k = SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT;
+                       b0 = new_block(JMP(BPF_JEQ));
+                       b0->stmts = s;
+                       b0->s.k = 1; //true
+
+                       if (vlan_num >= 0) {
+                               s = new_stmt(BPF_LD|BPF_B|BPF_ABS);
+                               s->s.k = SKF_AD_OFF + SKF_AD_VLAN_TAG;
+                               b1 = new_block(JMP(BPF_JEQ));
+                               b1->stmts = s;
+                               b1->s.k = (bpf_int32)vlan_num;
+                               gen_and(b0,b1);
+                               b0 = b1;
+                       }
+               } else
+#endif
+               {
+                       /* check for VLAN, including QinQ */
+                       b0 = gen_cmp(OR_LINK, off_linktype, BPF_H,
+                                    (bpf_int32)ETHERTYPE_8021Q);
+                       b1 = gen_cmp(OR_LINK, off_linktype, BPF_H,
+                                    (bpf_int32)ETHERTYPE_8021QINQ);
+                       gen_or(b0,b1);
                        b0 = b1;
-               }

-               off_macpl += 4;
-               off_linktype += 4;
+                       /* If a specific VLAN is requested, check VLAN id */
+                       if (vlan_num >= 0) {
+                               b1 = gen_mcmp(OR_MACPL, 0, BPF_H,
+                                             (bpf_int32)vlan_num, 0x0fff);
+                               gen_and(b0, b1);
+                               b0 = b1;
+                       }
+
+                       off_macpl += 4;
+                       off_linktype += 4;
 #if 0
                off_nl_nosnap += 4;
                off_nl += 4;
 #endif
+               }
                break;

        default:
@@ -7934,6 +7971,8 @@ gen_vlan(vlan_num)
                /*NOTREACHED*/
        }

+       vlan_stack_depth++;
+
        return (b0);
 }

diff --git a/pcap-int.h b/pcap-int.h
index 487c4e0..5c5b515 100644
--- a/pcap-int.h
+++ b/pcap-int.h
@@ -242,6 +242,7 @@ typedef int (*setmode_op_t)(pcap_t *, int);
 typedef int    (*setmintocopy_op_t)(pcap_t *, int);
 #endif
 typedef void   (*cleanup_op_t)(pcap_t *);
+typedef int    (*vlan_tag_in_pkt_meta_op_t)(pcap_t *);

 struct pcap {
 #ifdef WIN32
@@ -325,6 +326,8 @@ struct pcap {
 #endif
        cleanup_op_t cleanup_op;

+        /* is the vlan tag in packet metadata? */
+        vlan_tag_in_pkt_meta_op_t vlan_tag_in_pkt_meta_op;
        /*
         * Placeholder for filter code if bpf not in kernel.
         */
diff --git a/pcap-linux.c b/pcap-linux.c
index a7f97bb..1546030 100644
--- a/pcap-linux.c
+++ b/pcap-linux.c
@@ -276,6 +276,16 @@ typedef int                socklen_t;

 #define MAX_LINKHEADER_SIZE    256

+/* these are the magic kernel versions that started
+ * supporting BPF packet filtering
+ * based on vlan tags that are present in the skb metadata
+ */
+#define BPF_VLAN_KVER 3
+#define BPF_VLAN_KPATCHLEVEL 8
+#define BPF_VLAN_KSUBLEVEL 0
+
+/* prototype for external function and methods */
+u_int  bpf_filter_linux(const struct bpf_insn *, const u_char *, u_int16_t, u_int, u_int);
 /*
  * When capturing on all interfaces we use this as the buffer size.
  * Should be bigger then all MTUs that occur in real life.
@@ -302,6 +312,7 @@ static int pcap_stats_linux(pcap_t *, struct pcap_stat *);
 static int pcap_setfilter_linux(pcap_t *, struct bpf_program *);
 static int pcap_setdirection_linux(pcap_t *, pcap_direction_t);
 static void pcap_cleanup_linux(pcap_t *);
+static int pcap_vlan_tag_in_pkt_linux(pcap_t *);

 union thdr {
        struct tpacket_hdr      *h1;
@@ -356,6 +367,49 @@ static struct sock_fprog   total_fcode
        = { 1, &total_insn };
 #endif /* SO_ATTACH_FILTER */

+/*
+ * Return non-zero if the running kernel's release (from uname(2)) is at
+ * least sup_version.sup_patchlevel.sup_sublevel, and 0 otherwise or if
+ * uname() fails.  Components missing from the release string count as 0.
+ */
+static int
+vlan_tag_in_pkt_auxdata(int sup_version,
+                       int sup_patchlevel, int sup_sublevel)
+{
+       struct utsname u;
+       char *saveptr, *rel, *tok;
+       int i = 0;
+#define VERLEN 3
+       /* zero-filled so a release with fewer than VERLEN fields is well defined */
+       int ver[VERLEN] = { 0, 0, 0 };
+
+       if (uname(&u)) {
+               perror("uname");
+               return 0;
+       }
+       /* split "version.patchlevel.sublevel[...]" on dots; extra fields are ignored */
+       rel = u.release;
+       while ((tok = strtok_r(rel, ".", &saveptr)) != NULL) {
+               rel = NULL;
+               if (i < VERLEN)
+                       ver[i] = atoi(tok);
+               i++;
+       }
+
+       /* compare the most significant component first */
+       if (ver[0] != sup_version)
+               return ver[0] > sup_version;
+       if (ver[1] != sup_patchlevel)
+               return ver[1] > sup_patchlevel;
+       return ver[2] >= sup_sublevel;
+}
+
+static int     /* non-zero if the running kernel is at least BPF_VLAN_K*, else 0 */
+pcap_vlan_tag_in_pkt_linux(pcap_t *p)
+{
+       /* p is not consulted; the three BPF_VLAN_K* constants are the threshold */
+       return vlan_tag_in_pkt_auxdata(BPF_VLAN_KVER, BPF_VLAN_KPATCHLEVEL, BPF_VLAN_KSUBLEVEL);
+}
 pcap_t *
 pcap_create_interface(const char *device, char *ebuf)
 {
@@ -1179,6 +1233,7 @@ pcap_activate_linux(pcap_t *handle)
        handle->cleanup_op = pcap_cleanup_linux;
        handle->read_op = pcap_read_linux;
        handle->stats_op = pcap_stats_linux;
+       handle->vlan_tag_in_pkt_meta_op = pcap_vlan_tag_in_pkt_linux;

        /*
         * The "any" device is a special device which causes us not
@@ -1330,6 +1385,19 @@ pcap_read_linux(pcap_t *handle, int max_packets, pcap_handler callback, u_char *
        return pcap_read_packet(handle, callback, user);
 }

+/* Re-insert a VLAN tag taken from the auxdata: move *bp back VLAN_TAG_LEN
+ * bytes, slide the first md.vlan_offset bytes down, write the tag after. */
+static void
+insert_vlan_tag_into_packet(pcap_t *handle, u_char **bp, __u16 tp_vlan_tci)
+{
+       u_char *start = *bp - VLAN_TAG_LEN;
+       struct vlan_tag *tag;
+       memmove(start, *bp, handle->md.vlan_offset);
+       tag = (struct vlan_tag *)(start + handle->md.vlan_offset);
+       tag->vlan_tpid = htons(ETH_P_8021Q);
+       tag->vlan_tci = htons(tp_vlan_tci);
+       *bp = start;
+}
 /*
  *  Read a packet from the socket calling the handler provided by
  *  the user. Returns the number of packets received or -1 if an
@@ -1354,6 +1422,7 @@ pcap_read_packet(pcap_t *handle, pcap_handler callback, u_char *userdata)
                struct cmsghdr  cmsg;
                char            buf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
        } cmsg_buf;
+       __u16 tp_vlan_tci = 0;
 #else /* defined(HAVE_PACKET_AUXDATA) && defined(HAVE_LINUX_TPACKET_AUXDATA_TP_VLAN_TCI) */
        socklen_t               fromlen;
 #endif /* defined(HAVE_PACKET_AUXDATA) && defined(HAVE_LINUX_TPACKET_AUXDATA_TP_VLAN_TCI) */
@@ -1557,14 +1626,8 @@ pcap_read_packet(pcap_t *handle, pcap_handler callback, u_char *userdata)
                        if (len < (unsigned int) handle->md.vlan_offset)
                                break;

-                       bp -= VLAN_TAG_LEN;
-                       memmove(bp, bp + VLAN_TAG_LEN, handle->md.vlan_offset);
-
-                       tag = (struct vlan_tag *)(bp + handle->md.vlan_offset);
-                       tag->vlan_tpid = htons(ETH_P_8021Q);
-                       tag->vlan_tci = htons(aux->tp_vlan_tci);
+                       tp_vlan_tci = aux->tp_vlan_tci;

-                       packet_len += VLAN_TAG_LEN;
                }
        }
 #endif /* defined(HAVE_PACKET_AUXDATA) && defined(HAVE_LINUX_TPACKET_AUXDATA_TP_VLAN_TCI) */
@@ -1608,14 +1671,19 @@ pcap_read_packet(pcap_t *handle, pcap_handler callback, u_char *userdata)

        /* Run the packet filter if not using kernel filter */
        if (!handle->md.use_bpf && handle->fcode.bf_insns) {
-               if (bpf_filter(handle->fcode.bf_insns, bp,
-                               packet_len, caplen) == 0)
+               if (bpf_filter_linux(handle->fcode.bf_insns, bp, tp_vlan_tci,
+                               packet_len, caplen) == 0)
                {
                        /* rejected by filter */
                        return 0;
                }
        }
-
+       /* insert the vlan tag information back into the packet after we have run our
+        * filter */
+       if (tp_vlan_tci) {
+               insert_vlan_tag_into_packet(handle, &bp, tp_vlan_tci);
+               packet_len += VLAN_TAG_LEN;
+       }
        /* Fill in our own header data */

        if (ioctl(handle->fd, SIOCGSTAMP, &pcap_header.ts) == -1) {
@@ -3804,6 +3872,7 @@ pcap_read_linux_mmap(pcap_t *handle, int max_packets, pcap_handler callback,
                unsigned int tp_snaplen;
                unsigned int tp_sec;
                unsigned int tp_usec;
+                __u16 tp_vlan_tci = 0;

                h.raw = pcap_get_ring_frame(handle, TP_STATUS_USER);
                if (!h.raw)
@@ -3841,23 +3910,6 @@ pcap_read_linux_mmap(pcap_t *handle, int max_packets, pcap_handler callback,
                        return -1;
                }

-               /* run filter on received packet
-                * If the kernel filtering is enabled we need to run the
-                * filter until all the frames present into the ring
-                * at filter creation time are processed.
-                * In such case md.use_bpf is used as a counter for the
-                * packet we need to filter.
-                * Note: alternatively it could be possible to stop applying
-                * the filter when the ring became empty, but it can possibly
-                * happen a lot later... */
-               bp = (unsigned char*)h.raw + tp_mac;
-               run_bpf = (!handle->md.use_bpf) ||
-                       ((handle->md.use_bpf>1) && handle->md.use_bpf--);
-               if (run_bpf && handle->fcode.bf_insns &&
-                               (bpf_filter(handle->fcode.bf_insns, bp,
-                                       tp_len, tp_snaplen) == 0))
-                       goto skip;
-
                /*
                 * Do checks based on packet direction.
                 */
@@ -3945,15 +3997,30 @@ pcap_read_linux_mmap(pcap_t *handle, int max_packets, pcap_handler callback,
 #endif
                    handle->md.vlan_offset != -1 &&
                    tp_snaplen >= (unsigned int) handle->md.vlan_offset) {
-                       struct vlan_tag *tag;
-
-                       bp -= VLAN_TAG_LEN;
-                       memmove(bp, bp + VLAN_TAG_LEN, handle->md.vlan_offset);
-
-                       tag = (struct vlan_tag *)(bp + handle->md.vlan_offset);
-                       tag->vlan_tpid = htons(ETH_P_8021Q);
-                       tag->vlan_tci = htons(h.h2->tp_vlan_tci);
-
+                        tp_vlan_tci = h.h2->tp_vlan_tci;
+                }
+#endif
+               /* run filter on received packet
+                * If the kernel filtering is enabled we need to run the
+                * filter until all the frames present into the ring
+                * at filter creation time are processed.
+                * In such case md.use_bpf is used as a counter for the
+                * packet we need to filter.
+                * Note: alternatively it could be possible to stop applying
+                * the filter when the ring became empty, but it can possibly
+                * happen a lot later... */
+               bp = (unsigned char*)h.raw + tp_mac;
+               run_bpf = (!handle->md.use_bpf) ||
+                       ((handle->md.use_bpf>1) && handle->md.use_bpf--);
+               if (run_bpf && handle->fcode.bf_insns) {
+                       if(bpf_filter_linux(handle->fcode.bf_insns, bp, tp_vlan_tci,
+                                           tp_len, tp_snaplen) == 0)
+                               goto skip;
+               }
+#ifdef HAVE_TPACKET2
+               /* insert the vlan tag into the packet now */
+               if (tp_vlan_tci) {
+                       insert_vlan_tag_into_packet(handle, &bp, tp_vlan_tci);
                        pcaphdr.caplen += VLAN_TAG_LEN;
                        pcaphdr.len += VLAN_TAG_LEN;
                }
diff --git a/pcap.c b/pcap.c
index 9ce4318..9496d55 100644
--- a/pcap.c
+++ b/pcap.c
@@ -455,6 +455,9 @@ initialize_ops(pcap_t *p)
         */
        p->cleanup_op = pcap_cleanup_live_common;

+        /* No hook by default; for now only the Linux capture code installs
+         * one, and every other platform leaves this NULL. */
+        p->vlan_tag_in_pkt_meta_op = NULL;
        /*
         * In most cases, the standard one-short callback can
         * be used for pcap_next()/pcap_next_ex().
diff --git a/pcap/bpf.h b/pcap/bpf.h
index 8576bbd..3283be0 100644
--- a/pcap/bpf.h
+++ b/pcap/bpf.h
@@ -1280,8 +1280,12 @@ struct bpf_insn {
 /*
  * Macros for insn array initializers.
  */
+#ifndef BPF_STMT
 #define BPF_STMT(code, k) { (u_short)(code), 0, 0, k }
+#endif
+#ifndef BPF_JUMP
 #define BPF_JUMP(code, k, jt, jf) { (u_short)(code), jt, jf, k }
+#endif

 #if __STDC__ || defined(__cplusplus)
 extern int bpf_validate(const struct bpf_insn *, int);
_______________________________________________
tcpdump-workers mailing list
tcpdump-workers () lists tcpdump org
https://lists.sandelman.ca/mailman/listinfo/tcpdump-workers


Current thread: