tcpdump mailing list archives

Re: tcpdump and libpcap releases, and future thoughts


From: Guy Harris <guy () alum mit edu>
Date: Wed, 3 Sep 2014 13:34:45 -0700


On Sep 3, 2014, at 12:34 PM, Michael Richardson <mcr () sandelman ca> wrote:

It seems that we might need more patches to better select Linux memory mapped packet choices?

I'd prefer a patch that reduces or removes the *need* to do so, such as this patch, which I'm in the process of 
testing (yes, I already mentioned the problem on linux-netdev, but nobody else has looked much at the issues):

--- net/packet/af_packet.c.dist 2014-08-31 18:23:04.000000000 -0700
+++ net/packet/af_packet.c      2014-09-02 18:52:18.000000000 -0700
@@ -198,7 +198,7 @@
                        struct tpacket_block_desc *);
 static void *prb_dispatch_next_block(struct tpacket_kbdq_core *,
                        struct packet_sock *);
-static void prb_retire_current_block(struct tpacket_kbdq_core *,
+static bool prb_retire_current_block(struct tpacket_kbdq_core *,
                struct packet_sock *, unsigned int status);
 static int prb_queue_frozen(struct tpacket_kbdq_core *);
 static void prb_open_block(struct tpacket_kbdq_core *,
@@ -677,6 +677,7 @@
        struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
        unsigned int frozen;
        struct tpacket_block_desc *pbd;
+       bool block_was_closed = false;
 
        spin_lock(&po->sk.sk_receive_queue.lock);
 
@@ -704,7 +705,8 @@
 
        if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) {
                if (!frozen) {
-                       prb_retire_current_block(pkc, po, TP_STATUS_BLK_TMO);
+                       block_was_closed = prb_retire_current_block(pkc, po,
+                                                                   TP_STATUS_BLK_TMO);
                        if (!prb_dispatch_next_block(pkc, po))
                                goto refresh_timer;
                        else
@@ -738,6 +740,9 @@
 
 out:
        spin_unlock(&po->sk.sk_receive_queue.lock);
+
+       if (block_was_closed)
+               po->sk.sk_data_ready(&po->sk);
 }
 
 static void prb_flush_block(struct tpacket_kbdq_core *pkc1,
@@ -930,7 +935,7 @@
        return (void *)pkc->nxt_offset;
 }
 
-static void prb_retire_current_block(struct tpacket_kbdq_core *pkc,
+static bool prb_retire_current_block(struct tpacket_kbdq_core *pkc,
                struct packet_sock *po, unsigned int status)
 {
        struct tpacket_block_desc *pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
@@ -953,8 +958,9 @@
                        }
                }
                prb_close_block(pkc, pbd, po, status);
-               return;
+               return true;
        }
+       return false;
 }
 
 static int prb_curr_blk_in_use(struct tpacket_kbdq_core *pkc,
@@ -1033,13 +1039,16 @@
 static void *__packet_lookup_frame_in_block(struct packet_sock *po,
                                            struct sk_buff *skb,
                                                int status,
-                                           unsigned int len
+                                           unsigned int len,
+                                           bool *block_was_closed
                                            )
 {
        struct tpacket_kbdq_core *pkc;
        struct tpacket_block_desc *pbd;
        char *curr, *end;
 
+       *block_was_closed = false;
+
        pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
        pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
 
@@ -1075,7 +1084,7 @@
        }
 
        /* Ok, close the current block */
-       prb_retire_current_block(pkc, po, 0);
+       *block_was_closed = prb_retire_current_block(pkc, po, 0);
 
        /* Now, try to dispatch the next block */
        curr = (char *)prb_dispatch_next_block(pkc, po);
@@ -1094,20 +1103,24 @@
 
 static void *packet_current_rx_frame(struct packet_sock *po,
                                            struct sk_buff *skb,
-                                           int status, unsigned int len)
+                                           int status, unsigned int len,
+                                           bool *block_was_closed)
 {
        char *curr = NULL;
        switch (po->tp_version) {
        case TPACKET_V1:
        case TPACKET_V2:
+               *block_was_closed = false;
                curr = packet_lookup_frame(po, &po->rx_ring,
                                        po->rx_ring.head, status);
                return curr;
        case TPACKET_V3:
-               return __packet_lookup_frame_in_block(po, skb, status, len);
+               return __packet_lookup_frame_in_block(po, skb, status, len,
+                                                     block_was_closed);
        default:
                WARN(1, "TPACKET version not supported\n");
                BUG();
+               *block_was_closed = false;
                return NULL;
        }
 }
@@ -1879,6 +1892,7 @@
        struct sk_buff *copy_skb = NULL;
        struct timespec ts;
        __u32 ts_status;
+       bool block_was_closed;
 
        /* struct tpacket{2,3}_hdr is aligned to a multiple of TPACKET_ALIGNMENT.
         * We may add members to them until current aligned size without forcing
@@ -1958,7 +1972,8 @@
        }
        spin_lock(&sk->sk_receive_queue.lock);
        h.raw = packet_current_rx_frame(po, skb,
-                                       TP_STATUS_KERNEL, (macoff+snaplen));
+                                       TP_STATUS_KERNEL, (macoff+snaplen),
+                                       &block_was_closed);
        if (!h.raw)
                goto ring_is_full;
        if (po->tp_version <= TPACKET_V2) {
@@ -2058,12 +2073,14 @@
        smp_wmb();
 #endif
 
-       if (po->tp_version <= TPACKET_V2)
+       if (po->tp_version <= TPACKET_V2) {
                __packet_set_status(po, h.raw, status);
-       else
+               sk->sk_data_ready(sk);
+       } else {
                prb_clear_blk_fill_status(&po->rx_ring);
-
-       sk->sk_data_ready(sk);
+               if (block_was_closed)
+                       sk->sk_data_ready(sk);
+       }
 
 drop_n_restore:
        if (skb_head != skb->data && skb_shared(skb)) {


_______________________________________________
tcpdump-workers mailing list
tcpdump-workers () lists tcpdump org
https://lists.sandelman.ca/mailman/listinfo/tcpdump-workers


Current thread: