Commit 0ebab78c authored by Lorenzo Bianconi, committed by Jakub Kicinski
Browse files

net: veth: add page_pool for page recycling



Introduce page_pool support in the veth driver in order to recycle pages
in the veth_convert_skb_to_xdp_buff() routine and avoid reallocating the skb
through the page allocator.
The patch has been tested by sending TCP traffic to a veth pair where the
remote peer is running a simple XDP program that just returns XDP_PASS:

veth upstream codebase:
MTU 1500B: ~ 8Gbps
MTU 8000B: ~ 13.9Gbps

veth upstream codebase + pp support:
MTU 1500B: ~ 9.2Gbps
MTU 8000B: ~ 16.2Gbps

Tested-by: Maryam Tahhan <mtahhan@redhat.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent ffcddcae
Loading
Loading
Loading
Loading
+1 −0
Original line number Original line Diff line number Diff line
@@ -402,6 +402,7 @@ config TUN_VNET_CROSS_LE


config VETH
config VETH
	tristate "Virtual ethernet pair device"
	tristate "Virtual ethernet pair device"
	select PAGE_POOL
	help
	help
	  This device is a local ethernet tunnel. Devices are created in pairs.
	  This device is a local ethernet tunnel. Devices are created in pairs.
	  When one end receives the packet it appears on its pair and vice
	  When one end receives the packet it appears on its pair and vice
+44 −4
Original line number Original line Diff line number Diff line
@@ -26,6 +26,7 @@
#include <linux/ptr_ring.h>
#include <linux/ptr_ring.h>
#include <linux/bpf_trace.h>
#include <linux/bpf_trace.h>
#include <linux/net_tstamp.h>
#include <linux/net_tstamp.h>
#include <net/page_pool.h>


#define DRV_NAME	"veth"
#define DRV_NAME	"veth"
#define DRV_VERSION	"1.0"
#define DRV_VERSION	"1.0"
@@ -65,6 +66,7 @@ struct veth_rq {
	bool			rx_notify_masked;
	bool			rx_notify_masked;
	struct ptr_ring		xdp_ring;
	struct ptr_ring		xdp_ring;
	struct xdp_rxq_info	xdp_rxq;
	struct xdp_rxq_info	xdp_rxq;
	struct page_pool	*page_pool;
};
};


struct veth_priv {
struct veth_priv {
@@ -727,17 +729,20 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
			goto drop;
			goto drop;


		/* Allocate skb head */
		/* Allocate skb head */
		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
		page = page_pool_dev_alloc_pages(rq->page_pool);
		if (!page)
		if (!page)
			goto drop;
			goto drop;


		nskb = build_skb(page_address(page), PAGE_SIZE);
		nskb = build_skb(page_address(page), PAGE_SIZE);
		if (!nskb) {
		if (!nskb) {
			put_page(page);
			page_pool_put_full_page(rq->page_pool, page, true);
			goto drop;
			goto drop;
		}
		}


		skb_reserve(nskb, VETH_XDP_HEADROOM);
		skb_reserve(nskb, VETH_XDP_HEADROOM);
		skb_copy_header(nskb, skb);
		skb_mark_for_recycle(nskb);

		size = min_t(u32, skb->len, max_head_size);
		size = min_t(u32, skb->len, max_head_size);
		if (skb_copy_bits(skb, 0, nskb->data, size)) {
		if (skb_copy_bits(skb, 0, nskb->data, size)) {
			consume_skb(nskb);
			consume_skb(nskb);
@@ -745,7 +750,6 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
		}
		}
		skb_put(nskb, size);
		skb_put(nskb, size);


		skb_copy_header(nskb, skb);
		head_off = skb_headroom(nskb) - skb_headroom(skb);
		head_off = skb_headroom(nskb) - skb_headroom(skb);
		skb_headers_offset_update(nskb, head_off);
		skb_headers_offset_update(nskb, head_off);


@@ -754,7 +758,7 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
		len = skb->len - off;
		len = skb->len - off;


		for (i = 0; i < MAX_SKB_FRAGS && off < skb->len; i++) {
		for (i = 0; i < MAX_SKB_FRAGS && off < skb->len; i++) {
			page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
			page = page_pool_dev_alloc_pages(rq->page_pool);
			if (!page) {
			if (!page) {
				consume_skb(nskb);
				consume_skb(nskb);
				goto drop;
				goto drop;
@@ -1002,11 +1006,37 @@ static int veth_poll(struct napi_struct *napi, int budget)
	return done;
	return done;
}
}


static int veth_create_page_pool(struct veth_rq *rq)
{
	struct page_pool_params pp_params = {
		.order = 0,
		.pool_size = VETH_RING_SIZE,
		.nid = NUMA_NO_NODE,
		.dev = &rq->dev->dev,
	};

	rq->page_pool = page_pool_create(&pp_params);
	if (IS_ERR(rq->page_pool)) {
		int err = PTR_ERR(rq->page_pool);

		rq->page_pool = NULL;
		return err;
	}

	return 0;
}

static int __veth_napi_enable_range(struct net_device *dev, int start, int end)
static int __veth_napi_enable_range(struct net_device *dev, int start, int end)
{
{
	struct veth_priv *priv = netdev_priv(dev);
	struct veth_priv *priv = netdev_priv(dev);
	int err, i;
	int err, i;


	for (i = start; i < end; i++) {
		err = veth_create_page_pool(&priv->rq[i]);
		if (err)
			goto err_page_pool;
	}

	for (i = start; i < end; i++) {
	for (i = start; i < end; i++) {
		struct veth_rq *rq = &priv->rq[i];
		struct veth_rq *rq = &priv->rq[i];


@@ -1027,6 +1057,11 @@ static int __veth_napi_enable_range(struct net_device *dev, int start, int end)
err_xdp_ring:
err_xdp_ring:
	for (i--; i >= start; i--)
	for (i--; i >= start; i--)
		ptr_ring_cleanup(&priv->rq[i].xdp_ring, veth_ptr_free);
		ptr_ring_cleanup(&priv->rq[i].xdp_ring, veth_ptr_free);
err_page_pool:
	for (i = start; i < end; i++) {
		page_pool_destroy(priv->rq[i].page_pool);
		priv->rq[i].page_pool = NULL;
	}


	return err;
	return err;
}
}
@@ -1056,6 +1091,11 @@ static void veth_napi_del_range(struct net_device *dev, int start, int end)
		rq->rx_notify_masked = false;
		rq->rx_notify_masked = false;
		ptr_ring_cleanup(&rq->xdp_ring, veth_ptr_free);
		ptr_ring_cleanup(&rq->xdp_ring, veth_ptr_free);
	}
	}

	for (i = start; i < end; i++) {
		page_pool_destroy(priv->rq[i].page_pool);
		priv->rq[i].page_pool = NULL;
	}
}
}


static void veth_napi_del(struct net_device *dev)
static void veth_napi_del(struct net_device *dev)