Commit e26eceb9 authored by Tariq Toukan's avatar Tariq Toukan Committed by Saeed Mahameed
Browse files

net/mlx5e: RX, Test the XDP program existence out of the handler



Instead of early return inside mlx5e_xdp_handle(), let the caller check
if an XDP program is loaded.  This allows saving a few unnecessary
function calls and calculations in case !prog.

Performance test: single core, drop packets in iptables
Before: 3,872,504 pps
After:  3,975,628 pps (+2.66%)

Signed-off-by: default avatarTariq Toukan <tariqt@nvidia.com>
Reviewed-by: default avatarMaxim Mikityanskiy <maximmi@nvidia.com>
Signed-off-by: default avatarSaeed Mahameed <saeedm@nvidia.com>
parent 8d35fb57
Loading
Loading
Loading
Loading
+1 −4
Original line number Diff line number Diff line
@@ -120,15 +120,12 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,

/* returns true if packet was consumed by xdp */
bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
		      struct bpf_prog *prog,
		      u32 *len, struct xdp_buff *xdp)
{
	struct bpf_prog *prog = rcu_dereference(rq->xdp_prog);
	u32 act;
	int err;

	if (!prog)
		return false;

	act = bpf_prog_run_xdp(prog, xdp);
	switch (act) {
	case XDP_PASS:
+1 −0
Original line number Diff line number Diff line
@@ -48,6 +48,7 @@
struct mlx5e_xsk_param;
int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk);
bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
		      struct bpf_prog *prog,
		      u32 *len, struct xdp_buff *xdp);
void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq);
bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq);
+7 −2
Original line number Diff line number Diff line
@@ -4,6 +4,7 @@
#include "rx.h"
#include "en/xdp.h"
#include <net/xdp_sock_drv.h>
#include <linux/filter.h>

/* RX data path */

@@ -31,6 +32,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
{
	struct xdp_buff *xdp = wi->umr.dma_info[page_idx].xsk;
	u32 cqe_bcnt32 = cqe_bcnt;
	struct bpf_prog *prog;

	/* Check packet size. Note LRO doesn't use linear SKB */
	if (unlikely(cqe_bcnt > rq->hw_mtu)) {
@@ -65,7 +67,8 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
	 * allocated first from the Reuse Ring, so it has enough space.
	 */

	if (likely(mlx5e_xdp_handle(rq, NULL, &cqe_bcnt32, xdp))) {
	prog = rcu_dereference(rq->xdp_prog);
	if (likely(prog && mlx5e_xdp_handle(rq, NULL, prog, &cqe_bcnt32, xdp))) {
		if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)))
			__set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
		return NULL; /* page/packet was consumed by XDP */
@@ -83,6 +86,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
					      u32 cqe_bcnt)
{
	struct xdp_buff *xdp = wi->di->xsk;
	struct bpf_prog *prog;

	/* wi->offset is not used in this function, because xdp->data and the
	 * DMA address point directly to the necessary place. Furthermore, the
@@ -101,7 +105,8 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
		return NULL;
	}

	if (likely(mlx5e_xdp_handle(rq, NULL, &cqe_bcnt, xdp)))
	prog = rcu_dereference(rq->xdp_prog);
	if (likely(prog && mlx5e_xdp_handle(rq, NULL, prog, &cqe_bcnt, xdp)))
		return NULL; /* page/packet was consumed by XDP */

	/* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse
+30 −19
Original line number Diff line number Diff line
@@ -34,6 +34,7 @@
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <linux/bitmap.h>
#include <linux/filter.h>
#include <net/ip6_checksum.h>
#include <net/page_pool.h>
#include <net/inet_ecn.h>
@@ -1523,11 +1524,11 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
{
	struct mlx5e_dma_info *di = wi->di;
	u16 rx_headroom = rq->buff.headroom;
	struct xdp_buff xdp;
	struct bpf_prog *prog;
	struct sk_buff *skb;
	u32 metasize = 0;
	void *va, *data;
	u32 frag_size;
	u32 metasize;

	va             = page_address(di->page) + wi->offset;
	data           = va + rx_headroom;
@@ -1535,16 +1536,21 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,

	dma_sync_single_range_for_cpu(rq->pdev, di->addr, wi->offset,
				      frag_size, DMA_FROM_DEVICE);
	net_prefetchw(va); /* xdp_frame data area */
	net_prefetch(data);

	prog = rcu_dereference(rq->xdp_prog);
	if (prog) {
		struct xdp_buff xdp;

		net_prefetchw(va); /* xdp_frame data area */
		mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp);
	if (mlx5e_xdp_handle(rq, di, &cqe_bcnt, &xdp))
		if (mlx5e_xdp_handle(rq, di, prog, &cqe_bcnt, &xdp))
			return NULL; /* page/packet was consumed by XDP */

		rx_headroom = xdp.data - xdp.data_hard_start;
	frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
		metasize = xdp.data - xdp.data_meta;
	}
	frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
	skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt, metasize);
	if (unlikely(!skb))
		return NULL;
@@ -1842,11 +1848,11 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
	struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx];
	u16 rx_headroom = rq->buff.headroom;
	u32 cqe_bcnt32 = cqe_bcnt;
	struct xdp_buff xdp;
	struct bpf_prog *prog;
	struct sk_buff *skb;
	u32 metasize = 0;
	void *va, *data;
	u32 frag_size;
	u32 metasize;

	/* Check packet size. Note LRO doesn't use linear SKB */
	if (unlikely(cqe_bcnt > rq->hw_mtu)) {
@@ -1860,19 +1866,24 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,

	dma_sync_single_range_for_cpu(rq->pdev, di->addr, head_offset,
				      frag_size, DMA_FROM_DEVICE);
	net_prefetchw(va); /* xdp_frame data area */
	net_prefetch(data);

	prog = rcu_dereference(rq->xdp_prog);
	if (prog) {
		struct xdp_buff xdp;

		net_prefetchw(va); /* xdp_frame data area */
		mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt32, &xdp);
	if (mlx5e_xdp_handle(rq, di, &cqe_bcnt32, &xdp)) {
		if (mlx5e_xdp_handle(rq, di, prog, &cqe_bcnt32, &xdp)) {
			if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
				__set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
			return NULL; /* page/packet was consumed by XDP */
		}

		rx_headroom = xdp.data - xdp.data_hard_start;
	frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt32);
		metasize = xdp.data - xdp.data_meta;
	}
	frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt32);
	skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt32, metasize);
	if (unlikely(!skb))
		return NULL;