Commit b44e27b

Author: Paolo Abeni (committed)

Merge branch 'net-stmmac-rx-performance-improvement'

Furong Xu says:

====================
net: stmmac: RX performance improvement

This series improves RX performance significantly: a ~40% TCP RX throughput
boost has been observed with a DWXGMAC CORE 3.20a running on Cortex-A65 CPUs,
from 2.18 Gbits/sec to 3.06 Gbits/sec.
====================

Link: https://patch.msgid.link/cover.1736910454.git.0x1207@gmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>

2 parents: 0b21051 + 204182e
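
The gain comes mainly from the stmmac_main.c changes below, which stop copying every received frame into a freshly allocated skb and instead build the skb directly on the page-pool buffer. A rough, condensed contrast of the per-packet fast path (names such as buf, ch, rx_q and ctx.xdp come from the driver; buffer sizing, length accounting, error handling and the surrounding loop are omitted):

/* Old path: allocate a NAPI skb, memcpy the payload out of the DMA page,
 * then hand the page straight back to the pool.
 */
skb = napi_alloc_skb(&ch->rx_napi, buf1_len);
skb_copy_to_linear_data(skb, ctx.xdp.data, buf1_len);
page_pool_recycle_direct(rx_q->page_pool, buf->page);

/* New path: wrap the DMA page itself in an skb; no per-packet allocation
 * or copy, and the page is recycled only when the skb is freed.
 */
skb = napi_build_skb(page_address(buf->page), rx_q->napi_skb_frag_size);
skb_mark_for_recycle(skb);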

File tree: 3 files changed, +21 -15 lines changed

drivers/net/ethernet/stmicro/stmmac/stmmac.h

Lines changed: 1 addition & 0 deletions
@@ -126,6 +126,7 @@ struct stmmac_rx_queue {
 	unsigned int cur_rx;
 	unsigned int dirty_rx;
 	unsigned int buf_alloc_num;
+	unsigned int napi_skb_frag_size;
 	dma_addr_t dma_rx_phy;
 	u32 rx_tail_addr;
 	unsigned int state_saved;

drivers/net/ethernet/stmicro/stmmac/stmmac_main.c

Lines changed: 20 additions & 14 deletions
@@ -1341,7 +1341,7 @@ static unsigned int stmmac_rx_offset(struct stmmac_priv *priv)
 	if (stmmac_xdp_is_enabled(priv))
 		return XDP_PACKET_HEADROOM;
 
-	return 0;
+	return NET_SKB_PAD;
 }
 
 static int stmmac_set_bfsize(int mtu, int bufsize)
@@ -2040,22 +2040,26 @@ static int __alloc_dma_rx_desc_resources(struct stmmac_priv *priv,
 	struct stmmac_channel *ch = &priv->channel[queue];
 	bool xdp_prog = stmmac_xdp_is_enabled(priv);
 	struct page_pool_params pp_params = { 0 };
-	unsigned int num_pages;
+	unsigned int dma_buf_sz_pad, num_pages;
 	unsigned int napi_id;
 	int ret;
 
+	dma_buf_sz_pad = stmmac_rx_offset(priv) + dma_conf->dma_buf_sz +
+			 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+	num_pages = DIV_ROUND_UP(dma_buf_sz_pad, PAGE_SIZE);
+
 	rx_q->queue_index = queue;
 	rx_q->priv_data = priv;
+	rx_q->napi_skb_frag_size = num_pages * PAGE_SIZE;
 
 	pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV;
 	pp_params.pool_size = dma_conf->dma_rx_size;
-	num_pages = DIV_ROUND_UP(dma_conf->dma_buf_sz, PAGE_SIZE);
-	pp_params.order = ilog2(num_pages);
+	pp_params.order = order_base_2(num_pages);
 	pp_params.nid = dev_to_node(priv->device);
 	pp_params.dev = priv->device;
 	pp_params.dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
 	pp_params.offset = stmmac_rx_offset(priv);
-	pp_params.max_len = STMMAC_MAX_RX_BUF_SIZE(num_pages);
+	pp_params.max_len = dma_conf->dma_buf_sz;
 
 	rx_q->page_pool = page_pool_create(&pp_params);
 	if (IS_ERR(rx_q->page_pool)) {
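
As a sanity check on the sizing above, here is a small standalone sketch of the arithmetic with assumed values; PAGE_SIZE = 4096, NET_SKB_PAD = 64 and SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) = 320 are typical 64-bit figures, and dma_buf_sz = 1536 is a common setting for a 1500-byte MTU, none of which are taken from the patch itself. It also illustrates why the hunk switches from ilog2() to order_base_2(): the former rounds the page count down, the latter rounds it up to the next power of two.

/* Hypothetical userspace sketch of the RX buffer sizing, not driver code.
 * All constants below are assumptions (typical 64-bit values).
 */
#include <stdio.h>

#define PAGE_SIZE	4096u
#define NET_SKB_PAD	64u	/* assumed stmmac_rx_offset() in non-XDP mode */
#define SHINFO_PAD	320u	/* assumed SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) */
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

/* ceil(log2(n)), i.e. order_base_2(); ilog2() would round down instead. */
static unsigned int order_base_2(unsigned int n)
{
	unsigned int order = 0;

	while ((1u << order) < n)
		order++;
	return order;
}

int main(void)
{
	unsigned int dma_buf_sz = 1536;	/* assumed buffer size for a 1500-byte MTU */
	unsigned int pad, pages;

	pad = NET_SKB_PAD + dma_buf_sz + SHINFO_PAD;	/* 1920 */
	pages = DIV_ROUND_UP(pad, PAGE_SIZE);		/* 1 */

	printf("dma_buf_sz_pad=%u num_pages=%u order=%u napi_skb_frag_size=%u\n",
	       pad, pages, order_base_2(pages), pages * PAGE_SIZE);
	/* prints: dma_buf_sz_pad=1920 num_pages=1 order=0 napi_skb_frag_size=4096 */
	return 0;
}

The rounding matters once the padded size spans several pages: a three-page buffer needs order_base_2(3) = 2, i.e. a four-page page-pool allocation, whereas ilog2(3) = 1 would only cover two pages.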
@@ -5504,10 +5508,6 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
 
 		/* Buffer is good. Go on. */
 
-		prefetch(page_address(buf->page) + buf->page_offset);
-		if (buf->sec_page)
-			prefetch(page_address(buf->sec_page));
-
 		buf1_len = stmmac_rx_buf1_len(priv, p, status, len);
 		len += buf1_len;
 		buf2_len = stmmac_rx_buf2_len(priv, p, status, len);
@@ -5529,6 +5529,8 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
 
 			dma_sync_single_for_cpu(priv->device, buf->addr,
 						buf1_len, dma_dir);
+			net_prefetch(page_address(buf->page) +
+				     buf->page_offset);
 
 			xdp_init_buff(&ctx.xdp, buf_sz, &rx_q->xdp_rxq);
 			xdp_prepare_buff(&ctx.xdp, page_address(buf->page),
@@ -5582,22 +5584,26 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
 		}
 
 		if (!skb) {
+			unsigned int head_pad_len;
+
 			/* XDP program may expand or reduce tail */
 			buf1_len = ctx.xdp.data_end - ctx.xdp.data;
 
-			skb = napi_alloc_skb(&ch->rx_napi, buf1_len);
+			skb = napi_build_skb(page_address(buf->page),
+					     rx_q->napi_skb_frag_size);
 			if (!skb) {
+				page_pool_recycle_direct(rx_q->page_pool,
+							 buf->page);
 				rx_dropped++;
 				count++;
 				goto drain_data;
 			}
 
 			/* XDP program may adjust header */
-			skb_copy_to_linear_data(skb, ctx.xdp.data, buf1_len);
+			head_pad_len = ctx.xdp.data - ctx.xdp.data_hard_start;
+			skb_reserve(skb, head_pad_len);
 			skb_put(skb, buf1_len);
-
-			/* Data payload copied into SKB, page ready for recycle */
-			page_pool_recycle_direct(rx_q->page_pool, buf->page);
+			skb_mark_for_recycle(skb);
 			buf->page = NULL;
 		} else if (buf1_len) {
 			dma_sync_single_for_cpu(priv->device, buf->addr,
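
Two details of this hunk are easy to miss: the skb now owns the page, so the page returns to the pool through skb_mark_for_recycle() only when the stack frees the skb, and the napi_build_skb() failure branch must recycle the page explicitly because no skb ever took ownership. A hypothetical helper condensing that logic (the function name and argument list are illustrative, not part of the driver):

/* Illustrative condensation of the hunk above; not driver code. */
static struct sk_buff *stmmac_sketch_build_rx_skb(struct stmmac_rx_queue *rx_q,
						  struct stmmac_rx_buffer *buf,
						  struct xdp_buff *xdp)
{
	unsigned int head_pad_len, buf1_len;
	struct sk_buff *skb;

	/* Buffer layout: headroom | frame | skb_shared_info, rounded up to
	 * rx_q->napi_skb_frag_size bytes (whole pages).
	 */
	skb = napi_build_skb(page_address(buf->page),
			     rx_q->napi_skb_frag_size);
	if (!skb) {
		/* No skb took ownership, so recycle the page by hand. */
		page_pool_recycle_direct(rx_q->page_pool, buf->page);
		return NULL;
	}

	/* Skip the pool headroom plus whatever an XDP program adjusted. */
	head_pad_len = xdp->data - xdp->data_hard_start;
	buf1_len = xdp->data_end - xdp->data;
	skb_reserve(skb, head_pad_len);
	skb_put(skb, buf1_len);

	/* The page flows back to the page pool when the skb is freed. */
	skb_mark_for_recycle(skb);
	buf->page = NULL;
	return skb;
}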

drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.h

Lines changed: 0 additions & 1 deletion
@@ -4,7 +4,6 @@
 #ifndef _STMMAC_XDP_H_
 #define _STMMAC_XDP_H_
 
-#define STMMAC_MAX_RX_BUF_SIZE(num) (((num) * PAGE_SIZE) - XDP_PACKET_HEADROOM)
 #define STMMAC_RX_DMA_ATTR (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
 
 int stmmac_xdp_setup_pool(struct stmmac_priv *priv, struct xsk_buff_pool *pool,
