Skip to content

Commit 470f366

Browse files
committed
Merge branch 'octeon_ep-transmit-cleanups-and-optimizations'
Shinas Rasheed says: ==================== Cleanup and optimizations to transmit code Pad small packets to ETH_ZLEN before transmit, clean up dma sync calls, add xmit_more functionality, and then remove atomic variable usage in the Tx data path. Changes: V3: - Stop returning NETDEV_TX_BUSY when the ring is full in the xmit_more patch. Instead, check early whether the next packet (rather than the current one) can fit in the ring, and stop the queue if it cannot. - Add smp_mb between stopping the tx queue and re-checking whether the tx queue has free entries, in the queue full check function, so that IQ completion processing that might have happened on other CPUs is reflected. - Update small packet padding patch changelog to give more info. V2: https://lore.kernel.org/all/20231024145119.2366588-1-srasheed@marvell.com/ - Added patch for padding small packets to ETH_ZLEN, part of the optimization patches for transmit code that was missed out in V1 - Updated changelog to provide more details for the dma_sync remove patch - Updated changelog to use imperative tone in the add xmit_more patch V1: https://lore.kernel.org/all/20231023114449.2362147-1-srasheed@marvell.com/ ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents 56eddc3 + dc9c02b commit 470f366

File tree

5 files changed

+45
-30
lines changed

5 files changed

+45
-30
lines changed

drivers/net/ethernet/marvell/octeon_ep/octep_config.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,10 @@
1313
#define OCTEP_64BYTE_INSTR 64
1414

1515
/* Tx Queue: maximum descriptors per ring */
16+
/* This needs to be a power of 2 */
1617
#define OCTEP_IQ_MAX_DESCRIPTORS 1024
1718
/* Minimum input (Tx) requests to be enqueued to ring doorbell */
18-
#define OCTEP_DB_MIN 1
19+
#define OCTEP_DB_MIN 8
1920
/* Packet threshold for Tx queue interrupt */
2021
#define OCTEP_IQ_INTR_THRESHOLD 0x0
2122

drivers/net/ethernet/marvell/octeon_ep/octep_main.c

Lines changed: 33 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -777,17 +777,24 @@ static int octep_stop(struct net_device *netdev)
777777
*/
778778
static inline int octep_iq_full_check(struct octep_iq *iq)
779779
{
780-
if (likely((iq->max_count - atomic_read(&iq->instr_pending)) >=
780+
if (likely((IQ_INSTR_SPACE(iq)) >
781781
OCTEP_WAKE_QUEUE_THRESHOLD))
782782
return 0;
783783

784784
/* Stop the queue if unable to send */
785785
netif_stop_subqueue(iq->netdev, iq->q_no);
786786

787+
/* Allow for pending updates in write index
788+
* from iq_process_completion in other cpus
789+
* to reflect, in case queue gets free
790+
* entries.
791+
*/
792+
smp_mb();
793+
787794
/* check again and restart the queue, in case NAPI has just freed
788795
* enough Tx ring entries.
789796
*/
790-
if (unlikely((iq->max_count - atomic_read(&iq->instr_pending)) >=
797+
if (unlikely(IQ_INSTR_SPACE(iq) >
791798
OCTEP_WAKE_QUEUE_THRESHOLD)) {
792799
netif_start_subqueue(iq->netdev, iq->q_no);
793800
iq->stats.restart_cnt++;
@@ -818,19 +825,19 @@ static netdev_tx_t octep_start_xmit(struct sk_buff *skb,
818825
struct octep_iq *iq;
819826
skb_frag_t *frag;
820827
u16 nr_frags, si;
828+
int xmit_more;
821829
u16 q_no, wi;
822830

831+
if (skb_put_padto(skb, ETH_ZLEN))
832+
return NETDEV_TX_OK;
833+
823834
q_no = skb_get_queue_mapping(skb);
824835
if (q_no >= oct->num_iqs) {
825836
netdev_err(netdev, "Invalid Tx skb->queue_mapping=%d\n", q_no);
826837
q_no = q_no % oct->num_iqs;
827838
}
828839

829840
iq = oct->iq[q_no];
830-
if (octep_iq_full_check(iq)) {
831-
iq->stats.tx_busy++;
832-
return NETDEV_TX_BUSY;
833-
}
834841

835842
shinfo = skb_shinfo(skb);
836843
nr_frags = shinfo->nr_frags;
@@ -869,9 +876,6 @@ static netdev_tx_t octep_start_xmit(struct sk_buff *skb,
869876
if (dma_mapping_error(iq->dev, dma))
870877
goto dma_map_err;
871878

872-
dma_sync_single_for_cpu(iq->dev, tx_buffer->sglist_dma,
873-
OCTEP_SGLIST_SIZE_PER_PKT,
874-
DMA_TO_DEVICE);
875879
memset(sglist, 0, OCTEP_SGLIST_SIZE_PER_PKT);
876880
sglist[0].len[3] = len;
877881
sglist[0].dma_ptr[0] = dma;
@@ -891,26 +895,33 @@ static netdev_tx_t octep_start_xmit(struct sk_buff *skb,
891895
frag++;
892896
si++;
893897
}
894-
dma_sync_single_for_device(iq->dev, tx_buffer->sglist_dma,
895-
OCTEP_SGLIST_SIZE_PER_PKT,
896-
DMA_TO_DEVICE);
897-
898898
hw_desc->dptr = tx_buffer->sglist_dma;
899899
}
900900

901-
netdev_tx_sent_queue(iq->netdev_q, skb->len);
901+
xmit_more = netdev_xmit_more();
902+
903+
__netdev_tx_sent_queue(iq->netdev_q, skb->len, xmit_more);
904+
902905
skb_tx_timestamp(skb);
903-
atomic_inc(&iq->instr_pending);
906+
iq->fill_cnt++;
904907
wi++;
905-
if (wi == iq->max_count)
906-
wi = 0;
907-
iq->host_write_index = wi;
908+
iq->host_write_index = wi & iq->ring_size_mask;
909+
910+
/* octep_iq_full_check stops the queue and returns
911+
* true if so, in case the queue has become full
912+
* by inserting current packet. If so, we can
913+
* go ahead and ring doorbell.
914+
*/
915+
if (!octep_iq_full_check(iq) && xmit_more &&
916+
iq->fill_cnt < iq->fill_threshold)
917+
return NETDEV_TX_OK;
918+
908919
/* Flush the hw descriptor before writing to doorbell */
909920
wmb();
910-
911-
/* Ring Doorbell to notify the NIC there is a new packet */
912-
writel(1, iq->doorbell_reg);
913-
iq->stats.instr_posted++;
921+
/* Ring Doorbell to notify the NIC of new packets */
922+
writel(iq->fill_cnt, iq->doorbell_reg);
923+
iq->stats.instr_posted += iq->fill_cnt;
924+
iq->fill_cnt = 0;
914925
return NETDEV_TX_OK;
915926

916927
dma_map_sg_err:

drivers/net/ethernet/marvell/octeon_ep/octep_main.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,15 @@
4040
#define OCTEP_OQ_INTR_RESEND_BIT 59
4141

4242
#define OCTEP_MMIO_REGIONS 3
43+
44+
/* Count of instructions pending in the Tx ring, computed as the distance
 * from flush_index to host_write_index, wrapped with ring_size_mask.
 * The argument is copied into a local so it is evaluated only once.
 * NOTE(review): the masking presumably relies on the ring size being a
 * power of two (ring_size_mask == max_count - 1) - confirm.
 */
#define IQ_INSTR_PENDING(iq) ({ typeof(iq) iq__ = (iq); \
45+
((iq__)->host_write_index - (iq__)->flush_index) & \
46+
(iq__)->ring_size_mask; \
47+
})
48+
/* Free descriptor slots in the Tx ring: max_count minus the pending count
 * reported by IQ_INSTR_PENDING(). Uses a differently named local (iq_) than
 * the nested macro (iq__) so the two statement expressions do not clash.
 */
#define IQ_INSTR_SPACE(iq) ({ typeof(iq) iq_ = (iq); \
49+
(iq_)->max_count - IQ_INSTR_PENDING(iq_); \
50+
})
51+
4352
/* PCI address space mapping information.
4453
* Each of the 3 address spaces given by BAR0, BAR2 and BAR4 of
4554
* Octeon gets mapped to different physical address spaces in

drivers/net/ethernet/marvell/octeon_ep/octep_tx.c

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ static void octep_iq_reset_indices(struct octep_iq *iq)
2121
iq->flush_index = 0;
2222
iq->pkts_processed = 0;
2323
iq->pkt_in_done = 0;
24-
atomic_set(&iq->instr_pending, 0);
2524
}
2625

2726
/**
@@ -82,7 +81,6 @@ int octep_iq_process_completions(struct octep_iq *iq, u16 budget)
8281
}
8382

8483
iq->pkts_processed += compl_pkts;
85-
atomic_sub(compl_pkts, &iq->instr_pending);
8684
iq->stats.instr_completed += compl_pkts;
8785
iq->stats.bytes_sent += compl_bytes;
8886
iq->stats.sgentry_sent += compl_sg;
@@ -91,7 +89,7 @@ int octep_iq_process_completions(struct octep_iq *iq, u16 budget)
9189
netdev_tx_completed_queue(iq->netdev_q, compl_pkts, compl_bytes);
9290

9391
if (unlikely(__netif_subqueue_stopped(iq->netdev, iq->q_no)) &&
94-
((iq->max_count - atomic_read(&iq->instr_pending)) >
92+
(IQ_INSTR_SPACE(iq) >
9593
OCTEP_WAKE_QUEUE_THRESHOLD))
9694
netif_wake_subqueue(iq->netdev, iq->q_no);
9795
return !budget;
@@ -144,7 +142,6 @@ static void octep_iq_free_pending(struct octep_iq *iq)
144142
dev_kfree_skb_any(skb);
145143
}
146144

147-
atomic_set(&iq->instr_pending, 0);
148145
iq->flush_index = fi;
149146
netdev_tx_reset_queue(netdev_get_tx_queue(iq->netdev, iq->q_no));
150147
}

drivers/net/ethernet/marvell/octeon_ep/octep_tx.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -172,9 +172,6 @@ struct octep_iq {
172172
/* Statistics for this input queue. */
173173
struct octep_iq_stats stats;
174174

175-
/* This field keeps track of the instructions pending in this queue. */
176-
atomic_t instr_pending;
177-
178175
/* Pointer to the Virtual Base addr of the input ring. */
179176
struct octep_tx_desc_hw *desc_ring;
180177

0 commit comments

Comments
 (0)