Skip to content

Commit 8b4c53e

Browse files
tmon-nordic authored and kartben committed
drivers: udc_dwc2: Optimize endpoint interrupt handling
SEGGER Ozone J-Trace Code Profile identified iterations over the daint value as a hot path. The iterations show at the very top of the code profile because a full iteration happens whenever there is any activity on an endpoint. Optimize the daint handling loops so that only set bits are iterated over. While this optimization depends on find_lsb_set() efficiency, it seems to be worth it solely on the basis that quite often only a few bits are set. After a bit deeper analysis, I was surprised that on ARM Cortex-M33 the find_lsb_set() approach is faster than naive iteration even if all bits are set (which is an extreme case because USB applications are unlikely to use all 16 IN and 16 OUT endpoints simultaneously). This is due to the fact that there is only one conditional jump CBNZ and find_lsb_set() - 1 translates to RBIT + CLZ and then clearing the bit uses LSL.W + BIC.W. Whereas the naive iteration uses ADDS + CMP + BNE for the loop handling and also has LSR.W + LSLS + BPL (+ ADD.W instruction on each iteration to add 16 for OUT endpoints) for the continue check. Therefore the optimized code on ARM Cortex-M33 is never worse than naive iteration. Signed-off-by: Tomasz Moń <tomasz.mon@nordicsemi.no>
1 parent 050b8a9 commit 8b4c53e

File tree

2 files changed

+28
-23
lines changed

2 files changed

+28
-23
lines changed

drivers/usb/common/usb_dwc2_hw.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -780,6 +780,14 @@ USB_DWC2_GET_FIELD_DEFINE(dsts_enumspd, DSTS_ENUMSPD)
780780

781781
/* Device all endpoints interrupt registers */
782782
#define USB_DWC2_DAINT 0x0818UL
783+
#define USB_DWC2_DAINT_OUTEPINT_POS 16UL
784+
#define USB_DWC2_DAINT_OUTEPINT_MASK (0xFFFFUL << USB_DWC2_DAINT_OUTEPINT_POS)
785+
#define USB_DWC2_DAINT_INEPINT_POS 0UL
786+
#define USB_DWC2_DAINT_INEPINT_MASK (0xFFFFUL << USB_DWC2_DAINT_INEPINT_POS)
787+
788+
USB_DWC2_GET_FIELD_DEFINE(daint_outepint, DAINT_OUTEPINT)
789+
USB_DWC2_GET_FIELD_DEFINE(daint_inepint, DAINT_INEPINT)
790+
783791
#define USB_DWC2_DAINTMSK 0x081CUL
784792
#define USB_DWC2_DAINT_OUTEPINT(ep_num) BIT(16UL + ep_num)
785793
#define USB_DWC2_DAINT_INEPINT(ep_num) BIT(ep_num)

drivers/usb/udc/udc_dwc2.c

Lines changed: 20 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2570,32 +2570,31 @@ static inline void dwc2_handle_in_xfercompl(const struct device *dev,
25702570
static inline void dwc2_handle_iepint(const struct device *dev)
25712571
{
25722572
struct usb_dwc2_reg *const base = dwc2_get_base(dev);
2573-
const uint8_t n_max = 16;
25742573
uint32_t diepmsk;
2575-
uint32_t daint;
2574+
uint32_t epint;
25762575

25772576
diepmsk = sys_read32((mem_addr_t)&base->diepmsk);
2578-
daint = sys_read32((mem_addr_t)&base->daint);
2577+
epint = usb_dwc2_get_daint_inepint(sys_read32((mem_addr_t)&base->daint));
25792578

2580-
for (uint8_t n = 0U; n < n_max; n++) {
2579+
while (epint) {
2580+
uint8_t n = find_lsb_set(epint) - 1;
25812581
mem_addr_t diepint_reg = (mem_addr_t)&base->in_ep[n].diepint;
25822582
uint32_t diepint;
25832583
uint32_t status;
25842584

2585-
if (daint & USB_DWC2_DAINT_INEPINT(n)) {
2586-
/* Read and clear interrupt status */
2587-
diepint = sys_read32(diepint_reg);
2588-
status = diepint & diepmsk;
2589-
sys_write32(status, diepint_reg);
2590-
2591-
LOG_DBG("ep 0x%02x interrupt status: 0x%x",
2592-
n | USB_EP_DIR_IN, status);
2585+
/* Read and clear interrupt status */
2586+
diepint = sys_read32(diepint_reg);
2587+
status = diepint & diepmsk;
2588+
sys_write32(status, diepint_reg);
25932589

2594-
if (status & USB_DWC2_DIEPINT_XFERCOMPL) {
2595-
dwc2_handle_in_xfercompl(dev, n);
2596-
}
2590+
LOG_DBG("ep 0x%02x interrupt status: 0x%x",
2591+
n | USB_EP_DIR_IN, status);
25972592

2593+
if (status & USB_DWC2_DIEPINT_XFERCOMPL) {
2594+
dwc2_handle_in_xfercompl(dev, n);
25982595
}
2596+
2597+
epint &= ~BIT(n);
25992598
}
26002599

26012600
/* Clear IEPINT interrupt */
@@ -2678,22 +2677,18 @@ static inline void dwc2_handle_oepint(const struct device *dev)
26782677
{
26792678
struct usb_dwc2_reg *const base = dwc2_get_base(dev);
26802679
struct udc_dwc2_data *const priv = udc_get_private(dev);
2681-
const uint8_t n_max = 16;
26822680
uint32_t doepmsk;
2683-
uint32_t daint;
2681+
uint32_t epint;
26842682

26852683
doepmsk = sys_read32((mem_addr_t)&base->doepmsk);
2686-
daint = sys_read32((mem_addr_t)&base->daint);
2684+
epint = usb_dwc2_get_daint_outepint(sys_read32((mem_addr_t)&base->daint));
26872685

2688-
for (uint8_t n = 0U; n < n_max; n++) {
2686+
while (epint) {
2687+
uint8_t n = find_lsb_set(epint) - 1;
26892688
mem_addr_t doepint_reg = (mem_addr_t)&base->out_ep[n].doepint;
26902689
uint32_t doepint;
26912690
uint32_t status;
26922691

2693-
if (!(daint & USB_DWC2_DAINT_OUTEPINT(n))) {
2694-
continue;
2695-
}
2696-
26972692
/* Read and clear interrupt status */
26982693
doepint = sys_read32(doepint_reg);
26992694
status = doepint & doepmsk;
@@ -2739,6 +2734,8 @@ static inline void dwc2_handle_oepint(const struct device *dev)
27392734
if (status & USB_DWC2_DOEPINT_XFERCOMPL) {
27402735
dwc2_handle_out_xfercompl(dev, n);
27412736
}
2737+
2738+
epint &= ~BIT(n);
27422739
}
27432740

27442741
/* Clear OEPINT interrupt */

0 commit comments

Comments
 (0)