drivers: udc_dwc2: Optimize endpoint interrupt handling

tmon-nordic · kartben · commit 8b4c53e05ade · 2025-05-20T12:47:34.000+02:00
SEGGER Ozone J-Trace Code Profile identified iterations over daint value
as hot path. The iterations show at the very top of code profile because
full iteration happens whenever there is any activity on endpoint.

Optimize daint handling loops so only set bits are iterated over. While
this optimization depends on find_lsb_set() efficiency, it seems to be
worth it solely on the basis that quite often only a few bits are set.

After a bit deeper analysis, I was surprised that on ARM Cortex-M33 the
find_lsb_set() approach is faster than naive iteration even if all bits
are set (which is an extreme case because USB applications are unlikely to
use all 16 IN and 16 OUT endpoints simultaneously). This is due to the fact
that there is only one conditional jump CBNZ and find_lsb_set() - 1
translates to RBIT + CLZ and then clearing the bit uses LSL.W + BIC.W.
In contrast, the naive iteration uses ADDS + CMP + BNE for the loop handling
and also has LSR.W + LSLS + BPL (+ ADD.W instruction on each iteration
to add 16 for OUT endpoints) for the continue check. Therefore the
optimized code on ARM Cortex-M33 is never worse than naive iteration.

Signed-off-by: Tomasz Moń <tomasz.mon@nordicsemi.no>
diff --git a/drivers/usb/common/usb_dwc2_hw.h b/drivers/usb/common/usb_dwc2_hw.h
@@ -780,6 +780,14 @@ USB_DWC2_GET_FIELD_DEFINE(dsts_enumspd, DSTS_ENUMSPD)
 
 /* Device all endpoints interrupt registers */
 #define USB_DWC2_DAINT				0x0818UL
+#define USB_DWC2_DAINT_OUTEPINT_POS		16UL
+#define USB_DWC2_DAINT_OUTEPINT_MASK		(0xFFFFUL << USB_DWC2_DAINT_OUTEPINT_POS)
+#define USB_DWC2_DAINT_INEPINT_POS		0UL
+#define USB_DWC2_DAINT_INEPINT_MASK		(0xFFFFUL << USB_DWC2_DAINT_INEPINT_POS)
+
+USB_DWC2_GET_FIELD_DEFINE(daint_outepint, DAINT_OUTEPINT)
+USB_DWC2_GET_FIELD_DEFINE(daint_inepint, DAINT_INEPINT)
+
 #define USB_DWC2_DAINTMSK			0x081CUL
 #define USB_DWC2_DAINT_OUTEPINT(ep_num)		BIT(16UL + ep_num)
 #define USB_DWC2_DAINT_INEPINT(ep_num)		BIT(ep_num)
diff --git a/drivers/usb/udc/udc_dwc2.c b/drivers/usb/udc/udc_dwc2.c
@@ -2570,32 +2570,31 @@ static inline void dwc2_handle_in_xfercompl(const struct device *dev,
 static inline void dwc2_handle_iepint(const struct device *dev)
 {
 	struct usb_dwc2_reg *const base = dwc2_get_base(dev);
-	const uint8_t n_max = 16;
 	uint32_t diepmsk;
-	uint32_t daint;
+	uint32_t epint;
 
 	diepmsk = sys_read32((mem_addr_t)&base->diepmsk);
-	daint = sys_read32((mem_addr_t)&base->daint);
+	epint = usb_dwc2_get_daint_inepint(sys_read32((mem_addr_t)&base->daint));
 
-	for (uint8_t n = 0U; n < n_max; n++) {
+	while (epint) {
+		uint8_t n = find_lsb_set(epint) - 1;
 		mem_addr_t diepint_reg = (mem_addr_t)&base->in_ep[n].diepint;
 		uint32_t diepint;
 		uint32_t status;
 
-		if (daint & USB_DWC2_DAINT_INEPINT(n)) {
-			/* Read and clear interrupt status */
-			diepint = sys_read32(diepint_reg);
-			status = diepint & diepmsk;
-			sys_write32(status, diepint_reg);
-
-			LOG_DBG("ep 0x%02x interrupt status: 0x%x",
-				n | USB_EP_DIR_IN, status);
+		/* Read and clear interrupt status */
+		diepint = sys_read32(diepint_reg);
+		status = diepint & diepmsk;
+		sys_write32(status, diepint_reg);
 
-			if (status & USB_DWC2_DIEPINT_XFERCOMPL) {
-				dwc2_handle_in_xfercompl(dev, n);
-			}
+		LOG_DBG("ep 0x%02x interrupt status: 0x%x",
+			n | USB_EP_DIR_IN, status);
 
+		if (status & USB_DWC2_DIEPINT_XFERCOMPL) {
+			dwc2_handle_in_xfercompl(dev, n);
 		}
+
+		epint &= ~BIT(n);
 	}
 
 	/* Clear IEPINT interrupt */
@@ -2678,22 +2677,18 @@ static inline void dwc2_handle_oepint(const struct device *dev)
 {
 	struct usb_dwc2_reg *const base = dwc2_get_base(dev);
 	struct udc_dwc2_data *const priv = udc_get_private(dev);
-	const uint8_t n_max = 16;
 	uint32_t doepmsk;
-	uint32_t daint;
+	uint32_t epint;
 
 	doepmsk = sys_read32((mem_addr_t)&base->doepmsk);
-	daint = sys_read32((mem_addr_t)&base->daint);
+	epint = usb_dwc2_get_daint_outepint(sys_read32((mem_addr_t)&base->daint));
 
-	for (uint8_t n = 0U; n < n_max; n++) {
+	while (epint) {
+		uint8_t n = find_lsb_set(epint) - 1;
 		mem_addr_t doepint_reg = (mem_addr_t)&base->out_ep[n].doepint;
 		uint32_t doepint;
 		uint32_t status;
 
-		if (!(daint & USB_DWC2_DAINT_OUTEPINT(n))) {
-			continue;
-		}
-
 		/* Read and clear interrupt status */
 		doepint = sys_read32(doepint_reg);
 		status = doepint & doepmsk;
@@ -2739,6 +2734,8 @@ static inline void dwc2_handle_oepint(const struct device *dev)
 		if (status & USB_DWC2_DOEPINT_XFERCOMPL) {
 			dwc2_handle_out_xfercompl(dev, n);
 		}
+
+		epint &= ~BIT(n);
 	}
 
 	/* Clear OEPINT interrupt */