-
Notifications
You must be signed in to change notification settings - Fork 308
Closed
Labels
performance — Performance seems to be not as good as it could be
Milestone
Description
Actionable items from this investigation:
- Find a way to use ROM memset/memcpy - if it can't work, we should ensure memcpy and memset are in RAM somehow #1254
- Replace `const`s with `static`s if they're referenced in interrupt handling code
- Find a way to get LLVM to either
- Put jump tables in RAM
- Don't emit jump tables for specific functions
- Linker magic to place them in RAM on a best effort basis
- Place compiler builtins into RAM Find a way to use ROM memset/memcpy - if it can't work, we should ensure memcpy and memset are in RAM somehow #1254 (comment)
- Xtensa: optimize get_configured_interrupts (see how RISCV does it in Improve RISC-V interrupt latency #1679) Improve xtensa interrupt latency #1735
Original issue:
With these two non-scientific tests:
Vectored
#![no_std]
#![no_main]
use core::cell::RefCell;
use critical_section::Mutex;
use esp32s3_hal::{
clock::ClockControl,
interrupt::{self},
peripherals::{self, Peripherals},
prelude::*,
system::{SoftwareInterrupt, SoftwareInterruptControl},
Delay,
};
use esp_backtrace as _;
// Shared slot for the software-interrupt controller: `main` stores the
// controller here, and both `main` and the interrupt handler borrow it inside
// a critical section to raise / reset the software interrupt.
static SWINT: Mutex<RefCell<Option<SoftwareInterruptControl>>> = Mutex::new(RefCell::new(None));
#[entry]
fn main() -> ! {
    // Vectored variant of the latency test: the software interrupt is enabled
    // through the HAL's `interrupt::enable` and serviced by `FROM_CPU_INTR0`.
    let peripherals = Peripherals::take();
    let system = peripherals.SYSTEM.split();
    let clocks = ClockControl::boot_defaults(system.clock_control).freeze();

    // Hand the software-interrupt controller over to the shared slot so the
    // handler can reach it later.
    let sw_int = system.software_interrupt_control;
    critical_section::with(|cs| {
        SWINT.borrow_ref_mut(cs).replace(sw_int);
    });

    let enabled = interrupt::enable(
        peripherals::Interrupt::FROM_CPU_INTR0,
        interrupt::Priority::Priority3,
    );
    enabled.unwrap();

    let mut delay = Delay::new(&clocks);
    loop {
        delay.delay_ms(500u32);

        critical_section::with(|cs| {
            // Snapshot the cycle counter first, then trigger the interrupt;
            // the handler reports the distance between this snapshot and its
            // own reading, so this order must not change.
            unsafe {
                CLCK = esp32s3_hal::xtensa_lx::timer::get_cycle_count();
            }

            let mut slot = SWINT.borrow_ref_mut(cs);
            let control = slot.as_mut().unwrap();
            control.raise(SoftwareInterrupt::SoftwareInterrupt0);
        });
    }
}
// Cycle-counter snapshot written by `main` right before it raises the software
// interrupt; the handler subtracts it from its own reading to report latency.
static mut CLCK: u32 = 0u32;
#[interrupt]
fn FROM_CPU_INTR0() {
    // Handler for the vectored test: prints how many cycles elapsed between
    // the snapshot stored in `CLCK` and handler entry, then clears the
    // software interrupt.
    //
    // The counter is read before anything else so the reported number is not
    // inflated by the work done in this handler. `wrapping_sub` is used
    // because both values are `u32` counter readings: if the counter wraps
    // between the two samples, a plain `-` panics when overflow checks are
    // enabled, whereas the wrapping difference is still the elapsed count.
    //
    // SAFETY: `CLCK` is only read here; in this program its sole writer is
    // `main`, which stores it inside a critical section before raising this
    // interrupt.
    let diff = unsafe { esp32s3_hal::xtensa_lx::timer::get_cycle_count().wrapping_sub(CLCK) };
    esp_println::println!("diff = {}", diff);

    // Clear the pending software interrupt.
    critical_section::with(|cs| {
        SWINT
            .borrow_ref_mut(cs)
            .as_mut()
            .unwrap()
            .reset(SoftwareInterrupt::SoftwareInterrupt0);
    });
}
Non-vectored
#![no_std]
#![no_main]
use core::cell::RefCell;
use critical_section::Mutex;
use esp32s3_hal::{
clock::ClockControl,
interrupt::{self},
peripherals::{self, Peripherals},
prelude::*,
system::{SoftwareInterrupt, SoftwareInterruptControl},
Delay,
};
use esp_backtrace as _;
// Shared slot for the software-interrupt controller: `main` stores the
// controller here, and both `main` and `level3_interrupt` borrow it inside a
// critical section to raise / reset the software interrupt.
static SWINT: Mutex<RefCell<Option<SoftwareInterruptControl>>> = Mutex::new(RefCell::new(None));
#[entry]
fn main() -> ! {
    // Non-vectored variant of the latency test: the software interrupt is
    // mapped by hand onto a level-3 CPU interrupt line and serviced directly
    // by `level3_interrupt`.
    let peripherals = Peripherals::take();
    let system = peripherals.SYSTEM.split();
    let clocks = ClockControl::boot_defaults(system.clock_control).freeze();

    // Hand the software-interrupt controller over to the shared slot so the
    // handler can reach it later.
    let sw_int = system.software_interrupt_control;
    critical_section::with(|cs| {
        SWINT.borrow_ref_mut(cs).replace(sw_int);
    });

    unsafe {
        // NOTE(review): the reason SDIO_HOST is disabled here is not evident
        // from this snippet — confirm before reusing.
        interrupt::disable(esp32s3_hal::get_core(), peripherals::Interrupt::SDIO_HOST);

        // Route FROM_CPU_INTR0 to CPU interrupt 23 (level priority 3).
        interrupt::map(
            esp32s3_hal::get_core(),
            peripherals::Interrupt::FROM_CPU_INTR0,
            interrupt::CpuInterrupt::Interrupt23LevelPriority3,
        );

        // Enable that line in addition to whatever is already enabled.
        let current_mask = esp32s3_hal::xtensa_lx::interrupt::get_mask();
        let line_bit = 1 << (interrupt::CpuInterrupt::Interrupt23LevelPriority3 as u32);
        esp32s3_hal::xtensa_lx::interrupt::enable_mask(current_mask | line_bit);
    }

    let mut delay = Delay::new(&clocks);
    loop {
        delay.delay_ms(500u32);

        critical_section::with(|cs| {
            // Snapshot the cycle counter first, then trigger the interrupt;
            // the handler reports the distance between this snapshot and its
            // own reading, so this order must not change.
            unsafe {
                CLCK = esp32s3_hal::xtensa_lx::timer::get_cycle_count();
            }

            let mut slot = SWINT.borrow_ref_mut(cs);
            let control = slot.as_mut().unwrap();
            control.raise(SoftwareInterrupt::SoftwareInterrupt0);
        });
    }
}
// Cycle-counter snapshot written by `main` right before it raises the software
// interrupt; the handler subtracts it from its own reading to report latency.
static mut CLCK: u32 = 0u32;
#[no_mangle]
extern "C" fn level3_interrupt() {
    // Handler for the non-vectored test: prints how many cycles elapsed
    // between the snapshot stored in `CLCK` and handler entry, then clears
    // the software interrupt.
    //
    // The counter is read before anything else so the reported number is not
    // inflated by the work done in this handler. `wrapping_sub` is used
    // because both values are `u32` counter readings: if the counter wraps
    // between the two samples, a plain `-` panics when overflow checks are
    // enabled, whereas the wrapping difference is still the elapsed count.
    //
    // SAFETY: `CLCK` is only read here; in this program its sole writer is
    // `main`, which stores it inside a critical section before raising this
    // interrupt.
    let diff = unsafe { esp32s3_hal::xtensa_lx::timer::get_cycle_count().wrapping_sub(CLCK) };
    esp_println::println!("diff = {}", diff);

    // Clear the pending software interrupt.
    critical_section::with(|cs| {
        SWINT
            .borrow_ref_mut(cs)
            .as_mut()
            .unwrap()
            .reset(SoftwareInterrupt::SoftwareInterrupt0);
    });
}
I get these results
with saving floats
vectored = 1111 cycles
non-vectored = 214
w/o saving floats
vectored = 1096
non-vectored = 199
w/o saving floats, w/o spilling registers
vectored = 1043
non-vectored = 146
Latency is more than five times higher with vectoring enabled.
While it's more or less possible to use vectored and non-vectored interrupts together on RISC-V, that is currently not possible in a sane way for Xtensa. This hurts esp-wifi a lot, but it also hurts async performance.
Metadata
Metadata
Assignees
Labels
performance — Performance seems to be not as good as it could be
Type
Projects
Status
Done