diff --git a/components/sdmmc/Kconfig b/components/sdmmc/Kconfig
index 0d2ed43feeec..0acfebdc4da0 100644
--- a/components/sdmmc/Kconfig
+++ b/components/sdmmc/Kconfig
@@ -5,4 +5,21 @@ menu "SD Protocol Layer Configuration"
         help
             Enable SDIO support. Disabling this will skip SDIO-specific initialization steps
 
+    config SD_UNALIGNED_MULTI_BLOCK_RW_MAX_CHUNK_SIZE
+        int "The maximum size of the chunks an SDMMC read/write to/from an unaligned buffer will be split into"
+        range 1 99999
+        default 1
+        help
+            The maximum size in blocks of the chunks an SDMMC read/write with an unaligned buffer will be split into.
+            The SDMMC driver requires aligned buffers for DMA access. If unaligned buffers are passed and the host's
+            dma_aligned_buffer is NULL, an aligned temporary buffer must be allocated for the actual transfer.
+            This option defines the maximum size for the temporary buffer, which equals this option's value multiplied
+            by the block size (typically 512 Bytes). A value of 16 therefore leads to up to 8192 bytes being
+            allocated on the heap for each transfer. The allocated buffer will never be larger than the number of bytes
+            to transfer in total.
+            It also decides whether single (value == 1) or multi block read/write (value > 1) commands are used.
+            With the default value of 1, single-block read/write commands will be used with the allocated buffer size
+            matching the block size.
+            You should keep this option at 1 if your card or configuration doesn't support the read or write multiple
+            blocks commands (CMD18 & CMD25).
 endmenu
diff --git a/components/sdmmc/include/sd_protocol_types.h b/components/sdmmc/include/sd_protocol_types.h
index d7c5196278db..1e0595c3a2b1 100644
--- a/components/sdmmc/include/sd_protocol_types.h
+++ b/components/sdmmc/include/sd_protocol_types.h
@@ -224,7 +224,17 @@ typedef struct {
     sdmmc_delay_phase_t input_delay_phase;  /*!< input delay phase, this will only take into effect when the host works in SDMMC_FREQ_HIGHSPEED or SDMMC_FREQ_52M.
Driver will print out how long the delay is*/
     esp_err_t (*set_input_delay)(int slot, sdmmc_delay_phase_t delay_phase); /*!< set input delay phase */
     esp_err_t (*set_input_delayline)(int slot, sdmmc_delay_line_t delay_line); /*!< set input delay line */
-    void* dma_aligned_buffer;   /*!< Leave it NULL. Reserved for cache aligned buffers for SDIO mode */
+    /**
+     * @brief Cache aligned buffer for multi-block RW and IO commands
+     *
+     * Use cases:
+     * - Temporary buffer for multi-block read/write transactions to/from unaligned buffers.
+     *   Allocate with DMA capable memory, size should be an integer multiple of your card's sector size.
+     *   See also Kconfig option SD_UNALIGNED_MULTI_BLOCK_RW_MAX_CHUNK_SIZE.
+     * - Cache aligned buffer for IO commands in SDIO mode.
+     *   If you allocate manually, make sure it is at least SDMMC_IO_BLOCK_SIZE bytes large.
+     */
+    void* dma_aligned_buffer;
     sd_pwr_ctrl_handle_t pwr_ctrl_handle; /*!< Power control handle */
     bool (*check_buffer_alignment)(int slot, const void *buf, size_t size); /*!< Check if buffer meets alignment requirements */
     esp_err_t (*is_slot_set_to_uhs1)(int slot, bool *is_uhs1); /*!< host slot is set to uhs1 or not*/
diff --git a/components/sdmmc/sdmmc_cmd.c b/components/sdmmc/sdmmc_cmd.c
index a928dee6d2ec..2e7f992ea86b 100644
--- a/components/sdmmc/sdmmc_cmd.c
+++ b/components/sdmmc/sdmmc_cmd.c
@@ -5,8 +5,9 @@
  */
 
 #include <string.h>
+#include <sys/param.h> // for MIN/MAX
 #include "esp_private/sdmmc_common.h"
 
 static const char* TAG = "sdmmc_cmd";
@@ -462,31 +466,47 @@ esp_err_t sdmmc_write_sectors(sdmmc_card_t* card, const void* src,
         err = sdmmc_write_sectors_dma(card, src, start_block, block_count, block_size * block_count);
     } else {
         // SDMMC peripheral needs DMA-capable buffers.
Split the write into
-        // separate single block writes, if needed, and allocate a temporary
+        // separate (multi) block writes, if needed, and allocate a temporary
         // DMA-capable buffer.
-        void *tmp_buf = NULL;
-        size_t actual_size = 0;
-        // We don't want to force the allocation into SPIRAM, the allocator
-        // will decide based on the buffer size and memory availability.
-        tmp_buf = heap_caps_malloc(block_size, MALLOC_CAP_DMA);
-        if (!tmp_buf) {
-            ESP_LOGE(TAG, "%s: not enough mem, err=0x%x", __func__, ESP_ERR_NO_MEM);
-            return ESP_ERR_NO_MEM;
+        size_t blocks_per_write = MIN(CONFIG_SD_UNALIGNED_MULTI_BLOCK_RW_MAX_CHUNK_SIZE, block_count);
+
+        // prefer using DMA aligned buffer if available over allocating local temporary buffer
+        bool use_dma_aligned_buffer = (card->host.dma_aligned_buffer != NULL);
+        void* buf = use_dma_aligned_buffer ? card->host.dma_aligned_buffer : NULL;
+
+        // only allocate temporary buffer if we can't use the dma_aligned buffer
+        if (!use_dma_aligned_buffer) {
+            // We don't want to force the allocation into SPIRAM, the allocator
+            // will decide based on the buffer size and memory availability.
+            buf = heap_caps_malloc(block_size * blocks_per_write, MALLOC_CAP_DMA);
+            if (!buf) {
+                ESP_LOGE(TAG, "%s: not enough mem, err=0x%x", __func__, ESP_ERR_NO_MEM);
+                return ESP_ERR_NO_MEM;
+            }
+        }
+        // NOTE: assumes dma_aligned_buffer was allocated via heap_caps as documented
+        size_t actual_size = heap_caps_get_allocated_size(buf);
+        blocks_per_write = actual_size / card->csd.sector_size;
+        if (blocks_per_write == 0) {
+            ESP_LOGE(TAG, "%s: buffer smaller than sector size: buf=%zu, sector=%d",
+                     __func__, actual_size, card->csd.sector_size);
+            if (!use_dma_aligned_buffer) {
+                free(buf);
+            }
+            return ESP_ERR_INVALID_SIZE;
         }
-        actual_size = heap_caps_get_allocated_size(tmp_buf);
         const uint8_t* cur_src = (const uint8_t*) src;
-        for (size_t i = 0; i < block_count; ++i) {
-            memcpy(tmp_buf, cur_src, block_size);
-            cur_src += block_size;
-            err = sdmmc_write_sectors_dma(card, tmp_buf, start_block + i, 1, actual_size);
+        for (size_t i = 0; i < block_count; i += blocks_per_write) {
+            // make sure not to write more than the remaining blocks, i.e. block_count - i
+            blocks_per_write = MIN(blocks_per_write, (block_count - i));
+            memcpy(buf, cur_src, block_size * blocks_per_write);
+            cur_src += block_size * blocks_per_write;
+            err = sdmmc_write_sectors_dma(card, buf, start_block + i, blocks_per_write, actual_size);
             if (err != ESP_OK) {
-                ESP_LOGD(TAG, "%s: error 0x%x writing block %d+%d",
-                        __func__, err, start_block, i);
+                ESP_LOGD(TAG, "%s: error 0x%x writing blocks %d+[%d..%d]",
+                         __func__, err, start_block, i, i + blocks_per_write - 1);
                 break;
             }
         }
-        free(tmp_buf);
+        if (!use_dma_aligned_buffer) {
+            free(buf);
+        }
     }
     return err;
 }
@@ -600,33 +620,50 @@ esp_err_t sdmmc_read_sectors(sdmmc_card_t* card, void* dst,
         err = sdmmc_read_sectors_dma(card, dst, start_block, block_count, block_size * block_count);
     } else {
         // SDMMC peripheral needs DMA-capable buffers. Split the read into
-        // separate single block reads, if needed, and allocate a temporary
+        // separate (multi) block reads, if needed, and allocate a temporary
         // DMA-capable buffer.
-        void *tmp_buf = NULL;
-        size_t actual_size = 0;
-        tmp_buf = heap_caps_malloc(block_size, MALLOC_CAP_DMA);
-        if (!tmp_buf) {
-            ESP_LOGE(TAG, "%s: not enough mem, err=0x%x", __func__, ESP_ERR_NO_MEM);
-            return ESP_ERR_NO_MEM;
+        size_t blocks_per_read = MIN(CONFIG_SD_UNALIGNED_MULTI_BLOCK_RW_MAX_CHUNK_SIZE, block_count);
+
+        // prefer using DMA aligned buffer if available over allocating local temporary buffer
+        bool use_dma_aligned_buffer = (card->host.dma_aligned_buffer != NULL);
+        void* buf = use_dma_aligned_buffer ? card->host.dma_aligned_buffer : NULL;
+
+        // only allocate temporary buffer if we can't use the dma_aligned buffer
+        if (!use_dma_aligned_buffer) {
+            // We don't want to force the allocation into SPIRAM, the allocator
+            // will decide based on the buffer size and memory availability.
+            buf = heap_caps_malloc(block_size * blocks_per_read, MALLOC_CAP_DMA);
+            if (!buf) {
+                ESP_LOGE(TAG, "%s: not enough mem, err=0x%x", __func__, ESP_ERR_NO_MEM);
+                return ESP_ERR_NO_MEM;
+            }
+        }
+        // NOTE: assumes dma_aligned_buffer was allocated via heap_caps as documented
+        size_t actual_size = heap_caps_get_allocated_size(buf);
+        blocks_per_read = actual_size / card->csd.sector_size;
+        if (blocks_per_read == 0) {
+            ESP_LOGE(TAG, "%s: buffer smaller than sector size: buf=%zu, sector=%d",
+                     __func__, actual_size, card->csd.sector_size);
+            if (!use_dma_aligned_buffer) {
+                free(buf);
+            }
+            return ESP_ERR_INVALID_SIZE;
         }
-        actual_size = heap_caps_get_allocated_size(tmp_buf);
         uint8_t* cur_dst = (uint8_t*) dst;
-        for (size_t i = 0; i < block_count; ++i) {
-            err = sdmmc_read_sectors_dma(card, tmp_buf, start_block + i, 1, actual_size);
+        for (size_t i = 0; i < block_count; i += blocks_per_read) {
+            // make sure not to read more than the remaining blocks, i.e. block_count - i
+            blocks_per_read = MIN(blocks_per_read, (block_count - i));
+            err = sdmmc_read_sectors_dma(card, buf, start_block + i, blocks_per_read, actual_size);
             if (err != ESP_OK) {
-                ESP_LOGD(TAG, "%s: error 0x%x writing block %d+%d",
-                        __func__, err, start_block, i);
+                ESP_LOGD(TAG, "%s: error 0x%x reading blocks %d+[%d..%d]",
+                         __func__, err, start_block, i, i + blocks_per_read - 1);
                 break;
             }
-            memcpy(cur_dst, tmp_buf, block_size);
-            cur_dst += block_size;
+            memcpy(cur_dst, buf, block_size * blocks_per_read);
+            cur_dst += block_size * blocks_per_read;
+        }
+        if (!use_dma_aligned_buffer) {
+            free(buf);
         }
-        free(tmp_buf);
     }
     return err;
 }
-
 esp_err_t sdmmc_read_sectors_dma(sdmmc_card_t* card, void* dst,
     size_t start_block, size_t block_count, size_t buffer_len)
 {