Improve read performance by switching to DMA and multi-block operations

This commit is contained in:
Mario Hüttel 2020-03-01 20:13:31 +01:00
parent e616e22dd7
commit 04a1714237
2 changed files with 354 additions and 118 deletions

View File

@ -2,14 +2,20 @@
#include "shimatta_sdio_config.h"
#include <cmsis/core_cm4.h>
#include <stm32f4xx.h>
#include <string.h>
#include <stdbool.h>
#ifndef CONCAT
#define CONCAT(x,y) x##y
#define XCONCAT(x,y) CONCAT(x,y)
#endif
extern void sdio_wait_ms(unsigned int i);
#define SETAF(PORT,PIN,AF) PORT->AFR[(PIN < 8 ? 0 : 1)] |= AF << ((PIN < 8 ? PIN : (PIN - 8)) * 4)
#define READCTRL ((BLOCKSIZE << 4) | SDIO_DCTRL_DMAEN)
#define DMAP2M (DMA_SxCR_CHSEL_2 | DMA_SxCR_PBURST_0 | DMA_SxCR_MBURST_0 | DMA_SxCR_MSIZE_1 | DMA_SxCR_PSIZE_1 | DMA_SxCR_MINC | DMA_SxCR_PFCTRL)
#define DMAM2P (DMA_SxCR_CHSEL_2 | DMA_SxCR_PBURST_0 | DMA_SxCR_MBURST_0 | DMA_SxCR_MSIZE_1 | DMA_SxCR_PSIZE_1 | DMA_SxCR_MINC | DMA_SxCR_PFCTRL | DMA_SxCR_DIR_0)
#define DMAP2M (DMA_SxCR_CHSEL_2 | DMA_SxCR_PBURST_0 | /*DMA_SxCR_MBURST_0 |*/ DMA_SxCR_MSIZE_1 | DMA_SxCR_PSIZE_1 | DMA_SxCR_MINC | DMA_SxCR_PFCTRL)
#define DMAM2P (DMA_SxCR_CHSEL_2 | DMA_SxCR_PBURST_0 | /*DMA_SxCR_MBURST_0 |*/ DMA_SxCR_MSIZE_1 | DMA_SxCR_PSIZE_1 | DMA_SxCR_MINC | DMA_SxCR_PFCTRL | DMA_SxCR_DIR_0)
#define SHORT_ANS 1
#define LONG_ANS 3
#define NO_ANS 0
@ -28,6 +34,12 @@ typedef uint8_t CID_t;
static struct sd_info card_info; // = {.type = CARD_NONE};
#if USE_DMA
/*
 * Bounce buffers for DMA reads whose destination is not word aligned.
 * The RX DMA stream is configured for 32 bit memory accesses, so both buffers
 * are explicitly aligned to 4 bytes. A plain char array only guarantees byte
 * alignment and could be placed at an odd address by the linker.
 */
static volatile char aligned_sector_buff_one[1<<BLOCKSIZE] __attribute__((aligned(4)));
static volatile char aligned_sector_buff_two[1<<BLOCKSIZE] __attribute__((aligned(4)));
/* Index 0 / 1 select the buffer; the reader alternates between both of them */
static volatile char *aligned_sector_buffs[2] = {aligned_sector_buff_one, aligned_sector_buff_two};
#endif
/**
* @brief checkNotInserted
* @return return 0 if card is inserted, else 1
@ -77,13 +89,6 @@ static int sdio_get_response(uint8_t expected_command, uint8_t type_of_answer, u
while (1) {
sdio_status = SDIO->STA;
/* Check if a valid response was received */
if (sdio_status & SDIO_STA_CMDREND)
break;
if ((sdio_status & SDIO_STA_CMDSENT) && (type_of_answer == NO_ANS))
break; // No response required
/* Exclude ACMD41 and CMD2 from valid CRC check */
if ((sdio_status & SDIO_STA_CCRCFAIL)) {
if(expected_command == 0xff) {
@ -95,6 +100,14 @@ static int sdio_get_response(uint8_t expected_command, uint8_t type_of_answer, u
if (sdio_status & SDIO_STA_CTIMEOUT)
return -CTIMEOUT;
/* Check if a valid response was received */
if (sdio_status & SDIO_STA_CMDREND)
break;
if ((sdio_status & SDIO_STA_CMDSENT) && (type_of_answer == NO_ANS))
break; // No response required
}
/* Valid Response Received */
@ -183,13 +196,12 @@ static int sdio_send_csd_cmd9(uint16_t rca, uint32_t *response_buffer) {
* @param dlen Data length. Must be a multiple of 4 bytes
* @param blklen Log2 of block length (9 in case of 512 byte block)
* @param buff Buffer to send
* @return -1 in case of error like underrun
*/
static void sdio_write_buffer(uint32_t dlen, uint32_t log_blklen, const unsigned char *buff)
static int __attribute__((optimize("O3"))) sdio_write_buffer(uint32_t dlen, uint32_t log_blklen, const unsigned char *buff)
{
uint32_t count;
int byte_count;
int byte_max;
uint32_t fifo;
uint32_t fifo_buff[8];
SDIO->DLEN = dlen;
@ -199,28 +211,32 @@ static void sdio_write_buffer(uint32_t dlen, uint32_t log_blklen, const unsigned
SDIO_ICR_STBITERRC | SDIO_ICR_DBCKENDC | SDIO_ICR_SDIOITC | SDIO_ICR_CEATAENDC;
SDIO->DCTRL = (log_blklen<<4) | SDIO_DCTRL_DTEN;
for (count = 0; count < dlen; count += 4) {
fifo = 0;
if ((dlen - count) < 4)
byte_max = dlen - count;
else
byte_max = 4;
for (byte_count = 0; byte_count < byte_max; byte_count++) {
fifo >>= 8;
fifo |= (((uint32_t)*(buff++)) << 24) & 0xFF000000;
while (dlen >= 32) {
memcpy(fifo_buff, buff, 32);
/* Wait for 8 data words to be available */
while (!(SDIO->STA & SDIO_STA_TXFIFOHE));
for (count = 0; count < 8; count++) {
SDIO->FIFO = fifo_buff[count];
}
dlen -= 32;
buff += 32;
}
/* Wait as long as FIFO is full */
while (SDIO->STA & SDIO_STA_TXFIFOF);
/* Write data to FIFO */
SDIO->FIFO = fifo;
if (dlen) {
memcpy(fifo_buff, buff, dlen);
while (!(SDIO->STA & SDIO_STA_TXFIFOHE));
for (count = 0; count < (dlen / 4); count++) {
SDIO->FIFO = fifo_buff[count];
}
}
/* Wait for TX to complete */
while (SDIO->STA & SDIO_STA_TXACT);
if (SDIO->STA & SDIO_STA_TXUNDERR)
return -1;
else
return 0;
}
static int sdio_send_write_block_cmd24(uint32_t addr)
@ -232,6 +248,15 @@ static int sdio_send_write_block_cmd24(uint32_t addr)
return sdio_get_response(24, SHORT_ANS, &response);
}
/**
 * @brief Send CMD12 with a zero argument and wait for its short answer
 * @return Result of sdio_get_response(). Callers in this file treat any
 *         non-zero value as a failure.
 */
static int sdio_send_stop_cmd12()
{
	uint32_t card_answer;
	int result;

	sdio_send_cmd(12, 0UL, SHORT_ANS);
	result = sdio_get_response(12, SHORT_ANS, &card_answer);

	return result;
}
static int sdio_check_status_register_cmd13(uint16_t rca, uint32_t *status)
{
int timeout = 0x20;
@ -373,7 +398,6 @@ static void sdio_init_hw()
static int sdio_send_read_block_cmd17(uint32_t addr)
{
uint32_t response;
int retry;
int ret;
sdio_send_cmd(17, addr, SHORT_ANS);
@ -381,6 +405,16 @@ static int sdio_send_read_block_cmd17(uint32_t addr)
return ret;
}
/**
 * @brief Send CMD18 for the given address and wait for its short answer
 * @param addr Command argument. The callers pass a sector number for
 *             SD_V2_HC cards and a byte address otherwise.
 * @return Result of sdio_get_response(). Callers in this file treat any
 *         non-zero value as a failure.
 */
static int sdio_send_read_multiple_blocks_cmd18(uint32_t addr)
{
	uint32_t card_answer;

	sdio_send_cmd(18, addr, SHORT_ANS);

	return sdio_get_response(18, SHORT_ANS, &card_answer);
}
static int sdio_send_all_send_cid_cmd2()
{
uint32_t response[4];
@ -475,6 +509,262 @@ static int sdio_send_select_card_cmd7(uint16_t rca) {
return res;
}
/**
 * @brief Clear all pending event flags of the SDIO DMA stream
 *
 * Clears the FIFO error, half transfer, transfer complete, transfer error and
 * direct mode error flags of stream DMASTREAM_NO on DMA2.
 * Compiles to an empty function if USE_DMA is disabled.
 */
static void sdio_dma_clear_flags()
{
#if USE_DMA
	/*
	 * The flag clear registers are write-1-to-clear, so a plain write is
	 * sufficient; no read-modify-write is needed.
	 */
#if DMASTREAM_NO > 3
	/* Streams 4 to 7 live in the high flag clear register */
	DMA2->HIFCR = XCONCAT(DMA_HIFCR_CFEIF, DMASTREAM_NO) |
			XCONCAT(DMA_HIFCR_CHTIF, DMASTREAM_NO) |
			XCONCAT(DMA_HIFCR_CTCIF, DMASTREAM_NO) |
			XCONCAT(DMA_HIFCR_CTEIF, DMASTREAM_NO) |
			XCONCAT(DMA_HIFCR_CDMEIF, DMASTREAM_NO);
#else
	/* Streams 0 to 3 live in the low flag clear register and use the LIFCR bit names */
	DMA2->LIFCR = XCONCAT(DMA_LIFCR_CFEIF, DMASTREAM_NO) |
			XCONCAT(DMA_LIFCR_CHTIF, DMASTREAM_NO) |
			XCONCAT(DMA_LIFCR_CTCIF, DMASTREAM_NO) |
			XCONCAT(DMA_LIFCR_CTEIF, DMASTREAM_NO) |
			XCONCAT(DMA_LIFCR_CDMEIF, DMASTREAM_NO);
#endif
#endif
}
/**
 * @brief Switch off the SDIO DMA stream
 *
 * Writes zero to the whole stream control register. This clears the enable
 * bit together with every other configuration bit of the stream.
 */
static void sdio_dma_disable()
{
	DMASTREAM->CR = 0UL;
}
/**
 * @brief Switch off the SDIO data path
 *
 * Writes zero to the SDIO data control register, which clears the transfer
 * enable bit along with all other data path settings.
 */
static void sdio_data_transfer_disable()
{
	SDIO->DCTRL = 0U;
}
/**
 * @brief Check the transfer error flag of the SDIO DMA stream
 *
 * Only the transfer error flag (TEIF) of stream DMASTREAM_NO is evaluated.
 *
 * @return -1 if the transfer error flag is set, else 0
 */
static int sdio_dma_check_error()
{
	/* Streams 4 to 7 report in HISR, streams 0 to 3 in LISR */
#if DMASTREAM_NO > 3
	const uint32_t flags = DMA2->HISR;
	const uint32_t transfer_error = XCONCAT(DMA_HISR_TEIF, DMASTREAM_NO);
#else
	const uint32_t flags = DMA2->LISR;
	const uint32_t transfer_error = XCONCAT(DMA_LISR_TEIF, DMASTREAM_NO);
#endif

	return (flags & transfer_error) ? -1 : 0;
}
/**
 * @brief Busy-wait until a running SDIO DMA data transfer has finished
 *
 * Polls the SDIO status register until the data end flag is set or an error
 * is flagged. On every SDIO error the DMA stream is disabled before returning,
 * so no path leaves a stream enabled that could still write to memory.
 * After the data end flag, the function waits for the DMA stream enable bit to
 * clear and then checks the DMA transfer error flag.
 *
 * @param read true for a card to memory transfer (RX overrun is checked),
 *             false for a memory to card transfer (TX underrun is checked)
 * @return 0 on success,
 *         -1 on data timeout,
 *         -2 on data CRC failure,
 *         -3 on FIFO overrun (read) or underrun (write),
 *         -4 if the DMA stream reports a transfer error
 */
static int sdio_wait_for_dma_transfer(bool read)
{
	uint32_t sdio_sta_reg;

	while (1) {
		sdio_sta_reg = SDIO->STA;

		if (sdio_sta_reg & SDIO_STA_DCRCFAIL) {
			sdio_dma_disable();
			return -2;
		}

		if (sdio_sta_reg & SDIO_STA_DTIMEOUT) {
			sdio_dma_disable();
			return -1;
		}

		/* Handle FIFO over- / underruns */
		if (read) {
			if (sdio_sta_reg & SDIO_STA_RXOVERR) {
				sdio_dma_disable();
				return -3;
			}
		} else {
			if (sdio_sta_reg & SDIO_STA_TXUNDERR) {
				sdio_dma_disable();
				return -3;
			}
		}

		/* Data transferred */
		if (sdio_sta_reg & SDIO_STA_DATAEND)
			break;
	}

	/* Wait for DMA to finish copying */
	while (DMASTREAM->CR & DMA_SxCR_EN);

	if (sdio_dma_check_error())
		return -4;

	return 0;
}
/**
 * @brief Set up and start the DMA stream for a card to memory transfer
 *
 * The stream copies from the SDIO FIFO register to @p buff. It uses the
 * DMAP2M settings with high priority and FIFO mode (direct mode disabled)
 * with the threshold bits FTH_0 and FTH_1 set.
 *
 * @param buff Destination memory. The stream uses 32 bit memory accesses
 *             (see DMAP2M), so the buffer has to be word aligned.
 */
static void sdio_config_rx_dma(volatile void *buff)
{
	/*
	 * The stream registers may only be programmed while the stream is
	 * disabled. Clear the enable bit and wait until the hardware confirms
	 * it, in case a previous transfer left the stream running.
	 */
	DMASTREAM->CR &= ~DMA_SxCR_EN;
	while (DMASTREAM->CR & DMA_SxCR_EN);

	sdio_dma_clear_flags();

	/* NOTE(review): the item count is left at 0; DMAP2M selects the peripheral as flow controller */
	DMASTREAM->NDTR = 0;
	DMASTREAM->FCR = DMA_SxFCR_FTH_0 | DMA_SxFCR_FTH_1 | DMA_SxFCR_DMDIS;
	DMASTREAM->M0AR = (uint32_t)(buff);
	DMASTREAM->PAR = (uint32_t)&(SDIO->FIFO);
	DMASTREAM->CR = DMAP2M | DMA_SxCR_PL_1;

	/* Enable as a separate write after the configuration is in place */
	DMASTREAM->CR |= DMA_SxCR_EN;
}
/**
 * @brief Program the SDIO data path for the next data transfer
 *
 * Sets data length and data timeout, clears the static SDIO status flags and
 * finally enables the data path with a block size of 2^BLOCKSIZE bytes.
 *
 * @param byte_len Number of bytes of the whole transfer
 * @param read true: direction card to controller, false: controller to card
 * @param dma true: let the data path issue DMA requests
 */
static void sdio_config_sdio_data_tran(uint32_t byte_len, bool read, bool dma)
{
	uint32_t data_ctrl = (BLOCKSIZE<<4) | SDIO_DCTRL_DTEN;

	if (read)
		data_ctrl |= SDIO_DCTRL_DTDIR;

	if (dma)
		data_ctrl |= SDIO_DCTRL_DMAEN;

	SDIO->DLEN = byte_len;
	SDIO->DTIMER = DTIMEOUT;

	/* Remove leftovers of previous commands and transfers */
	SDIO->ICR = SDIO_ICR_CCRCFAILC | SDIO_ICR_DCRCFAILC | SDIO_ICR_CTIMEOUTC | SDIO_ICR_DTIMEOUTC |
			SDIO_ICR_TXUNDERRC | SDIO_ICR_RXOVERRC | SDIO_ICR_CMDRENDC | SDIO_ICR_CMDSENTC |
			SDIO_ICR_DATAENDC |
			SDIO_ICR_STBITERRC | SDIO_ICR_DBCKENDC | SDIO_ICR_SDIOITC | SDIO_ICR_CEATAENDC;

	/* Written last: this write enables the data path */
	SDIO->DCTRL = data_ctrl;
}
#if USE_DMA
/**
 * @brief Read consecutive blocks from the card using DMA
 *
 * If @p dest_buffer is word aligned, the DMA writes directly to it and a
 * single (CMD17) or multi-block (CMD18, terminated by CMD12) read is used.
 *
 * If @p dest_buffer is not word aligned, the blocks are read one by one with
 * CMD17 into two alternating aligned bounce buffers. While the DMA fills one
 * buffer, the previously received block is copied from the other buffer to
 * its final location.
 *
 * On every error path the DMA stream and the SDIO data path are switched off.
 * A failed multi-block read is additionally terminated with CMD12.
 *
 * @param block_count Number of blocks to read. Zero is accepted and reads nothing.
 * @param sector_addr Number of the first block
 * @param dest_buffer Destination for block_count * 2^BLOCKSIZE bytes
 * @return 0 on success, negative value on error
 */
static int sdio_read_blocks_dma(uint32_t block_count, uint32_t sector_addr, void *dest_buffer)
{
	const uint32_t block_size = 1UL<<BLOCKSIZE;
	/* SD_V2_HC cards are addressed in blocks, all other cards in bytes */
	const uint32_t addr_step = (card_info.type == SD_V2_HC) ? 1UL : block_size;
	uint32_t addr = sector_addr * addr_step;
	uint32_t buff_id = 0;
	uint32_t count;
	char *dest = (char *)dest_buffer;
	int status;

	/* Nothing to do. Without this guard the index (count - 1) below would wrap around */
	if (block_count == 0)
		return 0;

	if ((uint32_t)dest_buffer & 0x3UL) {
		/* Unaligned destination: go through the aligned bounce buffers */
		for (count = 0; count < block_count; count++) {
			sdio_config_rx_dma(aligned_sector_buffs[buff_id]);
			sdio_config_sdio_data_tran(block_size, true, true);

			/* Init Transfer */
			if (sdio_send_read_block_cmd17(addr)) {
				sdio_dma_disable();
				sdio_data_transfer_disable();
				return -1;
			}

			/* Copy the previous block while the DMA fills the active buffer */
			if (count >= 1)
				memcpy(&dest[(count - 1) * block_size],
				       (const void *)aligned_sector_buffs[buff_id ^ 1UL], block_size);

			status = sdio_wait_for_dma_transfer(true);
			if (status) {
				sdio_dma_disable();
				sdio_data_transfer_disable();
				return status;
			}

			/* Switch to inactive buffer */
			buff_id ^= 1UL;
			addr += addr_step;
		}

		/* Copy the last transfer. It is located in the buffer that was active last */
		memcpy(&dest[(block_count - 1) * block_size],
		       (const void *)aligned_sector_buffs[buff_id ^ 1UL], block_size);

		return 0;
	}

	/* Do pure DMA transfer. This is also able to handle multi-sector reads */
	sdio_config_rx_dma(dest_buffer);
	sdio_config_sdio_data_tran(block_count * block_size, true, true);

	/* Send multi-block read cmd in case of multiple blocks */
	if (block_count > 1)
		status = sdio_send_read_multiple_blocks_cmd18(addr);
	else
		status = sdio_send_read_block_cmd17(addr);

	if (status) {
		sdio_dma_disable();
		sdio_data_transfer_disable();
		return -1;
	}

	status = sdio_wait_for_dma_transfer(true);
	if (status) {
		sdio_dma_disable();
		sdio_data_transfer_disable();
		/* End the open-ended multi-block read so the card stops sending data */
		if (block_count > 1)
			sdio_send_stop_cmd12();
		return -1;
	}

	/* Result intentionally not evaluated: the data has already been received completely */
	if (block_count > 1)
		sdio_send_stop_cmd12();

	return 0;
}
#else
/**
 * @brief Read consecutive blocks from the card by polling the SDIO RX FIFO
 *
 * Uses CMD17 for a single block and CMD18 followed by CMD12 for multiple
 * blocks. The received words are copied with memcpy, so @p dest_buffer does
 * not have to be aligned.
 *
 * @param block_count Number of blocks to read. Zero is accepted and reads nothing.
 * @param sector_addr Number of the first block
 * @param dest_buffer Destination for block_count * 2^BLOCKSIZE bytes
 * @return 0 on success,
 *         -1 if the read command failed,
 *         -3 on RX overrun, data CRC failure or data timeout
 */
static int sdio_read_blocks_polling(uint32_t block_count, uint32_t sector_addr, void *dest_buffer)
{
	uint32_t sdio_sta_reg;
	uint32_t fifo_read;
	uint32_t addr;
	uint32_t remaining;
	char *ptr;
	int ret_val;
	int status;

	if (block_count == 0)
		return 0;

	ptr = (char *)dest_buffer;
	remaining = block_count * (1UL<<BLOCKSIZE);
	/* SD_V2_HC cards are addressed in blocks, all other cards in bytes */
	addr = (card_info.type == SD_V2_HC ? sector_addr : sector_addr * (1UL<<BLOCKSIZE));

	sdio_config_sdio_data_tran(remaining, true, false);

	if (block_count > 1)
		status = sdio_send_read_multiple_blocks_cmd18(addr);
	else
		status = sdio_send_read_block_cmd17(addr);

	if (status) {
		ret_val = -1;
		goto return_val;
	}

	while (1) {
		sdio_sta_reg = SDIO->STA;

		if (sdio_sta_reg & SDIO_STA_RXDAVL) {
			fifo_read = SDIO->FIFO;
			/* Never write past the end of the destination buffer */
			if (remaining >= sizeof(uint32_t)) {
				memcpy(ptr, &fifo_read, sizeof(uint32_t));
				/* Advance, otherwise every word would overwrite the first one */
				ptr += sizeof(uint32_t);
				remaining -= sizeof(uint32_t);
			}
		}

		if (sdio_sta_reg & SDIO_STA_RXOVERR ||
		    sdio_sta_reg & SDIO_STA_DCRCFAIL ||
		    sdio_sta_reg & SDIO_STA_DTIMEOUT) {
			sdio_data_transfer_disable();
			ret_val = -3;
			goto stop_transmission;
		}

		/*
		 * The transfer is only complete once the FIFO has been drained.
		 * As long as this status sample reported data, keep reading.
		 */
		if ((sdio_sta_reg & SDIO_STA_DATAEND) && (sdio_sta_reg & SDIO_STA_DBCKEND) &&
		    !(sdio_sta_reg & SDIO_STA_RXDAVL)) {
			ret_val = 0;
			break;
		}
	}

stop_transmission:
	/* A multi-block read is open ended and has to be terminated explicitly */
	if (block_count > 1)
		sdio_send_stop_cmd12();

return_val:
	return ret_val;
}
#endif /* USE_DMA */
DSTATUS sdio_status()
{
DSTATUS returnval = 0;
@ -553,7 +843,7 @@ DSTATUS sdio_initialize(){
return STA_NOINIT;
break;
case CMD8_RESP_TIMEOUT: // SDV1 Card
hcs_flag=0;
hcs_flag = 0;
break;
default:
return STA_NOINIT;
@ -604,92 +894,33 @@ DSTATUS sdio_initialize(){
}
DRESULT sdio_disk_read(BYTE *buff, DWORD sector, UINT count){
uint32_t addr;
uint32_t sdio_status;
uint32_t fifo;
uint32_t counter;
union sdio_status_conv status;
int status;
union sdio_status_conv card_status;
addr = (card_info.type == SD_V2_HC ? (sector) : (sector*512));
for (; count > 0; count--) {
if (!buff || !count)
return RES_PARERR;
/* configure read DMA */
// DMA2->LIFCR = 0xffffffff;
// DMA2->HIFCR = 0xffffffff;
// DMASTREAM->NDTR = 0;
// DMASTREAM->FCR = DMA_SxFCR_FTH_0 | DMA_SxFCR_FTH_1 | DMA_SxFCR_DMDIS;
// DMASTREAM->M0AR = (uint32_t)(buff);
// DMASTREAM->PAR = (uint32_t)&(SDIO->FIFO);
// DMASTREAM->CR = DMAP2M | DMA_SxCR_PL_1 | DMA_SxCR_PL_1;
// DMASTREAM->CR |= DMA_SxCR_EN;
do {
sdio_check_status_register_cmd13(card_info.rca, &card_status.value);
} while (card_status.statusstruct.CURRENT_STATE == CURRENT_STATE_PRG);
do {
sdio_check_status_register_cmd13(card_info.rca, &status.value);
} while (status.statusstruct.CURRENT_STATE != CURRENT_STATE_TRAN);
SDIO->DLEN = (1 << BLOCKSIZE);
SDIO->DTIMER = DTIMEOUT;
SDIO->ICR = SDIO_ICR_CCRCFAILC | SDIO_ICR_DCRCFAILC | SDIO_ICR_CTIMEOUTC | SDIO_ICR_DTIMEOUTC |
SDIO_ICR_TXUNDERRC | SDIO_ICR_RXOVERRC | SDIO_ICR_CMDRENDC | SDIO_ICR_CMDSENTC | SDIO_ICR_DATAENDC |
SDIO_ICR_STBITERRC | SDIO_ICR_DBCKENDC | SDIO_ICR_SDIOITC | SDIO_ICR_CEATAENDC;
SDIO->DCTRL = (BLOCKSIZE<<4) | SDIO_DCTRL_DTDIR | /*SDIO_DCTRL_DMAEN |*/ SDIO_DCTRL_DTEN;
/* Init Transfer */
if (sdio_send_read_block_cmd17(addr)) {
if (card_status.statusstruct.CURRENT_STATE != CURRENT_STATE_TRAN) {
status = sdio_send_select_card_cmd7(card_info.rca);
if (status)
return RES_ERROR;
}
counter = 0;
while (counter < (1<<(BLOCKSIZE-2)) || !(SDIO->STA & (SDIO_STA_DBCKEND | SDIO_STA_DATAEND))) {
/* TODO: Handle errors */
if (SDIO->STA & (SDIO_STA_DCRCFAIL | SDIO_STA_DTIMEOUT | SDIO_STA_STBITERR))
{
return RES_ERROR;
}
if (SDIO->STA & SDIO_STA_RXDAVL) {
counter++;
fifo = SDIO->FIFO;
*(buff++) = (BYTE)(fifo & 0xFF);
fifo >>= 8;
*(buff++) = (BYTE)(fifo & 0xFF);
fifo >>= 8;
*(buff++) = (BYTE)(fifo & 0xFF);
fifo >>= 8;
*(buff++) = (BYTE)(fifo & 0xFF);
}
}
if (SDIO->STA & SDIO_STA_DCRCFAIL) return RES_ERROR;
//while(DMASTREAM->CR & DMA_SxCR_EN);
while(1) {
__DSB();
__DMB();
sdio_status = SDIO->STA;
if (sdio_status & SDIO_STA_DCRCFAIL) {
return RES_ERROR;
}
if (sdio_status & SDIO_STA_DTIMEOUT) {
return RES_ERROR;
}
if (sdio_status & SDIO_STA_DATAEND) {
if (!(sdio_status & SDIO_STA_RXACT)) {
break;
}
}
}
if (card_info.type == SD_V2_HC) {
addr++;
} else {
addr += (1<<BLOCKSIZE);
}
}
#if USE_DMA
status = sdio_read_blocks_dma(count, sector, buff);
if (status)
return RES_ERROR;
#else
status = sdio_read_blocks_polling(count, sector, buff);
if (status)
return RES_ERROR;
#endif /* USE_DMA */
return RES_OK;
}
@ -719,10 +950,12 @@ DRESULT sdio_disk_write(const BYTE *buff, DWORD sector, UINT count)
} while (status.statusstruct.READY_FOR_DATA != 1);
ret = sdio_send_write_block_cmd24(addr);
if (ret) {
if (ret)
return RES_ERROR;
ret = sdio_write_buffer(512, 9, &buff[buff_offset]);
if (ret)
return RES_ERROR;
}
sdio_write_buffer(512, 9, &buff[buff_offset]);
buff_offset += 512;
addr += (card_info.type == SD_V2_HC ? 1 : 512);

View File

@ -14,13 +14,16 @@
//Initial Transfer CLK (ca. 400kHz)
#define INITCLK 130 //120
//Working CLK (Maximum)
#define WORKCLK 8 //0
#define WORKCLK 2 //0
//Data Timeout in CLK Cycles
#define DTIMEOUT 0x8000 //150
//DMA Stream used for TX and RX DMA2 Stream 3 or 6 possible
// Currently not used due to possible misalignment of the data buffer.
//#define DMASTREAM DMA2_Stream6
#define USE_DMA 1
#define DMASTREAM DMA2_Stream6
#define DMASTREAM_NO 6
/* Port Definitions */