[Cryptech-Commits] [sw/stm32] 02/02: Put thread stack buffers in SDRAM, because pkey uses a lot of stack.
git at cryptech.is
git at cryptech.is
Fri Jun 10 02:27:51 UTC 2016
This is an automated email from the git hooks/post-receive script.
paul at psgd.org pushed a commit to branch master
in repository sw/stm32.
commit 2a14d36ebd7bde9a3a6c98871050b14b54598389
Author: Paul Selkirk <paul at psgd.org>
AuthorDate: Thu Jun 9 18:53:31 2016 -0400
Put thread stack buffers in SDRAM, because pkey uses a lot of stack.
Also rearchitect the way we handle RPC requests - have a bunch of waiting
dispatch threads rather than continually creating and deleting threads.
---
.../TOOLCHAIN_GCC_ARM/STM32F429BI.ld | 52 +++++++-
.../TARGET_CRYPTECH_ALPHA/stm32f4xx_hal_msp.c | 1 +
projects/hsm/hsm.c | 138 ++++++++++++---------
3 files changed, 125 insertions(+), 66 deletions(-)
diff --git a/libraries/mbed/targets/cmsis/TARGET_STM/TARGET_STM32F4/TARGET_CRYPTECH_ALPHA/TOOLCHAIN_GCC_ARM/STM32F429BI.ld b/libraries/mbed/targets/cmsis/TARGET_STM/TARGET_STM32F4/TARGET_CRYPTECH_ALPHA/TOOLCHAIN_GCC_ARM/STM32F429BI.ld
index ad7ddaf..2f80bce 100644
--- a/libraries/mbed/targets/cmsis/TARGET_STM/TARGET_STM32F4/TARGET_CRYPTECH_ALPHA/TOOLCHAIN_GCC_ARM/STM32F429BI.ld
+++ b/libraries/mbed/targets/cmsis/TARGET_STM/TARGET_STM32F4/TARGET_CRYPTECH_ALPHA/TOOLCHAIN_GCC_ARM/STM32F429BI.ld
@@ -10,9 +10,11 @@ MEMORY
BOOTLOADER (rx) : ORIGIN = 0x08000000, LENGTH = 128K
FIRMWARE (rx) : ORIGIN = 0x08000000 + 128K, LENGTH = 2048K - 128K
FLASH (rx) : ORIGIN = 0x08000000 + 128K, LENGTH = 2048K - 128K
- RAM (rwx) : ORIGIN = 0x20000000, LENGTH = 192K - 4
+ CCMRAM (rwx) : ORIGIN = 0x10000000, LENGTH = 64K
+ RAM (rwx) : ORIGIN = 0x20000000, LENGTH = 192K - 4
+ SDRAM1 (rwx) : ORIGIN = 0xC0000000, LENGTH = 64M
+ SDRAM2 (rwx) : ORIGIN = 0xD0000000, LENGTH = 64M
}
-/* original: FLASH (rx) : ORIGIN = 0x08000000, LENGTH = 2048K */
/* Linker script to place sections and symbol values. Should be used together
* with other linker script that defines memory regions FLASH and RAM.
@@ -86,7 +88,7 @@ SECTIONS
__etext = .;
_sidata = .;
- .data : AT (__etext)
+ .data :
{
__data_start__ = .;
_sdata = .;
@@ -120,7 +122,49 @@ SECTIONS
__data_end__ = .;
_edata = .;
- } > RAM
+ } > RAM AT> FLASH
+
+ /* If initialized variables are placed in this section,
+ * the startup code needs to be modified to copy the init-values.
+ */
+ .ccmram :
+ {
+ . = ALIGN(4);
+ _sccmram = .;
+ *(.ccmram)
+ *(.ccmram*)
+
+ . = ALIGN(4);
+ _eccmram = .;
+ } >CCMRAM AT> FLASH
+
+ /* If initialized variables are placed in this section,
+ * the startup code needs to be modified to copy the init-values.
+ */
+ .sdram1 :
+ {
+ . = ALIGN(4);
+ _ssdram1 = .;
+ *(.sdram1)
+ *(.sdram1*)
+
+ . = ALIGN(4);
+ _esdram1 = .;
+ } >SDRAM1 AT> FLASH
+
+ /* If initialized variables are placed in this section,
+ * the startup code needs to be modified to copy the init-values.
+ */
+ .sdram2 :
+ {
+ . = ALIGN(4);
+ _ssdram2 = .;
+ *(.sdram2)
+ *(.sdram2*)
+
+ . = ALIGN(4);
+ _esdram2 = .;
+ } >SDRAM2 AT> FLASH
.bss :
{
diff --git a/libraries/mbed/targets/cmsis/TARGET_STM/TARGET_STM32F4/TARGET_CRYPTECH_ALPHA/stm32f4xx_hal_msp.c b/libraries/mbed/targets/cmsis/TARGET_STM/TARGET_STM32F4/TARGET_CRYPTECH_ALPHA/stm32f4xx_hal_msp.c
index fbd0adf..7eeb6df 100644
--- a/libraries/mbed/targets/cmsis/TARGET_STM/TARGET_STM32F4/TARGET_CRYPTECH_ALPHA/stm32f4xx_hal_msp.c
+++ b/libraries/mbed/targets/cmsis/TARGET_STM/TARGET_STM32F4/TARGET_CRYPTECH_ALPHA/stm32f4xx_hal_msp.c
@@ -190,6 +190,7 @@ void HAL_UART_MspInit(UART_HandleTypeDef* huart)
hdma->Init.PeriphBurst = DMA_PBURST_SINGLE;
*/
if (HAL_DMA_Init(hdma) != HAL_OK) {
+ extern void mbed_die(void);
mbed_die();
}
}
diff --git a/projects/hsm/hsm.c b/projects/hsm/hsm.c
index 79c567b..e3c1c36 100644
--- a/projects/hsm/hsm.c
+++ b/projects/hsm/hsm.c
@@ -53,6 +53,7 @@
#include "stm-led.h"
#include "stm-fmc.h"
#include "stm-uart.h"
+#include "stm-sdram.h"
/* stm32f4xx_hal_def.h and hal.h both define HAL_OK as an enum value */
#define HAL_OK HAL_OKAY
@@ -65,13 +66,24 @@
/* RPC buffers. For each active RPC, there will be two - input and output.
*/
-#ifndef NUM_RPC_BUFFER
+#ifndef NUM_RPC_TASK
/* An arbitrary number, but we don't expect to have more than 8 concurrent
* RPC requests.
*/
-#define NUM_RPC_BUFFER 16
+#define NUM_RPC_TASK 8
#endif
+#ifndef TASK_STACK_SIZE
+/* Define an absurdly large task stack, because some pkey operation use a
+ * lot of stack variables.
+ */
+#define TASK_STACK_SIZE 64*1024
+#endif
+
+/* Put the task stack buffers in SDRAM, because ARM RAM is too small.
+ */
+__attribute__((section(".sdram1"))) uint8_t stack[NUM_RPC_TASK][TASK_STACK_SIZE];
+
#ifndef MAX_PKT_SIZE
/* Another arbitrary number, more or less driven by the 4096-bit RSA
* keygen test.
@@ -87,65 +99,30 @@ typedef struct {
uint8_t buf[MAX_PKT_SIZE];
} rpc_buffer_t;
-osPoolDef(rpc_buffer_pool, NUM_RPC_BUFFER, rpc_buffer_t);
-osPoolId rpc_buffer_pool;
-
-static rpc_buffer_t *rpc_buffer_alloc(void)
-{
- return (rpc_buffer_t *)osPoolCAlloc(rpc_buffer_pool);
-}
-
/* A mutex to arbitrate concurrent UART transmits, from RPC responses.
*/
osMutexId uart_mutex;
osMutexDef(uart_mutex);
-/* Thread entry point for the RPC request handler.
+/* A mutex so only one dispatch thread can receive requests.
*/
-static void dispatch_thread(void const *args)
-{
- rpc_buffer_t *ibuf = (rpc_buffer_t *)args;
- rpc_buffer_t *obuf = rpc_buffer_alloc();
- if (obuf == NULL) {
- uint8_t buf[8];
- uint8_t * bufptr = &buf[4];
- const uint8_t * const limit = buf + sizeof(buf);
- memcpy(buf, ibuf->buf, 4);
- hal_xdr_encode_int(&bufptr, limit, HAL_ERROR_ALLOCATION_FAILURE);
- osMutexWait(uart_mutex, osWaitForever);
- hal_rpc_sendto(ibuf->buf, sizeof(buf), NULL);
- osMutexRelease(uart_mutex);
- osPoolFree(rpc_buffer_pool, ibuf);
- Error_Handler();
- }
- /* copy client ID from request to response */
- memcpy(obuf->buf, ibuf->buf, 4);
- obuf->len = sizeof(obuf->buf) - 4;
- hal_rpc_server_dispatch(ibuf->buf + 4, ibuf->len - 4, obuf->buf + 4, &obuf->len);
- osPoolFree(rpc_buffer_pool, ibuf);
- osMutexWait(uart_mutex, osWaitForever);
- hal_error_t ret = hal_rpc_sendto(obuf->buf, obuf->len + 4, NULL);
- osMutexRelease(uart_mutex);
- osPoolFree(rpc_buffer_pool, obuf);
- if (ret != HAL_OK)
- Error_Handler();
-}
-osThreadDef(dispatch_thread, osPriorityNormal, DEFAULT_STACK_SIZE);
+osMutexId dispatch_mutex;
+osMutexDef(dispatch_mutex);
-/* Semaphore to inform the main thread that there's a new RPC request.
+/* Semaphore to inform the dispatch thread that there's a new RPC request.
*/
-osSemaphoreId rpc_sem;
+osSemaphoreId rpc_sem;
osSemaphoreDef(rpc_sem);
-static uint8_t c; /* current character received from UART */
-static rpc_buffer_t *ibuf; /* current RPC input buffer */
+static uint8_t c; /* current character received from UART */
+static rpc_buffer_t * volatile rbuf; /* current RPC input buffer */
/* Callback for HAL_UART_Receive_IT().
*/
void HAL_UART2_RxCpltCallback(UART_HandleTypeDef *huart)
{
int complete;
- hal_slip_recv_char(ibuf->buf, &ibuf->len, sizeof(ibuf->buf), &complete);
+ hal_slip_recv_char(rbuf->buf, &rbuf->len, sizeof(rbuf->buf), &complete);
if (complete)
osSemaphoreRelease(rpc_sem);
@@ -164,8 +141,47 @@ hal_error_t hal_serial_recv_char(uint8_t *cp)
return HAL_OK;
}
-/* The main thread. After the system setup, it waits for the RPC-request
- * semaphore from HAL_UART_RxCpltCallback, and spawns a dispatch thread.
+/* Thread entry point for the RPC request handler.
+ */
+static void dispatch_thread(void const *args)
+{
+ rpc_buffer_t ibuf, obuf;
+
+ while (1) {
+ memset(&ibuf, 0, sizeof(ibuf));
+ memset(&obuf, 0, sizeof(obuf));
+
+ /* Wait for access to the uart */
+ osMutexWait(dispatch_mutex, osWaitForever);
+
+ /* Wait for the complete rpc request */
+ rbuf = &ibuf;
+ osSemaphoreWait(rpc_sem, osWaitForever);
+
+ /* Let the next thread handle the next request */
+ osMutexRelease(dispatch_mutex);
+ /* Let the next thread take the mutex */
+ osThreadYield();
+
+ /* Copy client ID from request to response */
+ memcpy(obuf.buf, ibuf.buf, 4);
+ obuf.len = sizeof(obuf.buf) - 4;
+
+ /* Process the request */
+ hal_rpc_server_dispatch(ibuf.buf + 4, ibuf.len - 4, obuf.buf + 4, &obuf.len);
+
+ /* Send the response */
+ osMutexWait(uart_mutex, osWaitForever);
+ hal_error_t ret = hal_rpc_sendto(obuf.buf, obuf.len + 4, NULL);
+ osMutexRelease(uart_mutex);
+ if (ret != HAL_OK)
+ Error_Handler();
+ }
+}
+osThreadDef_t thread_def[NUM_RPC_TASK];
+
+/* The main thread. This does all the setup, and the worker threads handle
+ * the rest.
*/
int main()
{
@@ -183,6 +199,7 @@ int main()
led_on(LED_GREEN);
/* Prepare FMC interface. */
fmc_init();
+ sdram_init();
/* Haaaack. probe_cores() calls malloc(), which works from the main
* thread, but not from a spawned thread. It would be better to
@@ -191,8 +208,8 @@ int main()
*/
hal_core_iterate(NULL);
- rpc_buffer_pool = osPoolCreate(osPool(rpc_buffer_pool));
uart_mutex = osMutexCreate(osMutex(uart_mutex));
+ dispatch_mutex = osMutexCreate(osMutex(dispatch_mutex));
rpc_sem = osSemaphoreCreate(osSemaphore(rpc_sem), 0);
#ifdef TARGET_CRYPTECH_ALPHA
@@ -205,22 +222,19 @@ int main()
if (hal_rpc_server_init() != HAL_OK)
Error_Handler();
- ibuf = rpc_buffer_alloc();
- if (ibuf == NULL)
- /* Something is badly wrong. */
- Error_Handler();
+ /* Create the rpc dispatch threads */
+ for (int i = 0; i < NUM_RPC_TASK; ++i) {
+ osThreadDef_t *ot = &thread_def[i];
+ ot->pthread = dispatch_thread;
+ ot->tpriority = osPriorityNormal;
+ ot->stacksize = TASK_STACK_SIZE;
+ ot->stack_pointer = (uint32_t *)stack[i];
+ if (osThreadCreate(ot, (void *)i) == NULL)
+ Error_Handler();
+ }
/* Start the non-blocking receive */
HAL_UART_Receive_IT(&huart_user, &c, 1);
- while (1) {
- osSemaphoreWait(rpc_sem, osWaitForever);
- if (osThreadCreate(osThread(dispatch_thread), (void *)ibuf) == NULL)
- Error_Handler();
- while ((ibuf = rpc_buffer_alloc()) == NULL);
- /* XXX There's a potential race condition, where another request
- * could write into the old ibuf, or into the null pointer if
- * we're out of ibufs.
- */
- }
+ while (1) { ; }
}
More information about the Commits
mailing list