From 2f6590416de4acebfd064649852925d382c87ba8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mario=20H=C3=BCttel?= Date: Mon, 7 Sep 2020 23:52:12 +0200 Subject: [PATCH] Improve documentation of Stack Checking --- doc/source/firmware/index.rst | 2 +- .../firmware/{ => safety}/backup-ram.rst | 0 .../firmware/{ => safety}/error-handling.rst | 0 doc/source/firmware/{ => safety}/flags.rst | 23 ++++++++++- .../firmware/{safety.rst => safety/index.rst} | 1 + doc/source/firmware/safety/stack-checking.rst | 39 +++++++++++++++++++ .../reflow-controller/safety/safety-config.h | 3 ++ .../reflow-controller/safety/stack-check.h | 32 +++++++++++++++ 8 files changed, 98 insertions(+), 2 deletions(-) rename doc/source/firmware/{ => safety}/backup-ram.rst (100%) rename doc/source/firmware/{ => safety}/error-handling.rst (100%) rename doc/source/firmware/{ => safety}/flags.rst (80%) rename doc/source/firmware/{safety.rst => safety/index.rst} (98%) create mode 100644 doc/source/firmware/safety/stack-checking.rst diff --git a/doc/source/firmware/index.rst b/doc/source/firmware/index.rst index 7550976..028147c 100644 --- a/doc/source/firmware/index.rst +++ b/doc/source/firmware/index.rst @@ -11,6 +11,6 @@ mechanisms and the behavior. 
For a detailed code documentation see the doxygen o :maxdepth: 2 pt1000-processing - safety + safety/index code/index diff --git a/doc/source/firmware/backup-ram.rst b/doc/source/firmware/safety/backup-ram.rst similarity index 100% rename from doc/source/firmware/backup-ram.rst rename to doc/source/firmware/safety/backup-ram.rst diff --git a/doc/source/firmware/error-handling.rst b/doc/source/firmware/safety/error-handling.rst similarity index 100% rename from doc/source/firmware/error-handling.rst rename to doc/source/firmware/safety/error-handling.rst diff --git a/doc/source/firmware/flags.rst b/doc/source/firmware/safety/flags.rst similarity index 80% rename from doc/source/firmware/flags.rst rename to doc/source/firmware/safety/flags.rst index 7012abc..10941c5 100644 --- a/doc/source/firmware/flags.rst +++ b/doc/source/firmware/safety/flags.rst @@ -8,6 +8,10 @@ The safety flags are represented in software by the following enums .. doxygenenum:: safety_flag The safety flags can be temporarily or permanent. Some temporary flags are reset automatically, once the error condition disappears. Others have to be explicitly cleared. +The safety weights (if a flag stops the PID controller, or triggers the panic mode) are configured by default as described below. However, it will be possible to override these weights by +setting config entries in the safety memory. + +.. todo:: Change docu of config entries in memory ---------------------------------------------------------------------------------------------------------------------------------- @@ -90,4 +94,21 @@ safety backup memory is cleared persistent self-clearing Stops PID Panic Mode ========== ============= ============= =========== yes no yes no -========== ============= ============= =========== \ No newline at end of file +========== ============= ============= =========== + + +.. 
_safety_flags_stack: + +ERR_FLAG_STACK +--------------------------- + +``ERR_FLAG_STACK`` is set during the initialization of the controller, in case a corrupted safety memory is encountered. +This error is not recoverable and will trigger the panic mode. + +.. seealso:: :ref:`safety_stack_checking` + +========== ============= ============= =========== +persistent self-clearing Stops PID Panic Mode +========== ============= ============= =========== +yes no yes yes +========== ============= ============= =========== diff --git a/doc/source/firmware/safety.rst b/doc/source/firmware/safety/index.rst similarity index 98% rename from doc/source/firmware/safety.rst rename to doc/source/firmware/safety/index.rst index e6684af..f47c5f8 100644 --- a/doc/source/firmware/safety.rst +++ b/doc/source/firmware/safety/index.rst @@ -21,3 +21,4 @@ which are used to retain boot information across resets, for example to communic flags backup-ram error-handling + stack-checking diff --git a/doc/source/firmware/safety/stack-checking.rst b/doc/source/firmware/safety/stack-checking.rst new file mode 100644 index 0000000..f975d31 --- /dev/null +++ b/doc/source/firmware/safety/stack-checking.rst @@ -0,0 +1,39 @@ +.. _safety_stack_checking: + +Safety Stack Checking +===================== + +To ensure correct operation of the controller, the stack is continuously monitored. For this, the :ref:`firmware_safety` checks the stack in each run. +These checks include: + +1. Checking of used stack space and limit to end of stack +2. Checking a protection area between heap and stack for memory corruption + +Any detected error will set the :ref:`safety_flags_stack` error flag. + +Stack Pointer Checking +---------------------- + +The stack pointer is checked using :c:func:`stack_check_get_free`. The returned value for the remaining stack space is checked against + +.. doxygendefine:: SAFETY_MIN_STACK_FREE + +.. 
doxygenfunction:: stack_check_get_free + + +Stack and Heap Corruption Checking +---------------------------------- + +A section of memory is located between the stack and the heap. It is defined inside the linker script. Its size is configured by the linker script parameter ``__stack_corruption_area_size``, which is set to ``128`` by default. +This section is filled at the initialization of the safety controller by a call to + +.. doxygenfunction:: stack_check_init_corruption_detect_area + +On each run of the safety controller's handling function (:c:func:`safety_controller_handle`) the following function is called: + +.. doxygenfunction:: stack_check_corruption_detect_area + + +This function constantly checks the memory area for write modifications, and therefore detects if the stack or heap have grown outside their boundaries. + + diff --git a/stm-firmware/include/reflow-controller/safety/safety-config.h b/stm-firmware/include/reflow-controller/safety/safety-config.h index d20ef37..846afcd 100644 --- a/stm-firmware/include/reflow-controller/safety/safety-config.h +++ b/stm-firmware/include/reflow-controller/safety/safety-config.h @@ -72,6 +72,9 @@ enum analog_value_monitor { #define WATCHDOG_PRESCALER 8 +/** + * @brief Minimum number of bytes that have to be free on the stack. 
If this is not the case, an error is detected + */ #define SAFETY_MIN_STACK_FREE 0x100 #define PID_CONTROLLER_ERR_CAREMASK (ERR_FLAG_STACK | ERR_FLAG_AMON_UC_TEMP | ERR_FLAG_AMON_VREF | \ diff --git a/stm-firmware/include/reflow-controller/safety/stack-check.h b/stm-firmware/include/reflow-controller/safety/stack-check.h index c51a3a7..df88d90 100644 --- a/stm-firmware/include/reflow-controller/safety/stack-check.h +++ b/stm-firmware/include/reflow-controller/safety/stack-check.h @@ -25,8 +25,16 @@ #define STACK_CHECK_MIN_HEAP_GAP 16UL +/** + * @brief Get usage of the stack + * @return Usage of the stack in bytes + */ int32_t stack_check_get_usage(); +/** + * @brief Get free stack space + * @return free stack space in bytes. If negative, a stack overflow occurred + */ int32_t stack_check_get_free(); static inline int stack_check_collision() @@ -50,8 +58,32 @@ static inline uint32_t read_stack_pointer() return stack_pointer; } +/** + * @brief Init the stack corruption detection area. + * + * This function initializes the memory area between heap and stack with random values generated by the + * STM's random number generator. A 32 bit CRC generated by the CRC unit of the STM is appended for verification of the + * area. + * + * + * @return 0 if successful, else an error has occurred in generating a random number. This should never happen + * @note This function turns on the CRC unit but does not disable it afterwards. Therefore, the CRC unit does not have + * to be explicitly initialized before calling @ref stack_check_corruption_detect_area. + */ int stack_check_init_corruption_detect_area(void); +/** + * @brief Check the CRC of the stack corruption detection area + * + * This function checks the stack corruption detection area, which must be initialized by + * @ref stack_check_init_corruption_detect_area beforehand. + * + * The CRC unit must be enabled for this function to work properly. + * After calling @ref stack_check_init_corruption_detect_area, this is the case. 
+ * + * @return 0 if no error is detected, all other values are an error. + * @note Make sure CRC unit is enabled. + */ int stack_check_corruption_detect_area(void); #endif /* __STACK_CHECK_H__ */