Main firmware entry point for the STM32N6570-DK pose estimation application running on the Neural-ART NPU. More...

#include "cmw_camera.h"
#include "stm32n6570_discovery_bus.h"
#include "stm32n6570_discovery_lcd.h"
#include "stm32n6570_discovery_xspi.h"
#include "stm32n6570_discovery.h"
#include "stm32_lcd.h"
#include "app_fuseprogramming.h"
#include "stm32_lcd_ex.h"
#include "app_postprocess.h"
#include "ll_aton_runtime.h"
#include "app_camerapipeline.h"
#include "main.h"
#include <stdio.h>
#include "app_config.h"
#include "crop_img.h"
#include "stlogo.h"
#include "utils.h"
#include "display_mpe.h"

Include dependency graph for main.c:

Classes
struct	Rectangle_TypeDef
	Rectangle descriptor for LCD layer positioning. More...

Macros
#define	MAX_NUMBER_OUTPUT 5
	Maximum number of output tensors supported (YOLOv8 has multiple outputs). More...

#define	LCD_FG_WIDTH SCREEN_WIDTH
	LCD foreground layer width = full screen width (800 pixels) More...

#define	LCD_FG_HEIGHT SCREEN_HEIGHT
	LCD foreground layer height = full screen height (480 pixels) More...

#define	LCD_FG_FRAMEBUFFER_SIZE (LCD_FG_WIDTH * LCD_FG_HEIGHT * 2)
	Foreground framebuffer size in bytes (RGB565 = 2 bytes/pixel) More...

#define	ALIGN_TO_16(value) (((value) + 15) & ~15)
	Align a value up to the next multiple of 16. More...

Functions
	__attribute__ ((section(".psram_bss")))
	LCD background framebuffer — camera preview. More...

static void	Hardware_init (void)
	Initialize all hardware peripherals required for the application. More...

static void	NeuralNetwork_init (uint32_t *nnin_length, float32_t *nn_out[], int *number_output, int32_t nn_out_len[])
	Initialize the Neural Network input/output buffer pointers. More...

static void	NPURam_enable (void)
	Enable the NPU clock and all four NPU SRAM banks. More...

static void	set_clk_sleep_mode (void)
	Configure peripheral clocks to remain active during CPU sleep. More...

static void	NPUCache_config (void)
	Initialize and enable the NPU AXI cache. More...

static void	Security_Config (void)
	Configure TrustZone security attributes for hardware peripherals. More...

static void	IAC_Config (void)
	Configure the Illegal Access Controller (IAC). More...

void	IAC_IRQHandler (void)
	IAC interrupt handler — traps illegal memory access violations. More...

static int	clamp_point (int *x, int *y)
	Clamp a point to the LCD background area boundaries. More...

static void	convert_length (float32_t wi, float32_t hi, int *wo, int *ho)
	Convert normalized [0,1] dimensions to LCD pixel dimensions. More...

static void	convert_point (float32_t xi, float32_t yi, int *xo, int *yo)
	Convert normalized [0,1] coordinates to LCD pixel coordinates. More...

static void	Display_binding_line (int x0, int y0, int x1, int y1, uint32_t color)
	Draw a skeleton connection line between two keypoints on the LCD. More...

static void	Display_NetworkOutput (void *p_postprocess, uint32_t inference_ms)
	Render inference results on the LCD foreground overlay layer. More...

static void	LCD_init (void)
	Initialize the LCD display with dual-layer configuration. More...

static void	Display_WelcomeScreen (void)
	Display the welcome screen for the first 4 seconds after boot. More...

HAL_StatusTypeDef	MX_DCMIPP_ClockConfig (DCMIPP_HandleTypeDef *hdcmipp)
	Configure DCMIPP pixel clock (IC17) and CSI clock (IC18). More...

static void	SystemClock_Config (void)
	Configure the system clock tree using four PLLs. More...

Variables
Rectangle_TypeDef	lcd_bg_area
	LCD background area — camera preview region. More...

Rectangle_TypeDef	lcd_fg_area
	LCD foreground area — full-screen overlay for inference results. More...

mpe_yolov8_pp_static_param_t	pp_params
	Postprocessing parameters — statically allocated, model-specific. More...

mpe_pp_out_t	pp_output

volatile int32_t	cameraFrameReceived
	Camera frame ready flag. More...

uint8_t *	nn_in
	Pointer to the NPU input buffer in AXISRAM (set by NeuralNetwork_init) More...

BSP_LCD_LayerConfig_t	LayerConfig = {0}
	LTDC layer configuration structure. More...

uint8_t *	dcmipp_out_nn
	DCMIPP intermediate buffer for non-16-aligned NN inputs. More...

Detailed Description

Main firmware entry point for the STM32N6570-DK pose estimation application running on the Neural-ART NPU.

Author: GPM Application Team — documented by Politecnico di Milano

This file implements the complete embedded AI inference pipeline that runs on the STM32N6570-DK board. It was generated by the ModelZoo Services deployment script and then flashed via STM32CubeIDE.

System Architecture:

┌─────────────┐    MIPI CSI-2    ┌──────────────────────────────────────────┐
│ B-CAMS-IMX  │ ──────────────►  │          STM32N6570-DK                   │
│  Camera     │                  │                                          │
└─────────────┘                  │  ┌─────────┐   DMA    ┌──────────────┐  │
                                 │  │  DCMIPP  │ ──────►  │  PSRAM       │  │
                                 │  │  (ISP)   │          │  lcd_bg_buf  │  │
                                 │  └─────────┘          └──────┬───────┘  │
                                 │       │                       │          │
                                 │       │ DMA (NN pipe)         │ LTDC     │
                                 │       ▼                       ▼          │
                                 │  ┌─────────┐          ┌──────────────┐  │
                                 │  │  AXISRAM │          │    LCD       │  │
                                 │  │  nn_in   │          │  Display     │  │
                                 │  └────┬────┘          └──────────────┘  │
                                 │       │                                  │
                                 │       ▼                                  │
                                 │  ┌─────────────────────────────────┐    │
                                 │  │    Neural-ART NPU               │    │
                                 │  │    LL_ATON_RT_Main()            │    │
                                 │  │    (MoveNet / YOLOv8 / BERT)   │    │
                                 │  └──────────────┬──────────────────┘    │
                                 │                 │                        │
                                 │                 ▼                        │
                                 │  ┌──────────────────────────────────┐   │
                                 │  │    Postprocessing (CPU)          │   │
                                 │  │    app_postprocess_run()         │   │
                                 │  │    → keypoint coordinates        │   │
                                 │  └──────────────┬───────────────────┘   │
                                 │                 │                        │
                                 │                 ▼                        │
                                 │  ┌──────────────────────────────────┐   │
                                 │  │    Display (CPU + DMA2D)         │   │
                                 │  │    Display_NetworkOutput()       │   │
                                 │  │    → skeleton overlay on LCD     │   │
                                 │  └──────────────────────────────────┘   │
                                 └──────────────────────────────────────────┘

Dual-pipe camera architecture: The DCMIPP (Digital Camera Memory Interface and Processing Pipeline) runs TWO simultaneous pipes:

Display pipe: delivers full-resolution RGB565 frames directly to PSRAM for LCD background display via LTDC hardware overlay
NN pipe: delivers cropped/resized frames to AXISRAM for NPU inference

Inference loop timing: For MoveNet Lightning at 192×192:

Camera capture: ~5 ms
NPU inference (LL_ATON_RT_Main): ~18 ms
Postprocessing (CPU): ~1 ms
Display update (DMA2D): ~2 ms
Total: ~26 ms (~38 FPS)

Note: This file is part of the STM32AI Model Zoo Services repository. Doxygen documentation added for educational purposes as part of the Multidisciplinary Project — Neural Network Deployment on STM32N6 NPU. Politecnico di Milano, A.Y. 2024-2025. Authors: Giacomo Colosio, Sebastiano Colosio, Patrizio Acquadro, Tito Nicola Drugman

Copyright: Copyright (c) 2023 STMicroelectronics. All rights reserved.

Definition in file main.c.

Macro Definition Documentation

◆ ALIGN_TO_16

#define ALIGN_TO_16 ( value ) (((value) + 15) & ~15)

Round a value up to the nearest multiple of 16 (a value that is already a multiple of 16 is returned unchanged).

Used to compute DCMIPP output buffer sizes, since the DCMIPP hardware requires line lengths to be multiples of 16 bytes.

Definition at line 192 of file main.c.

◆ LCD_FG_FRAMEBUFFER_SIZE

#define LCD_FG_FRAMEBUFFER_SIZE (LCD_FG_WIDTH * LCD_FG_HEIGHT * 2)

Foreground framebuffer size in bytes (RGB565 = 2 bytes/pixel)

Definition at line 117 of file main.c.

◆ LCD_FG_HEIGHT

#define LCD_FG_HEIGHT SCREEN_HEIGHT

LCD foreground layer height = full screen height (480 pixels)

Definition at line 114 of file main.c.

◆ LCD_FG_WIDTH

#define LCD_FG_WIDTH SCREEN_WIDTH

LCD foreground layer width = full screen width (800 pixels)

Definition at line 111 of file main.c.

◆ MAX_NUMBER_OUTPUT

#define MAX_NUMBER_OUTPUT 5

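Maximum number of output tensors supported (YOLOv8 has multiple outputs).

Note: the documentation generator also attached the following comments to this macro. They appear to belong to the preceding #include directives and do not describe MAX_NUMBER_OUTPUT:

Conditional postprocessing include based on POSTPROCESS_TYPE in app_config.h.
LL ATON runtime — Neural-ART NPU inference engine.
Model configuration (NN dimensions, thresholds).
Multi-person display for YOLOv8n.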

Definition at line 108 of file main.c.

Function Documentation

◆ __attribute__()

__attribute__ ( (section(".psram_bss")) )

LCD background framebuffer — camera preview.

Placed in external PSRAM (.psram_bss section) since it is too large (800*480*2 = 768 KB) for on-chip SRAM. Aligned to 32 bytes for efficient DMA transfers via the DCMIPP display pipe.

Main program entry point — implements the real-time inference loop.

The main function orchestrates the complete pose estimation pipeline:

Initialization sequence:

Hardware_init() — clocks, cache, NPU RAM, external Flash/PSRAM
NeuralNetwork_init() — resolve NPU input/output buffer addresses
app_postprocess_init() — configure postprocessor thresholds and params
CameraPipeline_Init() — initialize MIPI CSI-2, DCMIPP, dual-pipe config
LCD_init() — configure LTDC dual-layer display
CameraPipeline_DisplayPipe_Start() — start continuous camera preview

Main inference loop (executed at ~38 FPS for MoveNet):

while(1) {
  CameraPipeline_IspUpdate();              // Update ISP auto-exposure/white-balance
  CameraPipeline_NNPipe_Start(snapshot);   // Trigger single-frame NN pipe capture
  while(!cameraFrameReceived);             // Wait for DCMIPP interrupt
 
  [optional] img_crop();                   // Crop if NN_WIDTH not 16-byte aligned
 
  ts[0] = HAL_GetTick();
  LL_ATON_RT_Main(&NN_Instance_Default);   // RUN NPU INFERENCE
  ts[1] = HAL_GetTick();                   // Measure inference time
 
  app_postprocess_run(nn_out, &pp_output); // Decode heatmaps → keypoints
  Display_NetworkOutput(&pp_output, dt);   // Draw skeleton on LCD
  SCB_InvalidateDCache(nn_out);            // Invalidate cache for next frame
}

Key design decisions:

The NN pipe uses SNAPSHOT mode (one frame per trigger) rather than CONTINUOUS mode to synchronize inference with frame capture precisely.
LL_ATON_RT_Main() is a blocking call — it dispatches the model's epoch sequence to the Neural-ART NPU and waits for completion.
Cache is explicitly invalidated at the end of each loop iteration (after postprocessing and display) so that, on the next frame, the postprocessor does not read stale NPU output tensor data from the D-Cache.

Parameters

None

Return values

None	— this function never returns (embedded infinite loop)

Declare the LL_ATON neural network instance.

LL_ATON_DECLARE_NAMED_NN_INSTANCE_AND_INTERFACE(Default) expands to declare the NN_Instance_Default object and its interface — a handle used by LL_ATON_RT_Main() to dispatch the epoch sequence to the NPU. The "Default" name corresponds to the single network in this application.

Initializes the postprocessor with static parameters from app_config.h:

Confidence threshold (AI_POSE_PP_CONF_THRESHOLD = 0.4)
Number of keypoints (AI_SPE_MOVENET_POSTPROC_NB_KEYPOINTS = 13)
Heatmap dimensions (48×48)

Initializes the MIPI CSI-2 camera interface and DCMIPP dual-pipe:

Display pipe: full resolution → lcd_bg_buffer (continuous DMA)
NN pipe: cropped to NN_WIDTH×NN_HEIGHT → nn_in (snapshot trigger). CameraPipeline_Init() fills lcd_bg_area.XSize and YSize with the actual preview dimensions; pitch_nn returns the DCMIPP output line pitch (may differ from NN_WIDTH*NN_BPP).

Start the continuous display pipe — camera preview runs independently

Update ISP parameters (auto-exposure, auto-white-balance)

DCMIPP cannot write directly to nn_in because the output line pitch (padded to 16-byte alignment) differs from NN_WIDTH * NN_BPP. Use intermediate buffer, then crop to exact dimensions.

Direct capture to NPU input buffer (most common case)

Busy-wait until DCMIPP interrupt signals frame ready

Invalidate D-Cache for the intermediate buffer before CPU reads it. Required because DCMIPP DMA writes bypass the cache — without this, the CPU would read stale cached data from the previous frame.

Crop the DCMIPP padded output to the exact NN input dimensions. Required when the line length NN_WIDTH * NN_BPP is not a multiple of 16 bytes.

Clean + invalidate to ensure NPU DMA sees the updated nn_in data

Measure inference start time

Execute Neural-ART NPU inference.

LL_ATON_RT_Main() dispatches the complete model inference:

Iterates through all epochs in the compiled network
EC (hardware) epochs: dispatched to the Neural-ART NPU units (CONVACC, POOL, ACTIV) — executes in parallel hardware
SW (software) epochs: executed on the Cortex-M55 CPU (Resize, DequantizeLinear for MoveNet; Softmax for YOLOv8/TinyBERT)
Blocks until all epochs complete

For MoveNet Lightning: 71 EC epochs + 4 SW epochs = 75 total. Measured latency: ~18 ms on STM32N6570-DK.

Measure inference end time

Run postprocessing on the NPU output tensors. For MoveNet: decodes (48,48,13) heatmaps → 13 (x,y,conf) keypoints. For YOLOv8n: runs NMS on bounding boxes and decodes keypoints.

Render inference results on LCD — skeleton overlay + latency text

Invalidate D-Cache for all output tensor regions. The NPU writes output tensors via DMA, bypassing the cache. Invalidation ensures that, after the next inference, the CPU reads fresh NPU outputs rather than stale cached values from the previous frame.

Definition at line 221 of file main.c.

References cameraFrameReceived, CameraPipeline_DisplayPipe_Start(), CameraPipeline_Init(), CameraPipeline_IspUpdate(), CameraPipeline_NNPipe_Start(), dcmipp_out_nn, Display_NetworkOutput(), Hardware_init(), img_crop(), lcd_bg_area, LCD_init(), MAX_NUMBER_OUTPUT, NeuralNetwork_init(), NN_BPP, NN_HEIGHT, nn_in, NN_WIDTH, pp_output, pp_params, Rectangle_TypeDef::XSize, and Rectangle_TypeDef::YSize.

Here is the call graph for this function:

◆ clamp_point()

static int clamp_point	(	int *	x,
		int *	y
	)

static

Clamp a point to the LCD background area boundaries.

Ensures that lines drawn between keypoints do not extend outside the camera preview area. Called before every UTIL_LCD_DrawLine() to prevent drawing outside the valid display region.

Parameters

[in,out]	x	X coordinate — clamped to [lcd_bg_area.X0, X0+XSize-1]
[in,out]	y	Y coordinate — clamped to [lcd_bg_area.Y0, Y0+YSize-1]

Returns: 1 if the point was clamped, 0 if it was already within bounds

Definition at line 751 of file main.c.

References lcd_bg_area, Rectangle_TypeDef::X0, Rectangle_TypeDef::XSize, Rectangle_TypeDef::Y0, and Rectangle_TypeDef::YSize.

Referenced by Display_binding_line(), and LCD_init().

Here is the caller graph for this function:

◆ convert_length()

static void convert_length	(	float32_t	wi,
		float32_t	hi,
		int *	wo,
		int *	ho
	)

static

Convert normalized [0,1] dimensions to LCD pixel dimensions.

The postprocessor outputs keypoint coordinates in normalized form [0.0, 1.0] relative to the input image size. This function scales them to actual pixel lengths in the LCD background area.

Parameters

[in]	wi	Normalized width [0.0, 1.0]
[in]	hi	Normalized height [0.0, 1.0]
[out]	wo	Pixel width in the LCD background area
[out]	ho	Pixel height in the LCD background area

Definition at line 774 of file main.c.

References lcd_bg_area, Rectangle_TypeDef::XSize, and Rectangle_TypeDef::YSize.

Referenced by LCD_init().

Here is the caller graph for this function:

◆ convert_point()

static void convert_point	(	float32_t	xi,
		float32_t	yi,
		int *	xo,
		int *	yo
	)

static

Convert normalized [0,1] coordinates to LCD pixel coordinates.

Maps normalized keypoint positions (output of the postprocessor) to absolute pixel positions on the LCD, accounting for the background area offset (X0, Y0) introduced by the crop/fit mode.

Parameters

[in]	xi	Normalized X coordinate [0.0, 1.0]
[in]	yi	Normalized Y coordinate [0.0, 1.0]
[out]	xo	Absolute X pixel position on LCD
[out]	yo	Absolute Y pixel position on LCD

Definition at line 793 of file main.c.

References lcd_bg_area, Rectangle_TypeDef::X0, Rectangle_TypeDef::XSize, Rectangle_TypeDef::Y0, and Rectangle_TypeDef::YSize.

Referenced by LCD_init().

Here is the caller graph for this function:

◆ Display_binding_line()

static void Display_binding_line	(	int	x0,
		int	y0,
		int	x1,
		int	y1,
		uint32_t	color
	)

static

Draw a skeleton connection line between two keypoints on the LCD.

Clamps both endpoints to the LCD background area before drawing to prevent artifacts at image boundaries.

Parameters

x0	Start point X (absolute LCD pixels)
y0	Start point Y (absolute LCD pixels)
x1	End point X (absolute LCD pixels)
y1	End point Y (absolute LCD pixels)
color	ARGB4444 color value for the line

Definition at line 812 of file main.c.

References clamp_point().

Referenced by LCD_init().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ Display_NetworkOutput()

static void Display_NetworkOutput	(	void *	p_postprocess,
		uint32_t	inference_ms
	)

static

Render inference results on the LCD foreground overlay layer.

This function updates the LCD display with the latest inference results. It uses double-buffering to eliminate screen tearing:

Updates LTDC to read from the current read buffer (previously written)
Clears the write buffer (now the previous read buffer)
Draws keypoints/skeleton or bounding boxes on the write buffer
Displays the inference time at the bottom of the screen
Cleans the D-Cache region for the write buffer (ensures DMA coherency)
Triggers LTDC layer reload at the next vertical blanking interval
Swaps read/write buffer indices

The vertical blanking reload (LTDC_RELOAD_VERTICAL_BLANKING) ensures the layer address is updated only during the display's blanking period, preventing partial-frame tearing artifacts.

Parameters

p_postprocess	Pointer to the postprocessing output structure: spe_pp_out_t for MoveNet (single person) or mpe_pp_out_t for YOLOv8 (multi-person)
inference_ms	Inference time in milliseconds (displayed on screen)

Switch LTDC to display the previously written buffer

Clear the overlay (transparent black)

Draw bounding boxes and keypoints for all detected persons

Display inference time at bottom of screen

Clean D-Cache to ensure LTDC DMA reads the updated framebuffer

Schedule LTDC layer reload at next vertical blanking — prevents tearing

Swap double-buffer indices

Definition at line 843 of file main.c.

References Display_mpe_Detection(), Display_spe_Detection(), Display_WelcomeScreen(), lcd_fg_area, LCD_FG_FRAMEBUFFER_SIZE, Rectangle_TypeDef::X0, Rectangle_TypeDef::XSize, Rectangle_TypeDef::Y0, and Rectangle_TypeDef::YSize.

Referenced by __attribute__().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ Display_WelcomeScreen()

static void Display_WelcomeScreen ( void )

static

Display the welcome screen for the first 4 seconds after boot.

Shows the ST logo and application information during boot. After 4000 ms have elapsed since first call, this function does nothing. Called inside Display_NetworkOutput() on every frame.

Parameters

None

Return values

None

Definition at line 963 of file main.c.

References WELCOME_MSG_1, and WELCOME_MSG_2.

Referenced by Display_NetworkOutput().

Here is the caller graph for this function:

◆ Hardware_init()

static void Hardware_init ( void )

static

Initialize all hardware peripherals required for the application.

Performs the complete hardware initialization sequence:

Instruction Cache — enables the 32KB I-Cache for faster code fetch
HAL initialization — sets up SysTick timer, default interrupt priorities
Data Cache — enables 32KB D-Cache (if USE_DCACHE defined in app_config.h)
System clocks — configures 4 PLLs for CPU (800 MHz), AXI (400 MHz), NPU (1000 MHz), and AXISRAM3-6 (900 MHz)
NPU RAM — enables all 4×448KB NPU SRAM banks (npuRAM3-6)
Fuse programming — configures OTP security fuses if needed
NPU cache — enables the NPU's dedicated AXI cache
External memory — initializes and memory-maps OctoFlash (128MB) and HexaRAM PSRAM (32MB) via the XSPI interfaces
Security — configures RISC/RIMC to grant NPU, DCMIPP, LTDC, DMA2D secure privileged access (required for TrustZone compliance)
IAC — configures Illegal Access Controller for security monitoring
Sleep clocks — configures which peripheral clocks remain active during WFE (Wait For Event) sleep, allowing NPU inference during CPU sleep

Note: The NPU clock is set to 1000 MHz (from PLL2), which is the maximum frequency and enables the peak 600 GOPS INT8 throughput.

Parameters

None

Return values

None

Definition at line 463 of file main.c.

References IAC_Config(), NPUCache_config(), NPURam_enable(), Security_Config(), set_clk_sleep_mode(), and SystemClock_Config().

Referenced by __attribute__().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ IAC_Config()

static void IAC_Config ( void )

static

Configure the Illegal Access Controller (IAC).

The IAC monitors all AXI bus transactions and fires an interrupt (IAC_IRQHandler) if any peripheral attempts to access a memory region it is not authorized to access. This is a security and debugging feature.

Parameters

None

Return values

None

Definition at line 715 of file main.c.

Referenced by Hardware_init().

Here is the caller graph for this function:

◆ IAC_IRQHandler()

void IAC_IRQHandler ( void )

IAC interrupt handler — traps illegal memory access violations.

Called when the IAC detects an unauthorized memory access. In production this would log the violation; here it halts the system for debugging purposes.

Definition at line 730 of file main.c.

◆ LCD_init()

static void LCD_init ( void )

static

Initialize the LCD display with dual-layer configuration.

Configures the LTDC controller for the two-layer display architecture:

Layer 1 (Background) — Camera preview:

Format: RGB565 (2 bytes/pixel)
Source: lcd_bg_buffer in PSRAM (written continuously by DCMIPP display pipe)
Position: centered square (480×480) or full screen depending on aspect ratio

Layer 2 (Foreground) — Inference overlay:

Format: ARGB4444 (2 bytes/pixel, 4-bit alpha for transparency)
Source: lcd_fg_buffer (double-buffered, written by CPU)
Position: full screen (800×480)
Alpha blending: transparent regions show Layer 1 underneath

Also initializes the display function pointers used by display_spe.c and display_mpe.c for coordinate conversion and line drawing.

Parameters

None

Return values

None

Configure Layer 1: camera preview background

Configure Layer 2: transparent inference overlay

Register coordinate conversion callbacks for the display module

Definition at line 914 of file main.c.

References clamp_point(), convert_length(), convert_point(), Display_binding_line(), Display_mpe_InitFunctions(), Display_spe_InitFunctions(), LayerConfig, lcd_bg_area, lcd_fg_area, Rectangle_TypeDef::X0, Rectangle_TypeDef::XSize, Rectangle_TypeDef::Y0, and Rectangle_TypeDef::YSize.

Referenced by __attribute__().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ MX_DCMIPP_ClockConfig()

HAL_StatusTypeDef MX_DCMIPP_ClockConfig ( DCMIPP_HandleTypeDef * hdcmipp )

Configure DCMIPP pixel clock (IC17) and CSI clock (IC18).

The DCMIPP clock is derived from PLL2 via IC17 (divider=3 → ~333MHz). The CSI clock (MIPI CSI-2 interface) is derived from PLL1 via IC18 (divider=40 → 20MHz). These frequencies are chosen to support the B-CAMS-IMX sensor's required clock rates.

Parameters

hdcmipp DCMIPP handle (unused, required by HAL callback signature)

Return values

HAL_OK on success, HAL error code on failure

Definition at line 990 of file main.c.

◆ NeuralNetwork_init()

static void NeuralNetwork_init	(	uint32_t *	nnin_length,
		float32_t *	nn_out[],
		int *	number_output,
		int32_t	nn_out_len[]
	)

static

Initialize the Neural Network input/output buffer pointers.

This function queries the LL_ATON runtime for the physical memory addresses of the NPU input and output buffers. These addresses point into the NPU SRAM banks (npuRAM3-6) where the AI runtime has allocated the model's activation buffers.

Why this is needed: The LL_ATON runtime allocates activation buffers at specific addresses within the NPU SRAM banks according to the memory map generated by ST Edge AI Core. These addresses are not known at compile time — they depend on the specific model and its memory layout. The LL_ATON API provides accessor functions to retrieve them at runtime.

Buffer layout for MoveNet Lightning:

nn_in → npuRAM4 (0x34270000) : uint8[192*192*3] = 110,592 bytes

nn_out[0] → npuRAM5 (0x342E0000) : float32[48*48*13] = 119,808 bytes

Parameters

[out]	nnin_length	Size of the input buffer in bytes
[out]	nn_out	Array of pointers to each output tensor buffer
[out]	number_output	Number of output tensors in the model
[out]	nn_out_len	Size of each output buffer in bytes

Return values

None

Get buffer descriptors from the LL_ATON runtime

Resolve input buffer address — points into NPU SRAM

Count output tensors (terminated by NULL name sentinel)

Resolve output buffer addresses and sizes

Definition at line 531 of file main.c.

References MAX_NUMBER_OUTPUT, and nn_in.

Referenced by __attribute__().

Here is the caller graph for this function:

◆ NPUCache_config()

static void NPUCache_config ( void )

static

Initialize and enable the NPU AXI cache.

The NPU has a dedicated AXI cache that buffers weight data streamed from external OctoFlash. Enabling it is critical for achieving the rated 600 GOPS throughput — without caching, weight streaming from OctoFlash (at ~200 MHz) would bottleneck the CONVACC units.

Parameters

None

Return values

None

Definition at line 660 of file main.c.

Referenced by Hardware_init().

Here is the caller graph for this function:

◆ NPURam_enable()

static void NPURam_enable ( void )

static

Enable the NPU clock and all four NPU SRAM banks.

The STM32N6 NPU requires four dedicated AXISRAM banks (npuRAM3 through npuRAM6, each 448KB) for storing activation buffers during inference. These banks are powered off by default and must be explicitly enabled before any NPU operation.

Memory map of NPU SRAM banks:

Bank	Address	Size	Usage
AXISRAM3 (npuRAM3)	0x34200000	448 KB	cpuRAM2 overflow
AXISRAM4 (npuRAM4)	0x34270000	448 KB	MoveNet nn_in buffer
AXISRAM5 (npuRAM5)	0x342E0000	448 KB	MoveNet nn_out buffer
AXISRAM6 (npuRAM6)	0x34350000	448 KB	Reserved

Note: The NPU clock is derived from PLL2 at 1000 MHz (configured in SystemClock_Config). The RAMCFG peripheral enables each SRAM bank.

Parameters

None

Return values

None

Enable all 4 NPU SRAM banks via RAMCFG

Definition at line 581 of file main.c.

Referenced by Hardware_init().

Here is the caller graph for this function:

◆ Security_Config()

static void Security_Config ( void )

static

Configure TrustZone security attributes for hardware peripherals.

The STM32N6 implements ARM TrustZone. All peripherals used by the AI inference pipeline (NPU, DMA2D, DCMIPP, LTDC) must be granted secure privileged access via the RISC (Resource Isolation Slave Controller) and RIMC (Resource Isolation Master Controller).

This function sets CID=1 (the application's compartment ID) as the master for all AI-related peripherals, with SEC|PRIV attributes.

Note: Without this configuration, the NPU cannot access OctoFlash for weight streaming, causing immediate inference failure.

Parameters

None

Return values

None

Definition at line 684 of file main.c.

Referenced by Hardware_init().

Here is the caller graph for this function:

◆ set_clk_sleep_mode()

static void set_clk_sleep_mode ( void )

static

Configure peripheral clocks to remain active during CPU sleep.

During NPU inference, the Cortex-M55 CPU can enter sleep mode (WFE — Wait For Event) while the NPU continues executing autonomously. This function enables the sleep-mode clock retention for all peripherals that must remain active during inference:

Peripheral	Reason for sleep-mode clock retention
XSPI1	LCD framebuffer in PSRAM
XSPI2	Model weights in OctoFlash
NPU	NPU inference execution
CACHEAXI	NPU AXI cache — required for weight streaming
LTDC	Display refresh continues during inference
DMA2D	Display composition
DCMIPP	Camera configuration retained
CSI	Camera link retained
AXISRAM1-6	NPU activation buffers

Note: Enabling all peripheral sleep clocks is a conservative approach that maximizes functionality. For production power optimization, selectively disable clocks for unused peripherals.

Parameters

None

Return values

None

Definition at line 629 of file main.c.

Referenced by Hardware_init().

Here is the caller graph for this function:

◆ SystemClock_Config()

static void SystemClock_Config ( void )

static

Configure the system clock tree using four PLLs.

Configures the STM32N6 clock tree to achieve maximum performance for AI inference:

Clock	Source	Frequency	Used By
CPU (IC1)	PLL1 / 1	800 MHz	Cortex-M55 execution
AXI (IC2)	PLL1 / 2	400 MHz	AXI bus, XSPI, DMA
NPU (IC6)	PLL2 / 1	1000 MHz	Neural-ART NPU
AXISRAM3-6 (IC11)	PLL3 / 1	900 MHz	NPU SRAM banks
HCLK	AXI / 2	200 MHz	AHB peripherals
PCLKx	HCLK / 1	200 MHz	APB1/2/4/5 peripherals
XSPI1/2	HCLK	200 MHz	OctoFlash + PSRAM

Note: PLL input is HSI (64 MHz internal oscillator, no crystal needed). The NPU clock at 1000 MHz exceeds the CPU clock (800 MHz) — this is intentional: the NPU is the performance-critical unit.

Parameters

None

Return values

None	— halts on error

PLL1 = HSI(64) * 25 / 2 = 800 MHz → CPU clock

PLL2 = HSI(64) * 125 / 8 = 1000 MHz → NPU clock

PLL3 = HSI(64) * 225 / 8 / 2 = 900 MHz → AXISRAM3-6

PLL4 = HSI(64) * 225 / 8 / 36 = 50 MHz → low-speed peripherals

Definition at line 1033 of file main.c.

Referenced by Hardware_init().

Here is the caller graph for this function:

Variable Documentation

◆ cameraFrameReceived

volatile int32_t cameraFrameReceived

Camera frame ready flag.

Set to 1 by the DCMIPP interrupt handler when a new NN pipe frame has been captured in AXISRAM. The main loop polls this flag. Declared volatile to prevent compiler optimization of the polling loop.

Definition at line 179 of file main.c.

Referenced by __attribute__(), and CMW_CAMERA_PIPE_FrameEventCallback().

◆ dcmipp_out_nn

uint8_t* dcmipp_out_nn

DCMIPP intermediate buffer for non-16-aligned NN inputs.

The DCMIPP hardware requires output image line lengths to be multiples of 16 bytes. If NN_WIDTH * NN_BPP is not a multiple of 16, DCMIPP cannot write directly to the NPU input buffer. In this case:

DCMIPP writes to this padded intermediate buffer (dcmipp_out_nn)
img_crop() copies the exact NN_WIDTH columns to nn_in

For MoveNet 192×192: 192 * 3 = 576 bytes/line → 576 % 16 = 0 → aligned, so this buffer is NOT allocated and nn_in is used directly.

Definition at line 212 of file main.c.

Referenced by __attribute__().

◆ LayerConfig

BSP_LCD_LayerConfig_t LayerConfig = {0}

LTDC layer configuration structure.

Definition at line 185 of file main.c.

Referenced by LCD_init().

◆ lcd_bg_area

Rectangle_TypeDef lcd_bg_area

Initial value:

= {
 
  .X0 = (LCD_FG_WIDTH - LCD_FG_HEIGHT) / 2,   
 
 
 
  .Y0 = 0,
  .XSize = 0,   
  .YSize = 0,   
}

LCD background area — camera preview region.

In ASPECT_RATIO_CROP mode, the preview is a centered square on the landscape LCD (800×480). The X0 offset centers it: X0 = (800 - 480) / 2 = 160 pixels from the left edge. XSize and YSize are filled in by CameraPipeline_Init().

Definition at line 140 of file main.c.

Referenced by __attribute__(), clamp_point(), convert_length(), convert_point(), and LCD_init().

◆ lcd_fg_area

Rectangle_TypeDef lcd_fg_area

Initial value:

= {
  .X0 = 0,
  .Y0 = 0,
  .XSize = LCD_FG_WIDTH,
  .YSize = LCD_FG_HEIGHT,
}

LCD foreground area — full-screen overlay for inference results.

Covers the entire 800×480 LCD surface. Uses ARGB4444 format (4 bits per channel, including a 4-bit alpha channel) to allow transparent overlay of keypoints and skeleton lines over the camera background.

Definition at line 157 of file main.c.

Referenced by Display_NetworkOutput(), and LCD_init().

◆ nn_in

uint8_t* nn_in

Pointer to the NPU input buffer in AXISRAM (set by NeuralNetwork_init)

Definition at line 182 of file main.c.

Referenced by __attribute__(), and NeuralNetwork_init().

◆ pp_output

mpe_pp_out_t pp_output

YOLOv8 output: bounding boxes + keypoints

Definition at line 167 of file main.c.

Referenced by __attribute__().

◆ pp_params

mpe_yolov8_pp_static_param_t pp_params

Postprocessing parameters — statically allocated, model-specific.

YOLOv8 NMS and confidence params

Definition at line 166 of file main.c.

Referenced by __attribute__().

Classes

Macros

Functions

Variables

Detailed Description

Macro Definition Documentation

◆ ALIGN_TO_16

◆ LCD_FG_FRAMEBUFFER_SIZE

◆ LCD_FG_HEIGHT

◆ LCD_FG_WIDTH

◆ MAX_NUMBER_OUTPUT

Function Documentation

◆ __attribute__()

◆ clamp_point()

◆ convert_length()

◆ convert_point()

◆ Display_binding_line()

◆ Display_NetworkOutput()

◆ Display_WelcomeScreen()

◆ Hardware_init()

◆ IAC_Config()

◆ IAC_IRQHandler()

◆ LCD_init()

◆ MX_DCMIPP_ClockConfig()

◆ NeuralNetwork_init()

◆ NPUCache_config()

◆ NPURam_enable()

◆ Security_Config()

◆ set_clk_sleep_mode()

◆ SystemClock_Config()

Variable Documentation

◆ cameraFrameReceived

◆ dcmipp_out_nn

◆ LayerConfig

◆ lcd_bg_area

◆ lcd_fg_area

◆ nn_in

◆ pp_output

◆ pp_params

◆ __attribute__()