STM32N6 NPU Deployment — Politecnico di Milano  1.0
Documentation for Neural Network Deployment on STM32N6 NPU - Politecnico di Milano 2024-2025
external_memory_mgt.py
Go to the documentation of this file.
1 
38 
39 import os
40 import json
41 import sys
42 from typing import Dict
43 from common.utils import get_model_name_and_its_input_shape
44 
45 
def _read_lines(path):
    """Return every line of the text file at `path`, line endings preserved."""
    with open(path, 'r') as f:
        return f.readlines()


def _replace_file_content(path, tmp_name, lines):
    """
    Overwrite `path` with `lines` through a sibling temporary file.

    The content is first written to `tmp_name` in the same directory and then
    moved over `path` with os.replace(), so `path` is never left half-written.
    The temporary file is removed if anything fails before the replacement.
    """
    tmp_path = os.path.join(os.path.dirname(path), tmp_name)
    try:
        with open(tmp_path, 'w') as f:
            f.writelines(lines)
        os.replace(tmp_path, path)
    finally:
        # After a successful os.replace() the temporary file no longer exists,
        # so this only triggers on the failure path.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


def _splice_generated_block(lines, block_lines):
    """
    Replace the content between the GENERATED CODE START/STOP marker lines.

    Both marker lines are kept. Everything after a START marker is dropped
    until the next STOP marker and `block_lines` is emitted in its place.
    Markers are matched on the exact line text (including the leading space
    and the trailing newline).

    Returns a tuple `(new_lines, found)` where `found` tells whether at least
    one START marker was encountered.
    """
    start_marker = " /*** @GENERATED CODE START - DO NOT TOUCH@ ***/\n"
    stop_marker = " /*** @GENERATED CODE STOP - DO NOT TOUCH@ ***/\n"

    new_lines = []
    found = False
    keep = True
    for line in lines:
        if line == start_marker:
            new_lines.append(line)
            new_lines.extend(block_lines)
            found = True
            keep = False
        elif line == stop_marker:
            keep = True
            new_lines.append(line)
        elif keep:
            new_lines.append(line)
    return new_lines, found


def update_activation_c_code(c_project_path: str,
                             path_network_c_info: str,
                             available_AXIRAM: int,
                             cfg=None,
                             custom_objects: Dict = None) -> None:
    """
    @brief Patches generated C firmware files to configure activation buffer placement.

    @details
    Three files of the C project are rewritten in place. All inputs (model
    input shape, camera resolution, `network_c_info.json`) are loaded and
    validated before the first file is modified.

    **1. `main.h` — Camera resolution configuration**
    Every line containing `#define CAMERA_RESOLUTION`, `#define CAM_RES_WIDTH`
    or `#define CAM_RES_HEIGHT` is replaced. The first matching row of the
    table below is used (W/H are the network input width/height):

    | aspect_ratio | condition            | CAMERA_RESOLUTION | CAM_RES_WIDTH   | CAM_RES_HEIGHT  |
    |--------------|----------------------|-------------------|-----------------|-----------------|
    | "crop"       | W <= 240 and H <= 240 | CAMERA_R320x240   | QVGA_RES_HEIGHT | QVGA_RES_HEIGHT |
    | "crop"       | W <= 480 and H <= 480 | CAMERA_R640x480   | VGA_RES_HEIGHT  | VGA_RES_HEIGHT  |
    | "padding"    | W <= 320 and H <= 320 | CAMERA_R320x240   | QVGA_RES_WIDTH  | QVGA_RES_HEIGHT |
    | "padding"    | W <= 640 and H <= 640 | CAMERA_R640x480   | VGA_RES_WIDTH   | VGA_RES_HEIGHT  |
    | any other    | W <= 320 and H <= 240 | CAMERA_R320x240   | QVGA_RES_WIDTH  | QVGA_RES_HEIGHT |
    | any other    | W <= 640 and H <= 480 | CAMERA_R640x480   | VGA_RES_WIDTH   | VGA_RES_HEIGHT  |

    **2. `main.c` — Activation buffer declarations**
    The content between the
    `/*** @GENERATED CODE START - DO NOT TOUCH@ ***/` and
    `/*** @GENERATED CODE STOP - DO NOT TOUCH@ ***/` marker lines is replaced
    with one declaration per activation pool:

    @code{.c}
    // Example output for a model with 2 activation pools:
    __attribute__((section(".NN_Activation_Buffer_AXIRAM")))
    __attribute__ ((aligned (32)))
    static uint8_t NN_Activation_Buffer_AXIRAM[AI_ACTIVATION_1_SIZE_BYTES + 32 - (AI_ACTIVATION_1_SIZE_BYTES%32)];
    __attribute__((section(".NN_Activation_Buffer_npuRAM4")))
    __attribute__ ((aligned (32)))
    static uint8_t NN_Activation_Buffer_npuRAM4[AI_ACTIVATION_2_SIZE_BYTES + 32 - (AI_ACTIVATION_2_SIZE_BYTES%32)];
    ai_handle NN_Activation_Buffer[AI_ACTIVATION_BUFFERS_COUNT] = { NN_Activation_Buffer_AXIRAM, NN_Activation_Buffer_npuRAM4, };
    @endcode

    Buffer handling:
    - Activation pools are the entries of `memory_pools` whose `rights` is
      `ACC_WRITE` and whose `used_size_bytes` is non-zero, sorted by
      ascending `used_size_bytes`.
    - The section/buffer name is `NN_Activation_Buffer_<pool name>`; the pool
      named `heap_overlay_pool` is mapped to `NN_Activation_Buffer_AXIRAM`.
    - Only pools that end up in `NN_Activation_Buffer_AXIRAM` are subtracted
      from the AXIRAM budget. The subtraction is done once, and only if the
      generated block was actually inserted into `main.c`.
    - The declared array size is the pool size rounded up past the next
      multiple of 32 bytes (an already aligned size grows by 32 bytes).

    Every `__attribute__((section(".CapturedImage_Buffer...` and
    `__attribute__((section(".RescaledImage_Buffer...` line is then rewritten
    to the `_AXIRAM` section when the buffer is strictly smaller than the
    remaining AXIRAM budget, and to the `_SDRAM` section otherwise. Buffer
    sizes use 1 byte per pixel for 1-channel models and 2 bytes per pixel
    (RGB565) for 3-channel models.

    **3. `ai_interface.h` — Input buffer index**
    The content between the same START/STOP markers is replaced with the
    `AI_NETWORK_INPUTS_IN_ACTIVATIONS_INDEX` and
    `AI_NETWORK_INPUTS_IN_ACTIVATIONS_SIZE` macros (index fixed to 0).

    @param c_project_path Root path of the C project.
        Expected structure:
        @code
        c_project_path/
        └── Application/STM32H747I-DISCO/
            ├── Inc/CM7/main.h
            ├── Inc/CM7/ai_interface.h
            └── Src/CM7/main.c
        @endcode
    @param path_network_c_info Path to the JSON file describing the network
        memory pools. It must contain a `memory_pools` list whose entries
        provide `name`, `rights` and `used_size_bytes`.
    @param available_AXIRAM AXIRAM budget in bytes available for the buffers.
    @param cfg Configuration object providing:
        - cfg.preprocessing.resizing.aspect_ratio
        - cfg.general.model_path
    @param custom_objects Custom objects dictionary forwarded to
        get_model_name_and_its_input_shape().

    @return None — all modifications are performed in place on the C source files.

    @throws ValueError if `cfg` is None, if the model input does not have
        1 or 3 channels, or if the model input does not fit the VGA format
        for the selected aspect-ratio mode.

    @note Each file is written to a `*_modify.c` / `*_modify.h` temporary
        file in the same directory which then replaces the original through
        `os.replace()`. `main.c` is replaced once with both of its patches.

    @warning This function directly modifies generated firmware source files.
        Any manual edits inside the `@GENERATED CODE START/STOP` blocks
        will be overwritten on the next deployment run.

    @note File paths are hard-coded for the **STM32H747I-DISCO** project
        layout. According to the project notes, STM32N6570-DK deployment
        relies on the Neural-ART toolchain for memory placement and does not
        call this function.
    """
    if cfg is None:
        raise ValueError(
            "update_activation_c_code() requires a configuration object (cfg).")

    # --- Resolve target file paths within the C project ---
    path_main_h = os.path.join(
        c_project_path, "Application/STM32H747I-DISCO/Inc/CM7/main.h")
    path_main_c = os.path.join(
        c_project_path, "Application/STM32H747I-DISCO/Src/CM7/main.c")
    path_ai_interface_h = os.path.join(
        c_project_path, "Application/STM32H747I-DISCO/Inc/CM7/ai_interface.h")

    # --- Determine model input dimensions from the model file ---
    aspect_ratio = cfg.preprocessing.resizing.aspect_ratio
    _, input_shape = get_model_name_and_its_input_shape(
        model_path=cfg.general.model_path, custom_objects=custom_objects)
    network_height = input_shape[0]
    network_width = input_shape[1]
    network_channel = input_shape[2]  # 1=grayscale, 3=RGB

    # --- Bytes per pixel of the image buffers: grayscale or RGB565 ---
    bytes_per_pixel = {1: 1, 3: 2}.get(network_channel)
    if bytes_per_pixel is None:
        # Fail before any file is modified instead of hitting an unbound
        # buffer size halfway through the patching.
        raise ValueError(
            "Unsupported number of input channels ({}): expected 1 or 3.".format(
                network_channel))
    resize_buffer_size = network_height * network_width * bytes_per_pixel

    # --- Camera resolution constants (pixels) ---
    QVGA_width = 320
    QVGA_height = 240
    VGA_width = 640
    VGA_height = 480
    symbol_to_pixels = {
        "QVGA_RES_WIDTH": QVGA_width,
        "QVGA_RES_HEIGHT": QVGA_height,
        "VGA_RES_WIDTH": VGA_width,
        "VGA_RES_HEIGHT": VGA_height,
    }

    # --- Select camera resolution based on model input size and aspect ratio mode ---
    # Each candidate is (max network width, max network height,
    #                    CAMERA_RESOLUTION, CAM_RES_WIDTH, CAM_RES_HEIGHT),
    # listed from the smallest to the largest camera format.
    if aspect_ratio == "crop":
        candidates = (
            (QVGA_height, QVGA_height,
             "CAMERA_R320x240", "QVGA_RES_HEIGHT", "QVGA_RES_HEIGHT"),
            (VGA_height, VGA_height,
             "CAMERA_R640x480", "VGA_RES_HEIGHT", "VGA_RES_HEIGHT"),
        )
    elif aspect_ratio == "padding":
        candidates = (
            (QVGA_width, QVGA_width,
             "CAMERA_R320x240", "QVGA_RES_WIDTH", "QVGA_RES_HEIGHT"),
            (VGA_width, VGA_width,
             "CAMERA_R640x480", "VGA_RES_WIDTH", "VGA_RES_HEIGHT"),
        )
    else:
        # Default: fit mode — camera captures at closest standard resolution
        candidates = (
            (QVGA_width, QVGA_height,
             "CAMERA_R320x240", "QVGA_RES_WIDTH", "QVGA_RES_HEIGHT"),
            (VGA_width, VGA_height,
             "CAMERA_R640x480", "VGA_RES_WIDTH", "VGA_RES_HEIGHT"),
        )

    selected = next(
        (c for c in candidates
         if network_width <= c[0] and network_height <= c[1]),
        None)
    if selected is None:
        raise ValueError("Needed camera resolution ({}x{}) exceeds VGA format.".format(
            network_width, network_height))
    _, _, cam_res, cam_res_width, cam_res_height = selected

    # --- Resolve symbolic camera buffer dimensions to actual pixel values ---
    cam_buffer_size = (symbol_to_pixels[cam_res_height]
                       * symbol_to_pixels[cam_res_width]
                       * bytes_per_pixel)

    # --- Load the memory pool description before touching any C file ---
    with open(path_network_c_info, 'r') as f:
        graph = json.load(f)

    activations = sorted(
        (pool for pool in graph["memory_pools"]
         if pool["rights"] == "ACC_WRITE" and pool["used_size_bytes"] != 0),
        key=lambda pool: pool['used_size_bytes'])

    # --- Build the activation buffer declarations once ---
    activation_block = []
    buffer_names = []
    axiram_used_by_activations = 0
    for i, pool in enumerate(activations):
        name_pool = (
            "NN_Activation_Buffer_" + pool["name"]
            if pool["name"] != "heap_overlay_pool"
            else "NN_Activation_Buffer_AXIRAM"
        )
        activation_block.append(f'__attribute__((section(".{name_pool}")))\n')
        activation_block.append('__attribute__ ((aligned (32)))\n')
        activation_block.append(
            f'static uint8_t {name_pool}'
            f'[AI_ACTIVATION_{i+1}_SIZE_BYTES + 32 - '
            f'(AI_ACTIVATION_{i+1}_SIZE_BYTES%32)];\n'
        )
        buffer_names.append(name_pool)
        if name_pool == "NN_Activation_Buffer_AXIRAM":
            axiram_used_by_activations += pool['used_size_bytes']
    activation_block.append(
        "ai_handle NN_Activation_Buffer[AI_ACTIVATION_BUFFERS_COUNT] = { "
        + "".join(name + ", " for name in buffer_names)
        + "};\n"
    )
    activation_block.append("\n")

    # =========================================================================
    # Patch 1: main.h — Update camera resolution macros
    # =========================================================================
    main_h_lines = []
    for line in _read_lines(path_main_h):
        if "#define CAMERA_RESOLUTION" in line:
            line = "#define CAMERA_RESOLUTION (" + cam_res + ")\n"
        elif "#define CAM_RES_WIDTH" in line:
            line = "#define CAM_RES_WIDTH (" + cam_res_width + ")\n"
        elif "#define CAM_RES_HEIGHT" in line:
            line = "#define CAM_RES_HEIGHT (" + cam_res_height + ")\n"
        main_h_lines.append(line)
    _replace_file_content(path_main_h, 'main_modify.h', main_h_lines)

    # =========================================================================
    # Patch 2: main.c — Declare activation buffers with explicit section attrs
    # =========================================================================
    main_c_lines, block_inserted = _splice_generated_block(
        _read_lines(path_main_c), activation_block)
    if block_inserted:
        # Deducted once, whatever the number of START markers in the file.
        available_AXIRAM -= axiram_used_by_activations

    # --- Place CapturedImage and RescaledImage buffers in what is left ---
    patched_main_c = []
    for line in main_c_lines:
        if '__attribute__((section(".CapturedImage_Buffer' in line:
            if cam_buffer_size < available_AXIRAM:
                available_AXIRAM -= cam_buffer_size
                line = '__attribute__((section(".CapturedImage_Buffer_AXIRAM")))\n'
            else:
                line = '__attribute__((section(".CapturedImage_Buffer_SDRAM")))\n'
        if '__attribute__((section(".RescaledImage_Buffer' in line:
            if resize_buffer_size < available_AXIRAM:
                available_AXIRAM -= resize_buffer_size
                line = '__attribute__((section(".RescaledImage_Buffer_AXIRAM")))\n'
            else:
                line = '__attribute__((section(".RescaledImage_Buffer_SDRAM")))\n'
        patched_main_c.append(line)
    _replace_file_content(path_main_c, 'main_modify.c', patched_main_c)

    # =========================================================================
    # Patch 3: ai_interface.h — Set input buffer index and size macros
    # =========================================================================
    # NOTE(review): the index is hard-coded to 0 — confirm that the network
    # input always lives in the first activation buffer.
    input_buffer_activation_buffer_index = 0
    interface_block = [
        f"#define AI_NETWORK_INPUTS_IN_ACTIVATIONS_INDEX "
        f"{input_buffer_activation_buffer_index}\n",
        f"#define AI_NETWORK_INPUTS_IN_ACTIVATIONS_SIZE "
        f"AI_ACTIVATION_{input_buffer_activation_buffer_index+1}_SIZE_BYTES\n",
        "\n",
    ]
    interface_lines, _ = _splice_generated_block(
        _read_lines(path_ai_interface_h), interface_block)
    _replace_file_content(
        path_ai_interface_h, 'interface_modify.h', interface_lines)
None update_activation_c_code(str c_project_path, str path_network_c_info, int available_AXIRAM, cfg=None, Dict custom_objects=None)