STM32N6 NPU Deployment — Politecnico di Milano  1.0
Documentation for Neural Network Deployment on STM32N6 NPU - Politecnico di Milano 2024-2025
common_deploy.py
Go to the documentation of this file.
1 
39 
40 import logging
41 import os
42 import sys
43 import warnings
44 import subprocess
45 import platform
46 from hydra.core.hydra_config import HydraConfig
47 from omegaconf import DictConfig
48 import tensorflow as tf
49 import shutil
50 from hydra.core.hydra_config import HydraConfig
51 from typing import Optional
52 from pathlib import Path
53 
54 import common.stm32ai_local as stmaic
55 from common.benchmarking import cloud_connect, cloud_analyze, benchmark_model
56 from common.stm32ai_dc import (CliLibraryIde, CliLibrarySerie, CliParameters)
57 from .external_memory_mgt import update_activation_c_code
58 
59 import json
60 import re
61 from typing import Dict, List
62 
63 
64 def _keep_internal_weights(path_network_data_params: str) -> None:
65  """
66  @brief Tags all model weight arrays for placement in internal Flash memory.
67 
68  @details
69  This function reads the generated C file `network_data_params.c` produced by
70  ST Edge AI Core and injects a GCC section attribute before every weight array
71  declaration, forcing the linker to place all weights in the MCU's internal Flash.
72 
73  The injected attribute is:
74  @code{.c}
75  AI_INTERNAL_FLASH __attribute__((section(".InternalFlashSection")))
76  @endcode
77 
78  This function is called when the model weights fit entirely within the internal
79  Flash of the target board (no weight splitting required). It is the simpler
80  alternative to _dispatch_weights().
81 
82  **How it works:**
83  1. Opens `network_data_params.c` for reading
84  2. Scans line by line for the `#include "network_data_params.h"` directive
85  and injects the macro definition immediately before it
86  3. For each line containing a weight array declaration (matched by regex
87  `const ai_uXX name[size]`), prepends the `AI_INTERNAL_FLASH` attribute
88  4. Writes the modified content to a temporary file, then atomically replaces
89  the original using `os.replace()`
90 
91  @param path_network_data_params Absolute path to the generated
92  `network_data_params.c` file inside
93  the ST Edge AI Core output directory.
94 
95  @return None
96 
97  @note The file is modified **in place** using a write-then-rename pattern
98  to avoid partial writes in case of failure.
99 
100  @see _dispatch_weights() for the alternative function used when weights
101  must be split between internal and external Flash.
102  """
103  with open(path_network_data_params, 'r') as f1, \
104  open(os.path.join(os.path.dirname(path_network_data_params),
105  'network_data_params_modify.c'), 'w') as f2:
106  for lineNumber, line in enumerate(f1):
107  # Inject the macro definition before the header include
108  if line == '#include "network_data_params.h"\n':
109  line = '#define AI_INTERNAL_FLASH __attribute__((section(".InternalFlashSection")))\n' + line
110  # Detect weight array declarations using regex
111  # Pattern matches: const ai_uXX name[size]
112  weight = re.findall("const ai_u(?:\d+) (.*)\[(?:\d+)\]", line)
113  if weight != []:
114  # Prepend the section attribute to force internal Flash placement
115  line = 'AI_INTERNAL_FLASH\n' + line
116  f2.write(line)
117  # Atomically replace the original file with the modified version
118  os.replace(
119  os.path.join(os.path.dirname(path_network_data_params),
120  'network_data_params_modify.c'),
121  path_network_data_params
122  )
123 
124 
125 def _dispatch_weights(internalFlashSizeFlash_KB: str,
126  kernelFlash_KB: str,
127  applicationSizeFlash_KB: str,
128  path_network_c_info: str,
129  path_network_data_params: str) -> None:
130  """
131  @brief Splits model weights between internal and external Flash memory.
132 
133  @details
134  When a model's weights are too large to fit entirely in the MCU's internal
135  Flash, this function distributes them between internal and external Flash
136  (e.g., OctoFlash on STM32N6570-DK) by annotating each weight array in the
137  generated C source with the appropriate GCC section attribute.
138 
139  **Algorithm:**
140  1. Reads `network_c_info.json` — the ST Edge AI Core memory report — to
141  obtain the list of all weight arrays with their sizes.
142  2. Filters to keep only read-only memory pools (`"rights": "ACC_READ"`),
143  which correspond to model weights stored in Flash.
144  3. Sorts weight arrays from largest to smallest (greedy bin-packing strategy).
145  4. Iterates through the sorted list and greedily assigns each weight to
146  internal Flash if space remains, otherwise to external Flash.
147  5. Injects GCC section attributes into `network_data_params.c` accordingly:
148  - `AI_INTERNAL_FLASH __attribute__((section(".InternalFlashSection")))`
149  - `AI_EXTERNAL_FLASH __attribute__((section(".ExternalFlashSection")))`
150 
151  **Memory budget calculation:**
152  @code
153  freeInternalFlash = internalFlashSize - kernelFlash - applicationFlash
154  @endcode
155  The kernel Flash (ST AI runtime library) and application code are subtracted
156  from the total internal Flash to compute the space available for weights.
157 
158  @param internalFlashSizeFlash_KB Total internal Flash size in KB (e.g., "2048KB")
159  @param kernelFlash_KB ST AI runtime library size in KB (e.g., "256KB")
160  @param applicationSizeFlash_KB Application firmware size in KB (e.g., "512KB")
161  @param path_network_c_info Path to `network_c_info.json` generated by
162  ST Edge AI Core (contains memory pool details)
163  @param path_network_data_params Path to `network_data_params.c` to be annotated
164 
165  @return None
166 
167  @note Uses a **greedy largest-first strategy** for bin packing — not optimal
168  but fast and effective for the typical weight distribution of embedded models.
169 
170  @see _keep_internal_weights() for the simpler case where all weights fit internally.
171  """
172  with open(os.path.join(path_network_c_info), 'r') as f:
173  graph = json.load(f)
174 
175  # Keep only Flash (read-only) memory pools — these are the weight arrays
176  for i in range(len(graph["memory_pools"]) - 1, -1, -1):
177  element = graph["memory_pools"][i]
178  if element["rights"] != "ACC_READ":
179  graph["memory_pools"].remove(element)
180 
181  # Sort weights largest-first for greedy bin-packing
182  sorted_weights = sorted(graph["memory_pools"],
183  key=lambda item: item['used_size_bytes'],
184  reverse=True)
185 
186  # Compute free internal Flash after kernel and application code
187  internalFlashSize_inBytes = int(re.split('(\d+)', internalFlashSizeFlash_KB)[1]) * 10**3
188  kernel_flash_inBytes = int(re.split('(\d+)', kernelFlash_KB)[1]) * 10**3
189  application_size_flash_inBytes = int(re.split('(\d+)', applicationSizeFlash_KB)[1]) * 10**3
190  freeInternalFlashSize = internalFlashSize_inBytes - kernel_flash_inBytes - application_size_flash_inBytes
191 
192  ExternalWeightArray = []
193  InternalWeightArray = []
194  for detail in sorted_weights:
195  if (freeInternalFlashSize - detail["used_size_bytes"]) > 0:
196  # Weight fits in internal Flash — assign it there
197  InternalWeightArray.append(detail["name"])
198  freeInternalFlashSize -= detail["used_size_bytes"]
199  else:
200  # No space left in internal Flash — assign to external Flash
201  ExternalWeightArray.append(detail["name"])
202 
203  # Annotate network_data_params.c with the correct section attributes
204  with open(path_network_data_params, 'r') as f1, \
205  open(os.path.join(os.path.dirname(path_network_data_params),
206  'network_data_params_modify.c'), 'w') as f2:
207  for lineNumber, line in enumerate(f1):
208  if line == '#include "network_data_params.h"\n':
209  # Inject both macro definitions before the include
210  line = (
211  '#define AI_EXTERNAL_FLASH __attribute__((section(".ExternalFlashSection")))\n'
212  '#define AI_INTERNAL_FLASH __attribute__((section(".InternalFlashSection")))\n'
213  + line
214  )
215  # Match weight array declarations
216  weight = re.findall(
217  "const ai_u(?:\d+) \\D_network_(.*)_\\D(?:\\d+)\\[(?:\\d+)\\]", line
218  )
219  if weight != []:
220  if weight[0] in InternalWeightArray:
221  line = 'AI_INTERNAL_FLASH\n' + line
222  elif weight[0] in ExternalWeightArray:
223  line = 'AI_EXTERNAL_FLASH\n' + line
224  f2.write(line)
225  os.replace(
226  os.path.join(os.path.dirname(path_network_data_params),
227  'network_data_params_modify.c'),
228  path_network_data_params
229  )
230 
231 
def stm32ai_deploy(target: bool = False,
                   stlink_serial_number: str = None,
                   stm32ai_version: str = None,
                   c_project_path: str = None,
                   output_dir: str = None,
                   stm32ai_output: str = None,
                   optimization: str = None,
                   path_to_stm32ai: str = None,
                   path_to_cube_ide: str = None,
                   additional_files: list = None,
                   stmaic_conf_filename: str = 'stmaic_c_project.conf',
                   verbosity: int = None,
                   debug: bool = False,
                   model_path: str = None,
                   get_model_name_output: str = None,
                   stm32ai_ide: str = None,
                   stm32ai_serie: str = None,
                   credentials: list = None,
                   on_cloud: bool = False,
                   check_large_model: bool = False,
                   cfg=None,
                   custom_objects: Dict = None) -> None:
    """Generic deployment pipeline for standard STM32 MCU targets (H7, U5, ...).

    Pipeline:
        1. Session creation — load the model into an STMAi session workspace.
        2. Board configuration — read the ``.conf`` file (memory pools,
           linker scripts, build-system paths) and attach it to the session.
        3. Model compilation — run ST Edge AI Core (cloud or local) to
           generate optimized C arrays, the AI runtime Lib/Inc files, and —
           for large models — split weights/activations across memories.
        4. Firmware build and flash — invoke STM32CubeIDE headlessly and
           flash via ST-Link.

    Large-model handling (``check_large_model=True``): the model is first
    benchmarked to measure ROM/RAM needs; weights overflowing internal Flash
    are split via ``_dispatch_weights()``, and activations overflowing AXIRAM
    are redistributed via ``update_activation_c_code()``.

    Args:
        target: Unused legacy parameter (kept for API compatibility).
        stlink_serial_number: ST-Link serial number for multi-board setups
            (empty if a single board is connected).
        stm32ai_version: ST Edge AI Core version string (e.g. "2.1.0").
        c_project_path: Path to the STM32CubeIDE C project root.
        output_dir: Directory for all deployment outputs (logs, generated files).
        stm32ai_output: Directory where ST Edge AI Core writes generated C files.
        optimization: Optimization level: "balanced", "latency" or "ram".
        path_to_stm32ai: Path to the ``stedgeai`` executable.
        path_to_cube_ide: Path to the ``stm32cubeide`` executable.
        additional_files: Extra files to copy into the C project before building.
        stmaic_conf_filename: Board configuration file name.
        verbosity: Logging verbosity (None=silent, 1=info, 2=debug).
        debug: Enable debug logging for the STMAi driver.
        model_path: Path to the quantized model (.tflite or .onnx).
        get_model_name_output: Model name used for Cloud API identification.
        stm32ai_ide: IDE/compiler identifier ("gcc" for the GCC toolchain).
        stm32ai_serie: STM32 series string (e.g. "STM32H7", "STM32U5").
        credentials: Pre-obtained cloud credentials from ``cloud_connect()``.
        on_cloud: If True, use STM32Cube.AI Developer Cloud for compilation.
        check_large_model: If True, run memory analysis before compilation and
            split weights/activations if needed.
        cfg: Hydra DictConfig (used by ``update_activation_c_code``).
        custom_objects: Custom Keras objects for model loading, if applicable.

    Returns:
        None.

    Raises:
        ValueError: If the model is too large to fit in any available memory.

    Note:
        For STM32N6 targets use ``stm32ai_deploy_stm32n6()`` which adds
        Neural-ART NPU support.
    """

    def _stmaic_local_call(session):
        """Compile the model with the local stedgeai executable.

        For ``check_large_model=True`` it additionally benchmarks the model,
        validates the board memory budget, and triggers weight/activation
        dispatching for large models.
        """
        if not check_large_model:
            os.environ["STM32_AI_EXE"] = path_to_stm32ai
            tools = stmaic.STMAiTools()
            session.set_tools(tools)
            print("[INFO] : Offline CubeAI used; Selected tools: ", tools, flush=True)
            shutil.rmtree(stm32ai_output, ignore_errors=True)
            opt = stmaic.STMAiCompileOptions(
                no_inputs_allocation=False,
                no_outputs_allocation=False
            )
            opt.optimization = optimization
            stmaic.compile(session, opt)
        else:
            split_weights = False
            split_ram = False

            # Step 1: measure the model footprint.
            benchmark_model(
                optimization=optimization, model_path=model_path,
                path_to_stm32ai=path_to_stm32ai, stm32ai_output=stm32ai_output,
                stm32ai_version=stm32ai_version,
                get_model_name_output=get_model_name_output
            )
            with open(os.path.join(stm32ai_output, 'network_report.json'), 'r') as f:
                report = json.load(f)

            needed_rom = report["model_size"]
            # "ram_size" may be reported as a list or a scalar.
            needed_ram = int(report["ram_size"][0]) if isinstance(
                report["ram_size"], list) else int(report["ram_size"])

            # Step 2: read the board memory pool configuration.
            with open(os.path.join(board.config.memory_pool_path), 'r') as f:
                memory_pool = json.load(f)

            available_default_ram = int(next(
                item for item in memory_pool['memory']['mempools']
                if item["name"] == "AXIRAM")["size"]["value"]) * 10**3
            externalRamSize_inBytes = int(next(
                item for item in memory_pool['memory']['mempools']
                if item["name"] == "SDRAM")["size"]["value"]) * 10**3

            split_ram = available_default_ram < needed_ram

            # Sizes are strings such as "2048KB"; raw-string regex (the
            # original used a non-raw '(\d+)').
            internalFlashSize_inBytes = int(
                re.split(r'(\d+)', board.config.internalFlash_size)[1]) * 10**3
            externalFlashSize_inBytes = int(
                re.split(r'(\d+)', board.config.externalFlash_size)[1]) * 10**3
            application_size_flash_inBytes = int(
                re.split(r'(\d+)', board.config.application_size)[1]) * 10**3

            # Step 3: validate that the model fits in total available memory.
            if needed_rom > externalFlashSize_inBytes + internalFlashSize_inBytes - application_size_flash_inBytes:
                raise ValueError(
                    "\033[31m The model is too large (too many weights) to fit on the board. "
                    "It won't be compiled.\033[39m")
            if needed_ram > externalRamSize_inBytes + available_default_ram:
                raise ValueError(
                    "\033[31m The model is too large (too many activations) to fit on the board. "
                    "It won't be compiled.\033[39m")

            # Step 4: determine whether weight splitting is needed.
            split_weights = needed_rom > (internalFlashSize_inBytes - application_size_flash_inBytes)

            os.environ["STM32_AI_EXE"] = path_to_stm32ai
            tools = stmaic.STMAiTools()
            session.set_tools(tools)
            print("[INFO] : Offline CubeAI used; Selected tools: ", tools, flush=True)
            shutil.rmtree(stm32ai_output, ignore_errors=True)

            opt = stmaic.STMAiCompileOptions(
                no_inputs_allocation=False,
                no_outputs_allocation=False,
                split_weights=split_weights
            )
            opt.optimization = optimization

            if split_ram:
                print("[INFO] : Dispatch activations in different RAM pools to fit the large model.")
                stmaic.compile(session=session, options=opt, target=session._board_config)
            else:
                stmaic.compile(session=session, options=opt)

            path_network_c_info = os.path.join(session.workspace, "network_c_info.json")

            # Step 5: update activation buffer placement in the generated C code.
            # NOTE(review): the call-opening line was missing from the reviewed
            # listing; restored from the orphaned keyword arguments and the
            # imported helper's signature — verify against the original repo.
            update_activation_c_code(
                c_project_path, path_network_c_info=path_network_c_info,
                available_AXIRAM=available_default_ram, cfg=cfg,
                custom_objects=custom_objects
            )

            # Step 6: annotate weight arrays with Flash section attributes.
            if split_weights:
                print("[INFO] : Dispatch weights between internal and external Flash to fit the large model.")
                # NOTE(review): call line restored (missing from the listing).
                _dispatch_weights(
                    internalFlashSizeFlash_KB=board.config.internalFlash_size,
                    kernelFlash_KB=board.config.lib_size,
                    applicationSizeFlash_KB=board.config.application_size,
                    path_network_c_info=path_network_c_info,
                    path_network_data_params=os.path.join(
                        session.generated_dir, "network_data_params.c")
                )
            else:
                print("[INFO] : Weights fit in internal Flash.")
                # NOTE(review): call line restored (missing from the listing).
                _keep_internal_weights(
                    path_network_data_params=os.path.join(
                        session.generated_dir, "network_data_params.c")
                )

    # --- Main deployment sequence ---

    os.environ["STM32_CUBE_IDE_EXE"] = path_to_cube_ide

    if debug:
        stmaic.set_log_level('debug')
    elif verbosity is not None:
        stmaic.set_log_level('info')

    # 1. Create an STMAi session from the model file.
    session = stmaic.load(model_path, workspace_dir=output_dir)

    # 2. Load the board configuration and attach it to the session.
    board_conf = os.path.join(c_project_path, stmaic_conf_filename)
    board = stmaic.STMAiBoardConfig(board_conf)
    session.set_board(board)
    print("[INFO] : Selected board : ", board, flush=True)

    # 3. Compile the model (cloud or local).
    user_files = []
    print("[INFO] : Compiling the model and generating optimized C code + Lib/Inc files: ",
          model_path, flush=True)
    if on_cloud:
        login_success, ai, _ = cloud_connect(
            stm32ai_version=stm32ai_version, credentials=credentials)
        if login_success:
            if not check_large_model:
                ai.generate(CliParameters(
                    model=model_path, output=stm32ai_output,
                    fromModel=get_model_name_output,
                    includeLibraryForSerie=CliLibrarySerie(stm32ai_serie.upper()),
                    includeLibraryForIde=CliLibraryIde(stm32ai_ide.lower())))
            else:
                # [Cloud large-model path — same logic as local but using the cloud API]
                pass
            if os.path.exists(stm32ai_output):
                shutil.move(stm32ai_output, os.path.join(output_dir, "generated"))
                stm32ai_output = os.path.join(output_dir, "generated")
            # Fall back to the local toolchain if the cloud run did not
            # produce the Lib/Inc directories.
            if not os.listdir(stm32ai_output) or \
                    'Lib' not in os.listdir(stm32ai_output) or \
                    'Inc' not in os.listdir(stm32ai_output):
                _stmaic_local_call(session)
        else:
            _stmaic_local_call(session)
    else:
        _stmaic_local_call(session)

    print("[INFO] : Optimized C code + Lib/Inc files generation done.")

    # 4. Build the C project and flash the board via STM32CubeIDE.
    print("[INFO] : Building the STM32 c-project..", flush=True)
    user_files.extend([os.path.join(output_dir, "C_header/ai_model_config.h")])
    if additional_files:
        for f in additional_files:
            user_files.extend([os.path.join(output_dir, f)])

    stmaic.build(session, user_files=user_files, serial_number=stlink_serial_number)
503 
504 
def stm32ai_deploy_stm32n6(target: bool = False,
                           stlink_serial_number: str = None,
                           stm32ai_version: str = None,
                           c_project_path: str = None,
                           output_dir: str = None,
                           stm32ai_output: str = None,
                           optimization: str = None,
                           path_to_stm32ai: str = None,
                           path_to_cube_ide: str = None,
                           additional_files: list = None,
                           stmaic_conf_filename: str = 'stmaic_c_project.conf',
                           verbosity: int = None,
                           debug: bool = False,
                           model_path: str = None,
                           get_model_name_output: str = None,
                           stm32ai_ide: str = None,
                           stm32ai_serie: str = None,
                           credentials: list = None,
                           on_cloud: bool = False,
                           check_large_model: bool = False,
                           build_conf: str = None,
                           cfg=None,
                           custom_objects: Dict = None,
                           input_data_type: str = '',
                           output_data_type: str = '',
                           inputs_ch_position: str = '',
                           outputs_ch_position: str = '') -> None:
    """STM32N6-specific deployment with Neural-ART NPU support.

    Differences from the generic ``stm32ai_deploy()``:

    * The compile options carry ``st_neural_art=profile + "@" +
      neuralart_user_path`` so ST Edge AI Core generates NPU-optimized code.
    * Input/output data types and channel positions are passed explicitly
      (e.g. ``input_data_type='uint8'``, ``inputs_ch_position='chlast'``).
    * A build configuration name (``build_conf``) can be supplied to the
      STM32CubeIDE project.
    * Both ``app_config.h`` and ``ai_model_config.h`` are copied into the
      C project before building (the generic function copies only the latter).
    * No weight-splitting step is performed here.

    Args:
        target: Unused legacy parameter.
        stlink_serial_number: ST-Link serial number (empty if single board).
        stm32ai_version: ST Edge AI Core version string.
        c_project_path: Path to the STM32CubeIDE C project root.
        output_dir: Output directory for deployment artifacts.
        stm32ai_output: Directory for ST Edge AI Core generated files.
        optimization: Optimization strategy (not used in the offline N6 path —
            it is not forwarded to the compile options below).
        path_to_stm32ai: Path to the stedgeai executable.
        path_to_cube_ide: Path to the stm32cubeide executable.
        additional_files: Extra files to copy into the C project.
        stmaic_conf_filename: Board .conf file (e.g. "stmaic_STM32N6570-DK.conf").
        verbosity: Logging verbosity level.
        debug: Enable debug mode for the STMAi driver.
        model_path: Path to the quantized model (.tflite or .onnx).
        get_model_name_output: Model name for Cloud API identification.
        stm32ai_ide: IDE string (e.g. "gcc").
        stm32ai_serie: Series string (e.g. "STM32N6").
        credentials: Cloud credentials (unused if on_cloud=False).
        on_cloud: Use the Developer Cloud for compilation.
        check_large_model: Accepted for API symmetry; not read in this body.
        build_conf: STM32CubeIDE build configuration name (e.g. "Release").
        cfg: Hydra DictConfig object; not read in this body.
        custom_objects: Custom Keras objects; not read in this body.
        input_data_type: NPU input data type (e.g. 'uint8').
        output_data_type: NPU output data type — empty means auto-detect.
        inputs_ch_position: Input channel format (e.g. 'chlast' for NHWC).
        outputs_ch_position: Output channel format — empty means auto-detect.

    Returns:
        None.
    """

    def _stmaic_local_call(session):
        """Compile the model for STM32N6 with the local stedgeai executable.

        Sets STM32_AI_EXE, builds the Neural-ART "profile@options-path"
        string from the board config, creates the compile options, binds the
        board to the session and runs stmaic.compile() against the board
        target.
        """
        os.environ["STM32_AI_EXE"] = path_to_stm32ai
        tools = stmaic.STMAiTools()
        session.set_tools(tools)
        print("[INFO] : Offline CubeAI used; Selected tools: ", tools, flush=True)

        # Remove any stale generated output before compiling.
        shutil.rmtree(stm32ai_output, ignore_errors=True)

        # "profile@path" string enabling the Neural-ART NPU code generation.
        neural_art_path = (session._board_config.config.profile + "@" +
                           session._board_config.config.neuralart_user_path)

        opt = stmaic.STMAiCompileOptions(
            st_neural_art=neural_art_path,
            input_data_type=input_data_type,  # e.g. 'uint8'
            inputs_ch_position=inputs_ch_position,  # e.g. 'chlast' (NHWC)
            output_data_type=output_data_type,
            outputs_ch_position=outputs_ch_position
        )

        # (Re)load the board configuration with the requested build config.
        board_conf = os.path.join(c_project_path, stmaic_conf_filename)
        board = stmaic.STMAiBoardConfig(board_conf, build_conf)
        session.set_board(board)

        stmaic.compile(session=session, options=opt, target=session._board_config)

    # --- Main deployment sequence for STM32N6 ---

    os.environ["STM32_CUBE_IDE_EXE"] = path_to_cube_ide

    if debug:
        stmaic.set_log_level('debug')
    elif verbosity is not None:
        stmaic.set_log_level('info')

    # Create the STMAi session and bind the board configuration.
    session = stmaic.load(model_path, workspace_dir=output_dir)
    board_conf = os.path.join(c_project_path, stmaic_conf_filename)
    board = stmaic.STMAiBoardConfig(board_conf, build_conf)
    session.set_board(board)
    print("[INFO] : Selected board : ", board, flush=True)

    user_files = []
    print("[INFO] : Compiling the model and generating optimized C code + Lib/Inc files: ",
          model_path, flush=True)

    if on_cloud:
        login_success, ai, _ = cloud_connect(
            stm32ai_version=stm32ai_version, credentials=credentials)
        if login_success:
            # Translate the Neural-ART profile options into the cloud CLI's
            # "--atonnOptions." prefixed form.
            with open(session._board_config.config.neuralart_user_path) as file:
                neuralart_options = json.load(file)

            neuralart_options = neuralart_options['Profiles']['default'][
                "options"].replace('--', "--atonnOptions.")
            ai.generate(CliParameters(
                model=model_path, output=stm32ai_output,
                fromModel=get_model_name_output,
                target="stm32n6", stNeuralArt="default",
                allocateInputs=False, allocateOutputs=False,
                mpool=board._conf.mpool,
                extraCommandLineArguments=neuralart_options,
                includeLibraryForSerie=CliLibrarySerie(stm32ai_serie.upper()),
                includeLibraryForIde=CliLibraryIde(stm32ai_ide.lower())))
            # NOTE(review): the source listing's indentation was flattened;
            # this nesting (move-if-present, then fall back to the local
            # toolchain when Lib/Inc are missing) is reconstructed — confirm
            # against the original repository.
            if os.path.exists(stm32ai_output):
                shutil.move(stm32ai_output, os.path.join(output_dir, "generated"))
                stm32ai_output = os.path.join(output_dir, "generated")
            if not os.listdir(stm32ai_output) or \
                    'Lib' not in os.listdir(stm32ai_output) or \
                    'Inc' not in os.listdir(stm32ai_output):
                _stmaic_local_call(session)
        else:
            _stmaic_local_call(session)
    else:
        # Offline path (used in this project): local stedgeai compilation.
        _stmaic_local_call(session)

    print("[INFO] : Optimized C code + Lib/Inc files generation done.")

    # Build the C project and flash the board; both generated headers are
    # copied into the project (app_config.h in addition to ai_model_config.h).
    print("[INFO] : Building the STM32 c-project..", flush=True)
    user_files.extend([os.path.join(output_dir, "C_header/app_config.h")])
    user_files.extend([os.path.join(output_dir, "C_header/ai_model_config.h")])
    if additional_files:
        for f in additional_files:
            user_files.extend([os.path.join(output_dir, f)])

    stmaic.build(session, user_files=user_files, serial_number=stlink_serial_number)
737 
738 
739 def stm32ai_deploy_mpu(target: str = None,
740  board_ip_address: str = None,
741  board_deploy: str = None,
742  class_names: List = None,
743  c_project_path: str = None,
744  verbosity: int = None,
745  debug: bool = False,
746  model_path: str = None,
747  cfg=None) -> bool:
748  """
749  @brief Deploy an AI model to an STM32MP MPU board over SSH/SCP.
750 
751  @details
752  This function handles deployment on STM32MP-series Microprocessor Units (MPUs),
753  which run Linux and use a fundamentally different deployment mechanism than MCUs:
754  instead of flashing firmware via ST-Link, it transfers application files over the
755  network using SSH and SCP.
756 
757  **Deployment mechanism:**
758  Unlike MCU deployment (which replaces the entire firmware binary), MPU deployment:
759  1. Verifies board reachability via ICMP ping
760  2. Creates the deployment directory on the target via SSH
761  3. Copies application code, resources, and the model file via SCP
762  4. Copies board-specific shell scripts (STM32MP1/*.sh or STM32MP2/*.sh)
763  5. Launches the application remotely via SSH
764 
765  **Supported boards:**
766  - STM32MP257F-EV1 (STM32MP2 series)
767  - STM32MP157F-DK2 (STM32MP1 series)
768  - STM32MP135F-DK (STM32MP1 series)
769 
770  **File transfer structure:**
771  @code
772  c_project_path/
773  ├── Application/ → Copied to board_deploy/Application/
774  │ └── launch_*.sh → Main launch script
775  ├── Resources/ → Copied to board_deploy/Resources/
776  │ └── class_names.txt → Generated from class_names parameter
777  └── STM32MP1/*.sh → Board-specific scripts (MP1) or STM32MP2/*.sh (MP2)
778  @endcode
779 
780  @param target Unused legacy parameter.
781  @param board_ip_address IP address of the target MPU board (e.g., "192.168.1.100").
782  The board must be on the same network as the host PC.
783  @param board_deploy Deployment directory path on the target board's filesystem.
784  @param class_names List of class name strings OR path to a .txt file containing
785  class names (one per line). Used for inference labeling.
786  @param c_project_path Path to the C project containing Application/ and Resources/.
787  @param verbosity Logging verbosity level.
788  @param debug Enable debug logging.
789  @param model_path Path to the AI model file to deploy (.tflite, .onnx, or .nb).
790  @param cfg Hydra DictConfig object (currently unused in MPU path).
791 
792  @return True if deployment succeeded, False on any error.
793 
794  @throws None — errors are caught and logged, returning False instead.
795 
796  @note SSH host key checking is disabled (`StrictHostKeyChecking no`) for
797  convenience in lab/development environments. Do not use in production.
798 
799  @note This function is **not used in this project** (which targets the STM32N6
800  MCU, not an MPU). It is documented here for completeness.
801  """
802 
803  # Step 1: Validate that a board IP address was provided
804  if board_ip_address is None:
805  print("[FAIL] : Board IP address is missing, unable to deploy on target.")
806  return False
807 
808  # Step 2: Verify board reachability via ping
809  count = 5
810  timeout = 100
811  subprocess_timeout = 5
812  count_params = '-n' if platform.system().lower() == 'windows' else '-c'
813  timeout_params = '-w' if platform.system().lower() == 'windows' else '-W'
814 
815  cmd = ['ping', count_params, str(count), timeout_params, str(timeout), board_ip_address]
816  try:
817  res = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
818  timeout=5, text=True)
819  if res.returncode == 0:
820  print(f"[INFO] : Board is reachable at {board_ip_address} address")
821  else:
822  print(f"[FAIL] : Board is not reachable at {board_ip_address} address")
823  return False
824  except subprocess.TimeoutExpired:
825  print(f"[FAIL] : Board is not reachable, ping timed out after {subprocess_timeout}s.")
826  return False
827  except Exception as e:
828  print(f"[FAIL] : Verification of the IP failed : {e}.")
829  return False
830 
831  # Step 3: Create deployment directory on target via SSH
832  command = "mkdir -p " + board_deploy
833  ssh = subprocess.run(
834  "ssh -o \"StrictHostKeyChecking no\" root@" + board_ip_address + " \"" + command + "\"",
835  shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=300)
836  if ssh.returncode != 0:
837  print(f"[FAIL] : Deploy directory creation failed, code: {ssh.returncode}")
838  return False
839 
840  # Step 4: Generate class names file on disk
841  path_to_application = c_project_path + "Application/"
842  path_to_resources = c_project_path + "Resources/"
843  label_file = os.path.join(path_to_resources, 'class_names.txt')
844 
845  if isinstance(class_names, list) and all(isinstance(name, str) for name in class_names):
846  with open(label_file, 'w') as file:
847  for class_name in class_names:
848  file.write(class_name + '\n')
849  elif isinstance(class_names, str) and class_names.endswith('.txt'):
850  shutil.copy(class_names, label_file)
851 
852  # Step 5: Transfer application code and model to the board via SCP
853  command = ("scp -r " + path_to_application + " " + path_to_resources + " " +
854  model_path + " root@" + board_ip_address + ":" + board_deploy)
855  deploy_res = subprocess.run(command, shell=True, stdout=subprocess.PIPE,
856  stderr=subprocess.PIPE, timeout=300)
857  if deploy_res.returncode == 0:
858  print(f"[INFO] : Application code successfully installed on target")
859  else:
860  print(f"[FAIL] : Application code deployment failed : {deploy_res.stderr} ")
861  return False
862 
863  # Step 6: Transfer board-specific shell scripts
864  if "STM32MP2" in target:
865  path_to_target_resources = c_project_path + "/STM32MP2/*.sh"
866  else:
867  path_to_target_resources = c_project_path + "/STM32MP1/*.sh"
868 
869  command = ("scp -r -p " + path_to_target_resources + " root@" +
870  board_ip_address + ":" + board_deploy + "/Resources")
871  deploy_spe_res = subprocess.run(command, shell=True, stdout=subprocess.PIPE,
872  stderr=subprocess.PIPE, timeout=300)
873  if deploy_spe_res.returncode != 0:
874  print(f"[FAIL] : Application code deployment failed : {deploy_spe_res.stderr} ")
875  return False
876 
877  # Step 7: Find and launch the application script on the board
878  script_extension = ".sh"
879  file_names = []
880  for item in os.listdir(path_to_application):
881  if Path(item).suffix == script_extension:
882  file_names.append(os.path.basename(item))
883 
884  launch_script = None
885  for file_name in file_names:
886  if "launch_" in file_name:
887  launch_script = file_name
888  if launch_script is None:
889  print("[FAIL] : No launch_*.sh script found in Application/.")
890  return False
889  command = (board_deploy + "/Application/" + launch_script + " " +
890  board_deploy + " " + os.path.basename(model_path) + " " +
891  os.path.basename(label_file))
892  print(f"[INFO] : To launch application directly on the target please run : {command}")
893  command = ("ssh -o \"StrictHostKeyChecking no\" root@" + board_ip_address +
894  " \"" + command + "\"")
895  print(f"[INFO] : To launch application from your host computer please run : {command}")
bool stm32ai_deploy_mpu(str target=None, str board_ip_address=None, str board_deploy=None, List class_names=None, str c_project_path=None, int verbosity=None, bool debug=False, str model_path=None, cfg=None)
None _keep_internal_weights(str path_network_data_params)
None _dispatch_weights(str internalFlashSizeFlash_KB, str kernelFlash_KB, str applicationSizeFlash_KB, str path_network_c_info, str path_network_data_params)
None stm32ai_deploy(bool target=False, str stlink_serial_number=None, str stm32ai_version=None, str c_project_path=None, str output_dir=None, str stm32ai_output=None, str optimization=None, str path_to_stm32ai=None, str path_to_cube_ide=None, list additional_files=None, str stmaic_conf_filename='stmaic_c_project.conf', int verbosity=None, bool debug=False, str model_path=None, str get_model_name_output=None, str stm32ai_ide=None, str stm32ai_serie=None, list credentials=None, bool on_cloud=False, bool check_large_model=False, cfg=None, Dict custom_objects=None)
None stm32ai_deploy_stm32n6(bool target=False, str stlink_serial_number=None, str stm32ai_version=None, str c_project_path=None, str output_dir=None, str stm32ai_output=None, str optimization=None, str path_to_stm32ai=None, str path_to_cube_ide=None, list additional_files=None, str stmaic_conf_filename='stmaic_c_project.conf', int verbosity=None, bool debug=False, str model_path=None, str get_model_name_output=None, str stm32ai_ide=None, str stm32ai_serie=None, list credentials=None, bool on_cloud=False, bool check_large_model=False, str build_conf=None, cfg=None, Dict custom_objects=None, str input_data_type='', str output_data_type='', str inputs_ch_position='', str outputs_ch_position='')
None update_activation_c_code(str c_project_path, str path_network_c_info, int available_AXIRAM, cfg=None, Dict custom_objects=None)