STM32N6 NPU Deployment — Politecnico di Milano  1.0
Documentation for Neural Network Deployment on STM32N6 NPU - Politecnico di Milano 2024-2025
data_loader.py
Go to the documentation of this file.
1 # /*---------------------------------------------------------------------------------------------
2 # * Copyright (c) 2024 STMicroelectronics.
3 # * All rights reserved.
4 # *
5 # * This software is licensed under terms that can be found in the LICENSE file in
6 # * the root directory of this software component.
7 # * If no LICENSE file comes with this software, it is provided AS-IS.
8 # *--------------------------------------------------------------------------------------------*/
9 
10 
# Authors: Giacomo Colosio, Sebastiano Colosio, Patrizio Acquadro, Tito Nicola Drugman
#
# @copyright Copyright (c) 2023-2024 STMicroelectronics. All rights reserved.
22 
23 
24 import os
25 import string
26 import pickle
27 import scipy.io
28 import numpy as np
29 import tensorflow as tf
30 from typing import Tuple, List
31 
32 def _parse_labels(label_path : str):
33  """
34  Parsing of the labels files
35  Args:
36  label_path (str): Path of the label file.
37 
38  Returns:
39  ground_truths (np.array) : shape (ground_truths, 5+3*keypoints) ground truths present in the label file
40  """
41  if label_path == "None":
42  ground_truths = np.array([17*3*[0.]],np.float32)
43  else:
44  file = open(label_path)
45  txt = file.read().split("\n")
46  txt = [x.split(" ") for x in txt]
47  if len(txt[-1])==1:txt=txt[:-1]
48  ground_truths = np.array([[float(j) for j in i] for i in txt],np.float32)
49  return ground_truths
50 
51 def _normalize_labels(label, n : int, l : int):
52  """
53  Normalization of the labels -> same shape for every label regarding the number of ground truths
54  Args:
55  label (np.array): shape (ground_truths, 5+3*keypoints) ground truths present in the label file
56  l (int): shape (1, ) maximum number of ground truths present in a label file
57  n (int): shape (1, ) current number of ground truths present in this label file
58 
59  Returns:
60  normalized_label (np.array) : shape (l, 5+3*keypoints) label with normalized shape
61  """
62  m = n - len(label)
63  miss = np.zeros((m,l)) #-np.ones((m,l)) # create missing labels with 0 values
64  normalized_label = np.concatenate([label,miss])
65  return normalized_label
66 
def _get_path_dataset(path: str,
                      seed: int,
                      shuffle: bool = True) -> tf.data.Dataset:
    """
    Creates a tf.data.Dataset from a dataset root directory path.
    The dataset has the following directory structure (checked in parse_config.py):
    dataset_root_dir:
        image_1.jpg
        image_1.txt
        ...
        image_2.jpg
        image_2.txt

    Args:
        path (str): Path of the dataset folder.
        seed (int): seed used when performing the shuffle.
        shuffle (bool): Shuffle the dataset.

    Returns:
        dataset (tf.data.Dataset): dataset yielding a (path, label) tuple per sample.
    """
    # Scan the directory once instead of twice; sorting the filenames keeps
    # the same deterministic order as sorting the joined paths.
    entries = sorted(os.listdir(path))
    paths_imgs = [os.path.join(path, f) for f in entries if f.endswith(".jpg")]
    paths_labels = [os.path.join(path, f) for f in entries if f.endswith(".txt")]

    # If images and label files do not pair up one-to-one, fall back to a
    # dummy "None" label for every image (handled by _parse_labels).
    if len(paths_imgs) != len(paths_labels):
        paths_labels = ["None"] * len(paths_imgs)

    labels = [_parse_labels(p) for p in paths_labels]

    len_max = max(len(lab) for lab in labels)  # max ground truths per image
    len_label = len(labels[0][0])              # width of one label row

    # print("[INFO] : The dataset contains a maximum of ", len_max, " poses per image")

    # Pad every label to the same (len_max, len_label) shape.
    labels = [_normalize_labels(lab, n=len_max, l=len_label) for lab in labels]

    data_list = list(zip(paths_imgs, labels))

    if shuffle:
        rng = np.random.RandomState(seed)
        rng.shuffle(data_list)

    imgs, labels = zip(*data_list)

    dataset = tf.data.Dataset.from_tensor_slices((list(imgs), list(labels)))

    return dataset
115 
def _get_padded_labels(data,r,R,height,width):
    """
    Remap ground-truth boxes and keypoints so they stay aligned with an image
    resized with padding (letterboxing) to the target aspect ratio R.

    Args:
        data: tensor of shape (P, 1+4+nbr_keypoints*3); column 0 is the class
            id, columns 1:5 the (x, y, w, h) box, the rest keypoint
            (x, y, state) triplets. Coordinates appear to be normalized to
            [0, 1] — TODO confirm against the label format.
        r: original image aspect ratio (height/width), as a tensor.
        R: target aspect ratio (height/width).
        height: target image height in pixels.
        width: target image width in pixels.

    Returns:
        Float32 tensor of shape (P, 1+4+nbr_keypoints*3) with coordinates
        expressed in the padded-image frame.
    """
    sh = tf.shape(data)

    padded_boxes = data[:,1:5] # shape : (P,4)

    # (center, size) boxes -> corner coordinates.
    x1 = padded_boxes[:,0] - padded_boxes[:,2]/2 # shape : (P)
    y1 = padded_boxes[:,1] - padded_boxes[:,3]/2 # shape : (P)
    x2 = padded_boxes[:,0] + padded_boxes[:,2]/2 # shape : (P)
    y2 = padded_boxes[:,1] + padded_boxes[:,3]/2 # shape : (P)

    xboxes = tf.cast(tf.stack([x1,x2]),tf.float32) # shape : (2,P)
    yboxes = tf.cast(tf.stack([y1,y2]),tf.float32) # shape : (2,P)

    padded_keypoints = data[:,5:] # shape : (P,nbr_keypoints*3)
    # Regroup keypoints as (3, P, nbr_keypoints): axis 0 holds x / y / state.
    padded_keypoints = tf.cast(tf.transpose(tf.reshape(padded_keypoints,[sh[0],-1,3]),[2,0,1]),tf.float32) # shape : (3,P,nbr_keypoints)

    # kp selects which coordinate axis gets remapped below:
    # kp == 1 -> y coordinates are remapped via vectorxy/vector, x kept as-is;
    # kp == 0 -> x coordinates are remapped, y kept as-is.
    if r>R :
        ax = tf.cast(width,tf.float32)
        ra = tf.cast(R/r,tf.float32)
        kp = tf.cast(1,tf.float32)

    else:
        ax = tf.cast(height,tf.float32)
        ra = tf.cast(r/R,tf.float32)
        kp = tf.cast(0,tf.float32)

    nb_px_added = tf.cast(1 - ax*ra,tf.float32) # the number of pixels added to the original image to form the new aspect ratio
    # NOTE(review): "1 - ax*ra" looks dimensionally odd for a pixel count
    # (ax*ra is in pixels) — confirm whether "ax - ax*ra" was intended.
    odd = tf.cast(nb_px_added%2,tf.float32) # 1 : odd | 0 : even -> to know if the number of pixels added to the original image is odd or even
    # Affine remap of the padded axis: scale by ra around the 0.5 center, with
    # a half-pixel correction when an odd number of pixels was added.
    vectorxy = (tf.cast(ra,tf.float32) * (xboxes*(1-kp)+yboxes*kp - 0.5) + 0.5 ) - odd*0.5/(ax-1) # shape : (2,P)
    vector = (tf.cast(ra,tf.float32) * (padded_keypoints[0]*(1-kp) + padded_keypoints[1]*kp - 0.5) + 0.5 ) - odd*0.5/(ax-1) # shape : (P,nbr_keypoints)

    # Blend remapped and untouched keypoint channels according to kp.
    pk0 = tf.cast(kp,tf.float32)*padded_keypoints[0] + (1-tf.cast(kp,tf.float32))*vector
    pk1 = (1-tf.cast(kp,tf.float32))*padded_keypoints[1] + tf.cast(kp,tf.float32)*vector

    # Same blend for the box corner coordinates.
    xboxes = (1-tf.cast(kp,tf.float32))*vectorxy + tf.cast(kp,tf.float32)*xboxes # shape : (2,P)
    yboxes = tf.cast(kp,tf.float32)*vectorxy + (1-tf.cast(kp,tf.float32))*yboxes # shape : (2,P)

    # Corner coordinates back to (center, size).
    x = (xboxes[0] + xboxes[1]) / 2 # shape : (P)
    y = (yboxes[0] + yboxes[1]) / 2 # shape : (P)
    w = xboxes[1] - xboxes[0] # shape : (P)
    h = yboxes[1] - yboxes[0] # shape : (P)

    padded_boxes = tf.stack([x,y,w,h],1) # shape : (P,4)

    # Restore the original flat (P, nbr_keypoints*3) keypoint layout.
    padded_keypoints = tf.stack([pk0,pk1,padded_keypoints[2]]) # shape : (3,P,nbr_keypoints)
    padded_keypoints = tf.reshape(tf.transpose(padded_keypoints,[1,2,0]),[sh[0],-1]) # shape : (P,nbr_keypoints*3)

    data = tf.cast(data,tf.float32)

    padded_labels = tf.concat([data[:,:1],padded_boxes,padded_keypoints],-1) # shape : (P,1+4+nbr_keypoints*3)
    padded_labels = tf.cast(padded_labels,tf.float32)

    return padded_labels
170 
def _preprocess_function(data_x: tf.Tensor,
                         data_y: tf.Tensor,
                         image_size: tuple[int],
                         interpolation: str,
                         aspect_ratio: str,
                         color_mode: str,
                         nbr_keypoints: int) -> tuple[tf.Tensor, tf.Tensor]:
    """
    Load an image from its path and apply the configured resizing policy,
    remapping the labels when padding changes the coordinate frame.

    Args:
        data_x (tf.Tensor): scalar string tensor, path of the image file.
        data_y (tf.Tensor): label tensor of shape (P, 1+4+nbr_keypoints*3).
        image_size (tuple[int]): (height, width) target size.
        interpolation (str): resize interpolation method.
        aspect_ratio (str): "fit" (stretch) or "padding" (letterbox).
        color_mode (str): "grayscale" for 1 channel, anything else 3 channels.
        nbr_keypoints (int): number of keypoints per person (unused here,
            kept for interface compatibility with the callers).

    Returns:
        tuple[tf.Tensor, tf.Tensor]: preprocessed image and float32 labels.

    Raises:
        ValueError: if aspect_ratio is neither "fit" nor "padding".
    """
    height, width = image_size
    channels = 1 if color_mode == "grayscale" else 3

    image = tf.io.read_file(data_x)
    image = tf.image.decode_image(image, channels=channels, expand_animations=False)

    s = tf.shape(image)
    r = s[0] / s[1]     # original aspect ratio (h/w)
    R = height / width  # target aspect ratio (h/w)

    if aspect_ratio == "fit":
        image = tf.image.resize(image, [height, width], method=interpolation, preserve_aspect_ratio=False)
        data_y = tf.cast(data_y, tf.float32)
    elif aspect_ratio == "padding":
        # Pass the interpolation method here too: the previous implementation
        # silently fell back to resize_with_pad's bilinear default.
        image = tf.image.resize_with_pad(image, height, width, method=interpolation)
        data_y = _get_padded_labels(data_y, r, R, height, width)
    else:
        raise ValueError("In config file, at section preprocessing.aspect_ratio choose 'fit' or 'padding'")

    return image, data_y
201 
202 
def _get_train_val_ds(training_path: str,
                      image_size: tuple[int] = None,
                      nbr_keypoints: int = None,
                      interpolation: str = None,
                      aspect_ratio: str = None,
                      color_mode: str = None,
                      validation_split: float = None,
                      batch_size: int = None,
                      seed: int = None,
                      shuffle: bool = True,
                      to_cache: bool = False) -> Tuple[tf.data.Dataset, tf.data.Dataset]:
    """
    Loads the images under a given dataset root directory and returns training
    and validation tf.data.Datasets.
    The dataset has the following directory structure (checked in parse_config.py):
    dataset_root_dir:
        image_1.jpg
        image_1.txt
        ...
        image_2.jpg
        image_2.txt

    Args:
        training_path (str): Path to the directory containing the training images.
        image_size (tuple[int]): Size of the input images to resize them to.
        nbr_keypoints (int): number of keypoints for a person.
        interpolation (str): Interpolation method to use when resizing the images.
        aspect_ratio (str): "fit" or "padding" resizing policy.
        color_mode (str): Color mode to use for the images.
        validation_split (float): Fraction of the data to use for validation.
        batch_size (int): Batch size to use for training and validation.
        seed (int): Seed to use for shuffling the data.
        shuffle (bool): Whether or not to shuffle the data.
        to_cache (bool): Whether or not to cache the datasets.

    Returns:
        Tuple[tf.data.Dataset, tf.data.Dataset]: Training and validation datasets.
    """
    # Arguments coming from the config file may be None when an attribute is
    # missing, so defaults are applied here. Use explicit "is None" tests so a
    # legitimate falsy value — in particular validation_split=0.0 — is not
    # silently replaced by the default (the old truthiness test did that).
    interpolation = "bilinear" if interpolation is None else interpolation
    aspect_ratio = "fit" if aspect_ratio is None else aspect_ratio
    color_mode = "rgb" if color_mode is None else color_mode
    validation_split = 0.2 if validation_split is None else validation_split
    batch_size = 32 if batch_size is None else batch_size

    preprocess_params = (image_size,
                         interpolation,
                         aspect_ratio,
                         color_mode,
                         nbr_keypoints)

    dataset = _get_path_dataset(training_path, seed=seed)

    # The split happens after the seeded shuffle inside _get_path_dataset,
    # so it is random but reproducible for a given seed.
    train_size = int(len(dataset) * (1 - validation_split))
    train_ds = dataset.take(train_size)
    val_ds = dataset.skip(train_size)

    if shuffle:
        train_ds = train_ds.shuffle(len(train_ds), reshuffle_each_iteration=True, seed=seed)

    train_ds = train_ds.map(lambda *data: _preprocess_function(*data, *preprocess_params))
    val_ds = val_ds.map(lambda *data: _preprocess_function(*data, *preprocess_params))

    train_ds = train_ds.batch(batch_size, drop_remainder=True)
    val_ds = val_ds.batch(batch_size, drop_remainder=True)

    if to_cache:
        train_ds = train_ds.cache()
        val_ds = val_ds.cache()

    train_ds = train_ds.prefetch(buffer_size=tf.data.AUTOTUNE)
    val_ds = val_ds.prefetch(buffer_size=tf.data.AUTOTUNE)

    return train_ds, val_ds
282 
283 
def _get_ds(data_path: str = None,
            image_size: tuple[int] = None,
            nbr_keypoints: int = None,
            interpolation: str = None,
            aspect_ratio: str = None,
            color_mode: str = None,
            batch_size: int = None,
            seed: int = None,
            shuffle: bool = False,
            to_cache: bool = False) -> tf.data.Dataset:
    """
    Build a single tf.data.Dataset from a dataset root directory.
    The directory layout (validated in parse_config.py) pairs each image
    with its label file:
    dataset_root_dir:
        image_1.jpg
        image_1.txt
        ...
        image_2.jpg
        image_2.txt

    Args:
        data_path (str): Path to the directory containing the images.
        image_size (tuple[int]): Target (height, width) for resizing.
        nbr_keypoints (int): number of keypoints for a person.
        interpolation (str): Interpolation method used when resizing.
        aspect_ratio (str): "fit" or "padding" resizing policy.
        color_mode (str): Color mode to use for the images.
        batch_size (int): Batch size for the dataset.
        seed (int): Seed used when shuffling.
        shuffle (bool): Whether or not to shuffle the data.
        to_cache (bool): Whether or not to cache the dataset.

    Returns:
        tf.data.Dataset: batched, prefetched dataset of (image, label) pairs.
    """
    # Config-file attributes may arrive as None; fall back to defaults.
    interpolation = interpolation or "bilinear"
    aspect_ratio = aspect_ratio or "fit"
    color_mode = color_mode or "rgb"
    batch_size = batch_size or 32

    params = (image_size, interpolation, aspect_ratio, color_mode, nbr_keypoints)

    ds = _get_path_dataset(data_path, seed=seed)

    if shuffle:
        ds = ds.shuffle(len(ds), reshuffle_each_iteration=True, seed=seed)

    ds = ds.map(lambda *sample: _preprocess_function(*sample, *params))
    ds = ds.batch(batch_size, drop_remainder=True)

    if to_cache:
        ds = ds.cache()

    return ds.prefetch(buffer_size=tf.data.AUTOTUNE)
350 
351 
def load_dataset(dataset_name: str = None,
                 training_path: str = None,
                 validation_path: str = None,
                 quantization_path: str = None,
                 test_path: str = None,
                 validation_split: float = None,
                 nbr_keypoints: int = None,
                 image_size: tuple[int] = None,
                 interpolation: str = None,
                 aspect_ratio: str = None,
                 color_mode: str = None,
                 batch_size: int = None,
                 seed: int = None) -> Tuple[tf.data.Dataset, tf.data.Dataset,
                                            tf.data.Dataset, tf.data.Dataset]:
    """
    Loads the images from the given dataset root directories and returns
    training, validation, quantization and test tf.data.Datasets.
    The datasets have the following directory structure (checked in parse_config.py):
    dataset_root_dir:
        image_1.jpg
        image_1.txt
        ...
        image_2.jpg
        image_2.txt

    Args:
        dataset_name (str): Name of the dataset to load (currently unused).
        training_path (str): Path to the directory containing the training images.
        validation_path (str): Path to the directory containing the validation images.
        quantization_path (str): Path to the directory containing the quantization images.
        test_path (str): Path to the directory containing the test images.
        validation_split (float): Fraction of the data to use for validation
            (only used when the validation set is carved out of the training set).
        nbr_keypoints (int): number of keypoints for a person.
        image_size (tuple[int]): resizing (height, width) of input images.
        interpolation (str): Interpolation method to use when resizing the images.
        aspect_ratio (str): "fit" or "padding" resizing policy.
        color_mode (str): Color mode to use for the images.
        batch_size (int): Batch size to use for the datasets.
        seed (int): Seed to use for shuffling the data.

    Returns:
        Tuple of four datasets (training, validation, quantization, test);
        each entry is None when the corresponding path is not provided.
        (The previous annotation/docstring advertised a 3-tuple, but the
        function has always returned four values.)
    """
    # Keyword arguments shared by every dataset-building call below.
    common = dict(nbr_keypoints=nbr_keypoints,
                  image_size=image_size,
                  interpolation=interpolation,
                  aspect_ratio=aspect_ratio,
                  color_mode=color_mode,
                  batch_size=batch_size,
                  seed=seed)

    if training_path and not validation_path:
        # There is no validation set. We split the
        # training set in two to create one.
        train_ds, val_ds = _get_train_val_ds(training_path,
                                             validation_split=validation_split,
                                             **common)
    elif training_path and validation_path:
        train_ds = _get_ds(training_path, shuffle=True, **common)
        val_ds = _get_ds(validation_path, **common)
    elif validation_path:
        train_ds = None
        val_ds = _get_ds(validation_path, **common)
    else:
        train_ds = None
        val_ds = None

    quantization_ds = _get_ds(quantization_path, **common) if quantization_path else None
    test_ds = _get_ds(test_path, **common) if test_path else None

    return train_ds, val_ds, quantization_ds, test_ds
Tuple[tf.data.Dataset, tf.data.Dataset, tf.data.Dataset, tf.data.Dataset] load_dataset(str dataset_name=None, str training_path=None, str validation_path=None, str quantization_path=None, str test_path=None, float validation_split=None, int nbr_keypoints=None, tuple[int] image_size=None, str interpolation=None, str aspect_ratio=None, str color_mode=None, int batch_size=None, int seed=None)
Definition: data_loader.py:364
def _get_padded_labels(data, r, R, height, width)
Definition: data_loader.py:116
Tuple[tf.data.Dataset, tf.data.Dataset] _get_train_val_ds(str training_path, tuple[int] image_size=None, int nbr_keypoints=None, str interpolation=None, str aspect_ratio=None, str color_mode=None, float validation_split=None, int batch_size=None, int seed=None, bool shuffle=True, bool to_cache=False)
Definition: data_loader.py:213
tf.data.Dataset _get_ds(str data_path=None, tuple[int] image_size=None, int nbr_keypoints=None, str interpolation=None, str aspect_ratio=None, str color_mode=None, int batch_size=None, int seed=None, bool shuffle=False, bool to_cache=False)
Definition: data_loader.py:293
def _parse_labels(str label_path)
Definition: data_loader.py:32
def _normalize_labels(label, int n, int l)
Definition: data_loader.py:51
tf.data.Dataset _get_path_dataset(str path, int seed, bool shuffle=True)
Definition: data_loader.py:69
tuple[tf.Tensor, tf.Tensor] _preprocess_function(tf.Tensor data_x, tf.Tensor data_y, tuple[int] image_size, str interpolation, str aspect_ratio, str color_mode, int nbr_keypoints)
Definition: data_loader.py:177