TinyML Edge Computing in Practice: Running AI Models on Microcontrollers
TinyML enables machine learning models to run on extremely resource-constrained microcontrollers, ushering in a new era of edge AI. This article takes you from zero to building TinyML applications step by step.
What Is TinyML?
Core Concepts
TinyML = Tiny (Miniature) + ML (Machine Learning)
Characteristics:
- Model size < 100KB
- RAM usage < 100KB
- Ultra-low power < 1mW
- Fast inference < 100ms
- Fully offline operation
Typical Hardware Platforms:
- Arduino Nano 33 BLE Sense
- ESP32
- STM32
- Raspberry Pi Pico
- Nordic nRF52840
Application Scenarios
- Voice wake word detection
- Gesture recognition control
- Anomalous sound detection
- Predictive maintenance
- Human activity recognition
- Simple object detection
Development Environment Setup
TensorFlow Lite Micro Installation
# Arduino IDE method
# 1. Install the Arduino_TensorFlowLite library
# Tools → Manage Libraries → Search for "Arduino_TensorFlowLite"
# PlatformIO method
# platformio.ini
; PlatformIO build environment for the nano33ble board
[env:nano33ble]
platform = nordicnrf52
board = nano33ble
framework = arduino
; Library dependency fetched directly from its GitHub repository
lib_deps = https://github.com/tensorflow/tflite-micro-arduino-examples
Python Training Environment
# Create an isolated virtual environment named "tinyml_env" in the current directory
python -m venv tinyml_env
# Activate it for the current shell session (pick the line for your OS)
source tinyml_env/bin/activate # Linux/Mac
# tinyml_env\Scripts\activate # Windows
# Install the training-side packages into the active environment
pip install tensorflow
pip install numpy
pip install matplotlib
pip install jupyter
Project 1: Voice Wake Word Detection
Model Training
# train_wake_word.py
import tensorflow as tf
from tensorflow import keras
import numpy as np
# Build a simple CNN model for audio classification
def create_model(input_shape, num_classes=4):
    """Build and compile a small 2-D CNN classifier.

    The network consists of two Conv2D -> MaxPooling2D -> Dropout stages
    (8 and 16 filters), followed by Flatten, a 32-unit ReLU dense layer with
    dropout, and a softmax output layer.

    Args:
        input_shape: Shape of one input example, passed to the Input layer.
        num_classes: Number of softmax outputs. The default of 4 corresponds
            to the classes 'yes', 'no', 'unknown', 'silence'.

    Returns:
        A compiled keras.Sequential model (Adam optimizer, sparse categorical
        cross-entropy loss, accuracy metric).
    """
    def conv_stage(filters):
        # One convolutional stage: 3x3 'same' conv, 2x2 max-pool, 25% dropout
        return [
            keras.layers.Conv2D(filters, (3, 3), activation='relu', padding='same'),
            keras.layers.MaxPooling2D((2, 2)),
            keras.layers.Dropout(0.25),
        ]

    # Fully connected classifier placed on top of the flattened feature maps
    classifier_head = [
        keras.layers.Flatten(),
        keras.layers.Dense(32, activation='relu'),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(num_classes, activation='softmax'),
    ]

    network = keras.Sequential(
        [keras.layers.Input(shape=input_shape)]
        + conv_stage(8)
        + conv_stage(16)
        + classifier_head
    )
    network.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network
# Instantiate the wake-word classifier for (49, 40, 1) inputs and 4 classes
model = create_model(input_shape=(49, 40, 1), num_classes=4)

# The training/validation arrays are not created in this snippet; they are
# assumed to already exist as X_train, y_train, X_val, y_val.

# Stop training when the monitored metric (Keras default) has not improved
# for 5 epochs, and roll back to the best weights seen.
early_stopping = keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)

history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=30,
    batch_size=32,
    callbacks=[early_stopping],
)

# Persist the trained model in HDF5 format and report its parameter count
model.save('wake_word_model.h5')
print(f"Model size: {model.count_params()} parameters")
Converting to TensorFlow Lite
# convert_to_tflite.py
import tensorflow as tf
# Read back the Keras model written by the training script
model = tf.keras.models.load_model('wake_word_model.h5')

# Set up a converter with the default optimization set enabled
# (this is what shrinks the model via quantization)
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]

# Produce the serialized TFLite flatbuffer
tflite_model = converter.convert()

# Write the flatbuffer to disk and report its size in KB
with open('wake_word_model.tflite', 'wb') as f:
    f.write(tflite_model)
print(f"TFLite model size: {len(tflite_model) / 1024:.2f} KB")
# Convert to C array (for microcontrollers)
def convert_to_c_array(tflite_model, var_name='wake_word_model', bytes_per_line=12):
    """Write a TFLite flatbuffer to a C/C++ header as a byte array.

    The header is written to ``<var_name>.h`` in the current working
    directory and defines two symbols: ``<var_name>`` (the model bytes) and
    ``<var_name>_len`` (the byte count). With the default arguments this is
    ``wake_word_model.h`` / ``wake_word_model`` / ``wake_word_model_len``.

    Args:
        tflite_model: Bytes-like object holding the serialized model.
        var_name: Name of the generated C array; also used for the file name
            and the include guard.
        bytes_per_line: Number of byte literals emitted per source line, so
            the header stays readable and diff-friendly.

    Raises:
        ValueError: If the model is empty (an empty initializer for an
            unsized array is not valid C/C++) or bytes_per_line < 1.
    """
    data = bytes(tflite_model)
    if not data:
        raise ValueError("tflite_model is empty; refusing to generate an empty C array")
    if bytes_per_line < 1:
        raise ValueError("bytes_per_line must be at least 1")

    header_path = f"{var_name}.h"
    guard = f"{var_name.upper()}_H"

    # Split the byte literals across lines instead of one enormous line
    rows = [
        "  " + ", ".join(f"0x{b:02x}" for b in data[start:start + bytes_per_line])
        for start in range(0, len(data), bytes_per_line)
    ]
    array_body = ",\n".join(rows)

    # alignas(16): the model buffer is read in place on the device, so it is
    # emitted with an explicit alignment, as the TFLM generated model arrays do.
    c_code = (
        f"// {header_path}\n"
        f"#ifndef {guard}\n"
        f"#define {guard}\n"
        f"alignas(16) const unsigned char {var_name}[] = {{\n"
        f"{array_body}\n"
        f"}};\n"
        f"const unsigned int {var_name}_len = {len(data)};\n"
        f"#endif  // {guard}\n"
    )

    with open(header_path, 'w') as f:
        f.write(c_code)
# Emit the C header for the model that was just converted, then confirm on stdout
convert_to_c_array(tflite_model)
print("C header file generated: wake_word_model.h")
Arduino Inference Code
// wake_word_detection.ino
#include <TensorFlowLite.h>
#include <tensorflow/lite/micro/all_ops_resolver.h>
#include <tensorflow/lite/micro/micro_interpreter.h>
#include <tensorflow/lite/schema/schema_generated.h>
#include "wake_word_model.h"
// Audio processing
#include <PDM.h>
// TensorFlow Lite global variables.
// They live in an anonymous namespace so they stay private to this sketch.
// All pointers start as nullptr and are filled in by setup().
namespace {
const tflite::Model* model = nullptr;
tflite::MicroInterpreter* interpreter = nullptr;
TfLiteTensor* input = nullptr;
TfLiteTensor* output = nullptr;

// Working memory handed to the interpreter (adjust size to fit model requirements).
// NOTE(review): for the float model trained above, the first conv activation alone
// is 49 * 40 * 8 floats = 62,720 bytes, so 10 KB will probably make
// AllocateTensors() fail -- size this from the real model and confirm on hardware.
constexpr int kTensorArenaSize = 10 * 1024;
// 16-byte alignment matches the TFLM example sketches and avoids losing arena
// bytes to internal re-alignment.
alignas(16) uint8_t tensor_arena[kTensorArenaSize];
}  // namespace

// Classification labels, in the order of the model's output scores
const char* labels[] = {"yes", "no", "unknown", "silence"};
// Derived from the array so the count cannot drift from the label list
const int num_labels = sizeof(labels) / sizeof(labels[0]);
void setup() {
Serial.begin(115200);
while (!Serial);
// Initialize PDM microphone
PDM.onReceive(onPDMdata);
PDM.begin(1, 16000); // 1 channel, 16kHz
// Load model
model = tflite::GetModel(wake_word_model);
if (model->version() != TFLITE_SCHEMA_VERSION) {
Serial.println("Model version mismatch!");
return;
}
// Set up ops resolver
static tflite::AllOpsResolver resolver;
// Create interpreter
static tflite::MicroInterpreter static_interpreter(
model, resolver, tensor_arena, kTensorArenaSize
);
interpreter = &static_interpreter;
// Allocate memory
TfLiteStatus allocate_status = interpreter->AllocateTensors();
if (allocate_status != kTfLiteOk) {
Serial.println("Memory allocation failed!");
return;
}
// Get input/output tensors
input = interpreter->input(0);
output = interpreter->output(0);
Serial.println("TinyML initialized successfully");
Serial.printf("Input shape: [%d, %d, %d]\n",
input->dims->data[1],
input->dims->data[2],
input->dims->data[3]);
}
// Audio data buffer
constexpr int kAudioSampleSize = 16000;  // 1 second @ 16kHz
int16_t audio_buffer[kAudioSampleSize];
// Number of samples currently stored; written by the PDM callback and read/reset by loop()
volatile int audio_idx = 0;

// PDM receive callback: appends newly captured samples to audio_buffer.
// Never writes past the end of the buffer; once it is full, further data is
// read and discarded until loop() resets audio_idx to 0.
void onPDMdata() {
  constexpr int kBytesPerSample = static_cast<int>(sizeof(int16_t));
  int pendingBytes = PDM.available();

  // Store only as many whole samples as still fit in the buffer
  const int writeIdx = audio_idx;
  const int freeBytes = (kAudioSampleSize - writeIdx) * kBytesPerSample;
  int storeBytes = pendingBytes < freeBytes ? pendingBytes : freeBytes;
  storeBytes -= storeBytes % kBytesPerSample;
  if (storeBytes > 0) {
    const int got = PDM.read(audio_buffer + writeIdx, storeBytes);
    if (got > 0) {
      audio_idx = writeIdx + got / kBytesPerSample;
      pendingBytes -= got;
    }
  }

  // Drain whatever did not fit into a scratch buffer.
  // NOTE(review): some PDM implementations stop capturing when the driver
  // buffer is left unread -- confirm against the core in use.
  static int16_t discard[128];
  while (pendingBytes > 0) {
    const int chunk = pendingBytes < static_cast<int>(sizeof(discard))
                          ? pendingBytes
                          : static_cast<int>(sizeof(discard));
    const int got = PDM.read(discard, chunk);
    if (got <= 0) {
      break;
    }
    pendingBytes -= got;
  }
}
void loop() {
// Wait until 1 second of audio is collected
if (audio_idx >= kAudioSampleSize) {
// Audio preprocessing (convert to spectrogram)
preprocessAudio(audio_buffer, input->data.f);
// Run inference
TfLiteStatus invoke_status = interpreter->Invoke();
if (invoke_status != kTfLiteOk) {
Serial.println("Inference failed!");
return;
}
// Parse output
int max_idx = 0;
float max_score = output->data.f[0];
Serial.println("\nPrediction results:");
for (int i = 0; i < num_labels; i++) {
float score = output->data.f[i];
Serial.printf(" %s: %.2f%%\n", labels[i], score * 100);
if (score > max_score) {
max_score = score;
max_idx = i;
}
}
// Determine if wake word was detected
if (max_score > 0.8 && max_idx < 2) { // "yes" or "no"
Serial.printf("\nDetected: %s (confidence: %.2f%%)\n",
labels[max_idx], max_score * 100);
// Trigger action
triggerAction(labels[max_idx]);
}
// Reset buffer
audio_idx = 0;
}
}
// Scale one signed 16-bit PCM sample to a float in [-1.0, 1.0).
// The result must be kept in a float: storing it back into an int16_t
// truncates every sample to 0 (or -1), which wipes out the recording.
static inline float normalizeSample(int16_t sample) {
  return static_cast<float>(sample) / 32768.0f;
}

// Prepare the model input from the captured audio.
//   audio        - captured PCM samples; left unmodified
//   input_tensor - float input buffer of the model
// Simplified version: real applications require FFT and MFCC conversion, and
// that feature extraction is NOT implemented here, so input_tensor is left
// untouched. It is assumed to already be in the correct format.
void preprocessAudio(int16_t* audio, float* input_tensor) {
  if (audio == nullptr || input_tensor == nullptr) {
    return;
  }
  // 1. Normalize
  //    Done per sample with normalizeSample() at the point where a sample is
  //    consumed, instead of overwriting the int16_t capture buffer in place.
  // 2. Compute MFCC or spectrogram
  //    TODO: compute the same features that were used for training and write
  //    them into input_tensor.
}
// React to a recognized command by switching the built-in LED:
// "yes" turns it on, "no" turns it off, anything else is ignored.
void triggerAction(const char* command) {
  const bool isYes = (strcmp(command, "yes") == 0);
  if (isYes) {
    // Execute "yes" command
    digitalWrite(LED_BUILTIN, HIGH);
    return;
  }

  const bool isNo = (strcmp(command, "no") == 0);
  if (isNo) {
    // Execute "no" command
    digitalWrite(LED_BUILTIN, LOW);
  }
}
Project 2: Gesture Recognition
IMU Data Collection
// gesture_data_collection.ino
#include <Arduino_LSM9DS1.h>
// Number of IMU readings recorded for one gesture (one CSV row per reading)
const int SAMPLES_PER_GESTURE = 119;
// Number of gesture classes; not referenced by the code visible in this sketch
const int NUM_GESTURES = 4;
// Gesture labels (also not referenced by the code visible in this sketch)
const char* gestures[] = {"punch", "flex", "wave", "idle"};
// Bring up the serial port and the IMU, then announce the output format.
// If the IMU cannot be started, report it and stop here for good.
void setup() {
  Serial.begin(115200);
  while (!Serial) {
  }

  const bool imuReady = IMU.begin();
  if (!imuReady) {
    Serial.println("IMU initialization failed!");
    for (;;) {
    }
  }

  Serial.println("Ready to collect gesture data");
  Serial.println("Format: ax,ay,az,gx,gy,gz,label");
}
// When motion is detected, record SAMPLES_PER_GESTURE IMU readings and print
// each one as a CSV row "ax,ay,az,gx,gy,gz" with 6 decimal places.
void loop() {
  // Detect motion trigger
  if (!(IMU.accelerationAvailable() && detectMotion())) {
    return;
  }

  Serial.println("\n--- Starting gesture recording ---");

  // Collect samples
  for (int i = 0; i < SAMPLES_PER_GESTURE; i++) {
    // Wait until BOTH sensors have fresh data; waiting only for the
    // accelerometer could log a gyroscope reading that is not ready yet.
    while (!(IMU.accelerationAvailable() && IMU.gyroscopeAvailable())) {
    }

    float ax = 0.0f, ay = 0.0f, az = 0.0f;
    float gx = 0.0f, gy = 0.0f, gz = 0.0f;
    IMU.readAcceleration(ax, ay, az);
    IMU.readGyroscope(gx, gy, gz);

    // Output in CSV format
    const float row[] = {ax, ay, az, gx, gy, gz};
    const int columns = sizeof(row) / sizeof(row[0]);
    for (int c = 0; c < columns; c++) {
      if (c > 0) {
        Serial.print(",");
      }
      Serial.print(row[c], 6);
    }
    Serial.println();

    delay(10);  // 100Hz sampling rate
  }

  Serial.println("--- Recording complete ---\n");
  delay(1000);  // Wait for next gesture
}
// Read one acceleration sample and report whether its vector magnitude
// exceeds the motion threshold of 1.5 (the threshold is adjustable).
// NOTE(review): the unit is presumably g, as reported by the IMU library -- confirm.
bool detectMotion() {
  float x, y, z;
  IMU.readAcceleration(x, y, z);

  // Calculate total acceleration (Euclidean norm of the three axes)
  const float magnitude = sqrt(x*x + y*y + z*z);

  // Detect significant motion (acceleration change)
  const bool significant = magnitude > 1.5;
  return significant;
}
Training the Gesture Recognition Model
# train_gesture_model.py
import tensorflow as tf
import pandas as pd
import numpy as np
# Load collected data
# Expected columns: ax, ay, az, gx, gy, gz (features) and gesture (label)
data = pd.read_csv('gesture_data.csv')

features = ['ax', 'ay', 'az', 'gx', 'gy', 'gz']
samples_per_gesture = 119  # rows recorded per gesture by the collection sketch

# Every gesture must contribute a complete window, otherwise the reshape below
# would silently mix rows from different recordings.
if len(data) % samples_per_gesture != 0:
    raise ValueError(
        f"Row count {len(data)} is not a multiple of {samples_per_gesture}"
    )

# Prepare data: (samples, timesteps, features)
X = data[features].values.astype('float32').reshape(
    -1, samples_per_gesture, len(features)
)

# One-hot encode the labels, then keep ONE label per window so that y has the
# same number of samples as X (the raw CSV carries one label per row).
# Assumes all rows of a window share the same gesture label -- TODO confirm.
one_hot = pd.get_dummies(data['gesture']).values.astype('float32')
num_classes = one_hot.shape[1]
y = one_hot.reshape(-1, samples_per_gesture, num_classes)[:, 0, :]

# validation_split takes the LAST 20% of the arrays before any shuffling, so
# shuffle the windows first; otherwise validation may contain a single gesture.
order = np.random.default_rng(42).permutation(len(X))
X, y = X[order], y[order]

# Build LSTM model
# NOTE(review): LSTM support on microcontroller runtimes is limited -- verify
# that the converted model runs on the target before relying on it.
model = tf.keras.Sequential([
    tf.keras.layers.LSTM(64, input_shape=(samples_per_gesture, len(features))),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(num_classes, activation='softmax')
])
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Train on the arrays prepared above
history = model.fit(
    X, y,
    epochs=50,
    batch_size=16,
    validation_split=0.2
)

# Convert to TFLite
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()
with open('gesture_model.tflite', 'wb') as f:
    f.write(tflite_model)

# Final-epoch training accuracy and size of the converted model
print(f"Model accuracy: {history.history['accuracy'][-1]:.2%}")
print(f"Model size: {len(tflite_model) / 1024:.2f} KB")
Performance Optimization Techniques
1. Model Quantization
# Post-training quantization: enable the converter's default optimizations
# (`converter` is the TFLiteConverter created in the conversion step)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
# Quantization-Aware Training (QAT): wrap an existing Keras model so that
# quantization is simulated while it is fine-tuned
import tensorflow_model_optimization as tfmot
quantize_model = tfmot.quantization.keras.quantize_model
q_aware_model = quantize_model(model)
# The wrapped model is compiled before training; the loss here is the same
# one the wake-word model was compiled with
q_aware_model.compile(
optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy']
)
# Fine-tune for 10 epochs; X_train / y_train are assumed to exist already
q_aware_model.fit(X_train, y_train, epochs=10)
2. Model Pruning
# Weight pruning: ramp sparsity from 0% to 50% over the first 1000 training steps
prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude
pruning_params = {
    'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(
        initial_sparsity=0.0,
        final_sparsity=0.5,
        begin_step=0,
        end_step=1000
    )
}
model_for_pruning = prune_low_magnitude(model, **pruning_params)

# The pruning wrapper returns a new model that has to be compiled before fit().
# Use the loss that matches your label encoding (sparse_* for integer labels).
model_for_pruning.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# UpdatePruningStep is required: it advances the pruning schedule each batch,
# and training a pruned model without it raises an error.
model_for_pruning.fit(
    X_train, y_train,
    epochs=10,
    callbacks=[tfmot.sparsity.keras.UpdatePruningStep()]
)

# Remove the pruning wrappers before export so only the sparse weights remain
pruned_model = tfmot.sparsity.keras.strip_pruning(model_for_pruning)
3. Memory Optimization
// Use a smaller tensor_arena
// NOTE(review): the arena must still be large enough for the model's tensors;
// confirm by checking that AllocateTensors() succeeds after shrinking it.
constexpr int kTensorArenaSize = 8 * 1024; // 8KB instead of 10KB
// Use static memory: a fixed-size buffer of 16000 int16_t samples
static int16_t audio_buffer[16000];
// Avoid dynamic memory allocation
// Bad: String msg = "Hello";
// Good: const char* msg = "Hello";
Real-World Application Examples
Case 1: Industrial Equipment Anomalous Sound Detection
Application Scenario:
- Monitor motor operation sounds
- Detect abnormal vibrations
- Predictive maintenance
Technical Key Points:
- Audio FFT feature extraction
- Autoencoder anomaly detection
- Ultra-low power design (< 1mW)
Case 2: Smart Wearable Device
Application Scenario:
- Fall detection
- Activity recognition (walking/running/sleeping)
- Heart rate anomaly detection
Technical Key Points:
- 6-axis IMU data fusion
- LSTM temporal modeling
- Edge real-time inference
Conclusion
TinyML opens up unlimited possibilities:
- Offline Operation - Privacy protection, no network required
- Ultra-Low Power - Battery life measured in months or even years
- Real-Time Inference - Millisecond-level response
- Low Cost - Only $5-10 in hardware required
At BASHCAT, we have extensive TinyML development experience and can help you bring AI to resource-constrained edge devices. Feel free to contact us to discuss your Edge AI project!
Additional Resources
- TensorFlow Lite Micro
- Edge Impulse - Complete TinyML development platform
- TinyML Book
- Arduino TinyML Examples