TinyML 邊緣運算實戰：在微控制器上運行 AI 模型

TinyML 讓機器學習模型可以在資源極度受限的微控制器上運行，開啟了邊緣 AI 的新時代。本文將帶你從零開始學習 TinyML 開發。

什麼是 TinyML？

核心概念

TinyML = Tiny (微型) + ML (機器學習)

特點：
- 模型大小 < 100KB
- RAM 使用 < 100KB  
- 功耗極低 < 1mW
- 推理延遲低 < 100ms
- 完全離線運行

典型硬體平台：

Arduino Nano 33 BLE Sense
ESP32
STM32
Raspberry Pi Pico
Nordic nRF52840

應用場景

✓ 語音喚醒詞檢測
✓ 手勢識別控制
✓ 異常聲音檢測
✓ 預測性維護
✓ 人體活動識別
✓ 簡單物體辨識

開發環境設置

TensorFlow Lite Micro 安裝

# Arduino IDE 方式
# 1. 安裝 Arduino_TensorFlowLite 函式庫
# 工具 → 管理函式庫 → 搜尋 "Arduino_TensorFlowLite"

# PlatformIO 方式
# platformio.ini
[env:nano33ble]
platform = nordicnrf52
board = nano33ble
framework = arduino
lib_deps =
    https://github.com/tensorflow/tflite-micro-arduino-examples

Python 訓練環境

# 建立虛擬環境
python -m venv tinyml_env
source tinyml_env/bin/activate  # Linux/Mac
# tinyml_env\Scripts\activate  # Windows

# 安裝套件
pip install tensorflow
pip install numpy
pip install matplotlib
pip install jupyter

專案 1：語音喚醒詞檢測

模型訓練

# train_wake_word.py
import tensorflow as tf
from tensorflow import keras
import numpy as np

# 建立簡單的 CNN 模型用於語音分類
def create_model(input_shape, num_classes=4):
    """Build and compile a small CNN classifier.

    The default four classes are 'yes', 'no', 'unknown' and 'silence'.

    Args:
        input_shape: Shape of one input example, passed to the Input layer.
        num_classes: Number of units in the final softmax layer.

    Returns:
        A compiled ``keras.Sequential`` model.
    """
    layers = keras.layers

    def conv_stage(filters):
        # One convolution stage: 3x3 conv -> 2x2 max-pool -> 25% dropout.
        return [
            layers.Conv2D(filters, (3, 3), activation='relu', padding='same'),
            layers.MaxPooling2D((2, 2)),
            layers.Dropout(0.25),
        ]

    # Classifier head: flatten, one hidden dense layer, softmax output.
    head = [
        layers.Flatten(),
        layers.Dense(32, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax'),
    ]

    stack = [layers.Input(shape=input_shape)]
    stack.extend(conv_stage(8))
    stack.extend(conv_stage(16))
    stack.extend(head)

    network = keras.Sequential(stack)
    network.compile(
        loss='sparse_categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return network

# Build the classifier for 49x40 single-channel inputs and four classes.
model = create_model(input_shape=(49, 40, 1), num_classes=4)

# X_train, y_train, X_val and y_val are assumed to have been prepared already.

# Early stopping with a patience of 5 epochs; the best weights are restored.
early_stopping = keras.callbacks.EarlyStopping(
    patience=5,
    restore_best_weights=True,
)

history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    batch_size=32,
    epochs=30,
    callbacks=[early_stopping],
)

# Save the trained model to an HDF5 file.
model.save('wake_word_model.h5')

# Report the number of parameters in the model.
print(f"模型大小: {model.count_params()} 參數")

轉換為 TensorFlow Lite

# convert_to_tflite.py
import tensorflow as tf

# Load the trained Keras model from disk.
model = tf.keras.models.load_model('wake_word_model.h5')

# Configure a converter with the default optimizations (quantization) and
# produce the TFLite flatbuffer.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()

# Write the flatbuffer to disk.
with open('wake_word_model.tflite', 'wb') as f:
    f.write(tflite_model)

# Report the serialized size in kilobytes.
print(f"TFLite 模型大小: {len(tflite_model) / 1024:.2f} KB")

# 轉換為 C 陣列（用於微控制器）
def convert_to_c_array(tflite_model, output_path='wake_word_model.h',
                       array_name='wake_word_model', bytes_per_line=12):
    """Write a TFLite flatbuffer as a C/C++ header defining a byte array.

    The header declares ``<array_name>[]`` holding the model bytes and
    ``<array_name>_len`` holding their count, wrapped in an include guard
    derived from ``array_name``. With the default arguments the file name,
    array name and guard match the ones the accompanying sketch includes.

    Args:
        tflite_model: The serialized model as a bytes-like object.
        output_path: Where to write the header.
        array_name: C identifier used for the array and the ``_len`` constant.
        bytes_per_line: How many byte literals to put on each source line.

    Raises:
        ValueError: If the model is empty or ``bytes_per_line`` is below 1.
    """
    import os

    data = bytes(tflite_model)
    if not data:
        # An empty initializer would produce a zero-length array, which is
        # not a usable model and not valid standard C++.
        raise ValueError("tflite_model is empty")
    if bytes_per_line < 1:
        raise ValueError("bytes_per_line must be at least 1")

    guard = f"{array_name.upper()}_H"

    # Wrap the literals so the header is not one enormous line.
    rows = [
        "    " + ", ".join(f"0x{b:02x}" for b in data[start:start + bytes_per_line])
        for start in range(0, len(data), bytes_per_line)
    ]

    lines = [
        f"// {os.path.basename(output_path)}",
        f"#ifndef {guard}",
        f"#define {guard}",
        "",
        # The header is included from a C++ sketch; 16-byte alignment follows
        # the convention used by the TensorFlow Lite Micro example models.
        f"alignas(16) const unsigned char {array_name}[] = {{",
        ",\n".join(rows),
        "};",
        "",
        f"const unsigned int {array_name}_len = {len(data)};",
        "",
        "#endif",
        "",
    ]

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write("\n".join(lines))

convert_to_c_array(tflite_model)
print("✓ C 標頭檔已生成: wake_word_model.h")

Arduino 推理代碼

// wake_word_detection.ino
#include <TensorFlowLite.h>
#include <tensorflow/lite/micro/all_ops_resolver.h>
#include <tensorflow/lite/micro/micro_interpreter.h>
#include <tensorflow/lite/schema/schema_generated.h>
#include "wake_word_model.h"

// 音訊處理
#include <PDM.h>

// TensorFlow Lite globals shared by setup() and loop().
namespace {
    const tflite::Model* model = nullptr;
    tflite::MicroInterpreter* interpreter = nullptr;
    TfLiteTensor* input = nullptr;
    TfLiteTensor* output = nullptr;

    // Working memory handed to the interpreter for tensors and bookkeeping.
    // The wake-word model takes a 49x40x1 float input and its first
    // convolution produces 49x40x8 floats (about 61 KB), so a 10 KB arena
    // cannot satisfy AllocateTensors(). 96 KB is an estimate with headroom;
    // tune it with interpreter->arena_used_bytes() once the model loads.
    constexpr int kTensorArenaSize = 96 * 1024;
    // 16-byte alignment follows the TensorFlow Lite Micro examples.
    alignas(16) uint8_t tensor_arena[kTensorArenaSize];
}

// Class labels, indexed by the model's output position.
const char* labels[] = {"yes", "no", "unknown", "silence"};
// Derived from the array so the two cannot drift apart.
const int num_labels = sizeof(labels) / sizeof(labels[0]);

void setup() {
    Serial.begin(115200);
    while (!Serial);
    
    // 初始化 PDM 麥克風
    PDM.onReceive(onPDMdata);
    PDM.begin(1, 16000);  // 1 通道, 16kHz
    
    // 載入模型
    model = tflite::GetModel(wake_word_model);
    if (model->version() != TFLITE_SCHEMA_VERSION) {
        Serial.println("模型版本不符！");
        return;
    }
    
    // 設定操作解析器
    static tflite::AllOpsResolver resolver;
    
    // 建立解釋器
    static tflite::MicroInterpreter static_interpreter(
        model, resolver, tensor_arena, kTensorArenaSize
    );
    interpreter = &static_interpreter;
    
    // 分配記憶體
    TfLiteStatus allocate_status = interpreter->AllocateTensors();
    if (allocate_status != kTfLiteOk) {
        Serial.println("記憶體分配失敗！");
        return;
    }
    
    // 取得輸入/輸出張量
    input = interpreter->input(0);
    output = interpreter->output(0);
    
    Serial.println("✓ TinyML 已初始化");
    Serial.printf("輸入形狀: [%d, %d, %d]\n", 
                  input->dims->data[1], 
                  input->dims->data[2],
                  input->dims->data[3]);
}

// 音訊數據緩衝
constexpr int kAudioSampleSize = 16000;  // 1 秒 @ 16kHz
int16_t audio_buffer[kAudioSampleSize];
volatile int audio_idx = 0;

void onPDMdata() {
    int bytesAvailable = PDM.available();
    PDM.read(audio_buffer + audio_idx, bytesAvailable);
    audio_idx += bytesAvailable / 2;
}

void loop() {
    // 等待收集 1 秒音訊
    if (audio_idx >= kAudioSampleSize) {
        // 音訊預處理（轉換為頻譜圖）
        preprocessAudio(audio_buffer, input->data.f);
        
        // 執行推理
        TfLiteStatus invoke_status = interpreter->Invoke();
        if (invoke_status != kTfLiteOk) {
            Serial.println("推理失敗！");
            return;
        }
        
        // 解析輸出
        int max_idx = 0;
        float max_score = output->data.f[0];
        
        Serial.println("\n預測結果:");
        for (int i = 0; i < num_labels; i++) {
            float score = output->data.f[i];
            Serial.printf("  %s: %.2f%%\n", labels[i], score * 100);
            
            if (score > max_score) {
                max_score = score;
                max_idx = i;
            }
        }
        
        // 判斷是否檢測到喚醒詞
        if (max_score > 0.8 && max_idx < 2) {  // "yes" 或 "no"
            Serial.printf("\n🎤 檢測到: %s (信心度: %.2f%%)\n", 
                         labels[max_idx], max_score * 100);
            
            // 觸發動作
            triggerAction(labels[max_idx]);
        }
        
        // 重置緩衝
        audio_idx = 0;
    }
}

// Placeholder for turning raw 16-bit audio into the model's input features.
//
// audio:        kAudioSampleSize raw samples; left unmodified.
// input_tensor: destination for the features; not written yet.
//
// The previous version stored `audio[i] / 32768.0f` back into the int16_t
// buffer, which truncated every sample to 0 (or -1) and destroyed the
// recording. Normalization has to happen in float at the point of use.
void preprocessAudio(int16_t* audio, float* input_tensor) {
    // Scale factor mapping an int16 sample into [-1, 1).
    constexpr float kInt16ToFloat = 1.0f / 32768.0f;

    // TODO: compute MFCC / spectrogram features (FFT + mel filter bank) from
    // `audio[i] * kInt16ToFloat` and write them to `input_tensor`, using the
    // same feature layout the model was trained on. Until that is
    // implemented the input tensor is left as-is.
    (void)audio;
    (void)input_tensor;
    (void)kInt16ToFloat;
}

// Reacts to a recognized command: "yes" drives the built-in LED high,
// "no" drives it low, anything else is ignored.
void triggerAction(const char* command) {
    const bool isYes = (strcmp(command, "yes") == 0);
    if (isYes) {
        digitalWrite(LED_BUILTIN, HIGH);
        return;
    }

    const bool isNo = (strcmp(command, "no") == 0);
    if (isNo) {
        digitalWrite(LED_BUILTIN, LOW);
    }
}

專案 2：手勢識別

IMU 數據收集

// gesture_data_collection.ino
#include <Arduino_LSM9DS1.h>

// Number of IMU samples recorded for each gesture.
const int SAMPLES_PER_GESTURE = 119;
// Number of gesture classes (not referenced elsewhere in this sketch).
const int NUM_GESTURES = 4;

// Gesture labels (not referenced elsewhere in this sketch).
const char* gestures[] = {"punch", "flex", "wave", "idle"};

// Opens the serial port, starts the IMU and prints the column layout of the
// data that loop() emits. Halts forever if the IMU cannot be started.
void setup() {
    Serial.begin(115200);
    while (!Serial);

    if (!IMU.begin()) {
        Serial.println("IMU 初始化失敗！");
        while (1);
    }

    Serial.println("準備收集手勢數據");
    // loop() prints exactly six values per row; no label column is emitted,
    // so the format line must not advertise one.
    Serial.println("格式: ax,ay,az,gx,gy,gz");
}

// Waits for motion, then records SAMPLES_PER_GESTURE accelerometer and
// gyroscope readings and prints them as CSV rows (ax,ay,az,gx,gy,gz).
void loop() {
    float ax, ay, az, gx, gy, gz;

    // Start a recording only when a sample is ready and it shows motion.
    if (IMU.accelerationAvailable() && detectMotion()) {
        Serial.println("\n--- 開始記錄手勢 ---");

        for (int i = 0; i < SAMPLES_PER_GESTURE; i++) {
            // Wait until BOTH sensors have a fresh reading; reading the
            // gyroscope without checking it could pair a new acceleration
            // sample with a stale rotation sample.
            while (!IMU.accelerationAvailable() || !IMU.gyroscopeAvailable());

            IMU.readAcceleration(ax, ay, az);
            IMU.readGyroscope(gx, gy, gz);

            // One CSV row with six decimals per value.
            Serial.print(ax, 6); Serial.print(",");
            Serial.print(ay, 6); Serial.print(",");
            Serial.print(az, 6); Serial.print(",");
            Serial.print(gx, 6); Serial.print(",");
            Serial.print(gy, 6); Serial.print(",");
            Serial.print(gz, 6);
            Serial.println();

            // 10 ms pause between rows; together with the wait above and the
            // serial output, the effective rate is at most about 100 Hz.
            delay(10);
        }

        Serial.println("--- 記錄完成 ---\n");
        delay(1000);  // pause before the next gesture
    }
}

// Reads one acceleration sample and reports whether the magnitude of the
// acceleration vector exceeds the motion threshold (1.5, adjustable).
bool detectMotion() {
    float x, y, z;
    IMU.readAcceleration(x, y, z);

    // Magnitude of the acceleration vector.
    const float magnitude = sqrt(x*x + y*y + z*z);

    return magnitude > 1.5;
}

訓練手勢識別模型

# train_gesture_model.py
import tensorflow as tf
import pandas as pd
import numpy as np

# Load the recorded data: one row per IMU sample.
data = pd.read_csv('gesture_data.csv')

# Feature columns: ax, ay, az, gx, gy, gz
# Label column: gesture
features = ['ax', 'ay', 'az', 'gx', 'gy', 'gz']

# Each gesture recording is a window of 119 consecutive rows.
SAMPLES_PER_GESTURE = 119
if len(data) % SAMPLES_PER_GESTURE != 0:
    raise ValueError(
        f"gesture_data.csv has {len(data)} rows, which is not a multiple of "
        f"{SAMPLES_PER_GESTURE}"
    )

# (windows, timesteps, features)
X = (
    data[features]
    .values.reshape(-1, SAMPLES_PER_GESTURE, len(features))
    .astype('float32')
)

# The model predicts one label per window, so y needs one row per window,
# not one per sample. Every row inside a window must carry the same label.
labels_by_window = data['gesture'].values.reshape(-1, SAMPLES_PER_GESTURE)
if (labels_by_window != labels_by_window[:, :1]).any():
    raise ValueError("a 119-sample window contains more than one gesture label")
y = pd.get_dummies(labels_by_window[:, 0]).values.astype('float32')
num_classes = y.shape[1]

# validation_split takes the LAST 20% of the arrays as given, so shuffle
# first; otherwise a file sorted by gesture validates on a single class.
rng = np.random.default_rng(42)
order = rng.permutation(len(X))
X, y = X[order], y[order]

# LSTM classifier over the 119-step sequences.
model = tf.keras.Sequential([
    tf.keras.layers.LSTM(64, input_shape=(SAMPLES_PER_GESTURE, len(features))),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(num_classes, activation='softmax')
])

# Labels are one-hot encoded, hence categorical (not sparse) cross-entropy.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Train on the arrays built above (the original referenced undefined
# X_train / y_train).
history = model.fit(
    X, y,
    epochs=50,
    batch_size=16,
    validation_split=0.2
)

# Convert to TFLite with the default optimizations.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()

with open('gesture_model.tflite', 'wb') as f:
    f.write(tflite_model)

# Training accuracy of the final epoch and serialized model size.
print(f"模型準確率: {history.history['accuracy'][-1]:.2%}")
print(f"模型大小: {len(tflite_model) / 1024:.2f} KB")

效能優化技巧

1. 模型量化

# Post-training quantization: enable the converter's default optimizations.
# `converter` is expected to exist already (created in an earlier snippet).
converter.optimizations = [tf.lite.Optimize.DEFAULT]

# Quantization-aware training (QAT)
import tensorflow_model_optimization as tfmot

# Wrap the existing `model` so that it can be trained quantization-aware.
quantize_model = tfmot.quantization.keras.quantize_model
q_aware_model = quantize_model(model)

# The wrapped model is compiled before training.
# NOTE(review): the sparse loss assumes integer class labels — confirm it
# matches how y_train is encoded.
q_aware_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Train the quantization-aware model for 10 epochs.
q_aware_model.fit(X_train, y_train, epochs=10)

2. 模型剪枝

# Weight pruning
prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude

# Ramp sparsity from 0% to 50% over the first 1000 training steps.
pruning_params = {
    'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(
        initial_sparsity=0.0,
        final_sparsity=0.5,
        begin_step=0,
        end_step=1000
    )
}

model_for_pruning = prune_low_magnitude(model, **pruning_params)

# Wrapping returns a new model, which has to be compiled before training.
# NOTE(review): use the same loss the base model was trained with; the
# sparse loss here assumes integer class labels.
model_for_pruning.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# UpdatePruningStep advances the pruning schedule; training a pruned model
# without this callback raises an error.
model_for_pruning.fit(
    X_train, y_train,
    epochs=10,
    callbacks=[tfmot.sparsity.keras.UpdatePruningStep()]
)

# Remove the pruning wrappers before saving or converting, so the exported
# model contains only the original layers with their sparse weights.
model_for_export = tfmot.sparsity.keras.strip_pruning(model_for_pruning)

3. 記憶體優化

// Use a smaller tensor_arena.
// NOTE(review): the arena must still be large enough for AllocateTensors()
// to succeed with the deployed model — confirm before shrinking it.
constexpr int kTensorArenaSize = 8 * 1024;  // 8KB instead of 10KB

// Use statically allocated memory.
static int16_t audio_buffer[16000];

// Avoid dynamic memory allocation.
// ❌ String msg = "Hello";
// ✓ const char* msg = "Hello";

實際應用案例

案例 1：工業設備異常聲音檢測

應用場景：
- 監測馬達運轉聲音
- 檢測異常震動
- 預測性維護

技術要點：
- 音訊 FFT 特徵提取
- 自編碼器異常檢測
- 極低功耗設計（< 1mW）

案例 2：智慧穿戴裝置

應用場景：
- 跌倒檢測
- 活動識別（走路/跑步/睡眠）
- 心律異常檢測

技術要點：
- 6軸 IMU 數據融合
- LSTM 時序建模
- 邊緣即時推理

總結

TinyML 開啟了無限可能：

離線運行 - 保護隱私、無需網路
超低功耗 - 電池可用數月甚至數年
即時推理 - 毫秒級回應
成本低廉 - 僅需 $5-10 硬體

在 BASHCAT，我們擁有豐富的 TinyML 開發經驗，可協助您將 AI 帶到資源受限的邊緣裝置。歡迎與我們聯繫討論您的 Edge AI 專案！

TinyML 邊緣運算實戰：在微控制器上運行 AI 模型

TinyML 邊緣運算實戰：在微控制器上運行 AI 模型

什麼是 TinyML？

核心概念

應用場景

開發環境設置

TensorFlow Lite Micro 安裝

Python 訓練環境

專案 1：語音喚醒詞檢測

模型訓練

轉換為 TensorFlow Lite

Arduino 推理代碼

專案 2：手勢識別

IMU 數據收集

訓練手勢識別模型

效能優化技巧

1. 模型量化

2. 模型剪枝

3. 記憶體優化

實際應用案例

案例 1：工業設備異常聲音檢測

案例 2：智慧穿戴裝置

總結

延伸資源

延伸閱讀

更多 AI 文章