Keyword Spotting on ESP32-S3 with INMP441 and MAX7219

Offline voice keyword detector using ESP32-S3, INMP441 & MAX7219. Powered by Edge Impulse, no cloud needed, runs fully on-device.

Beginner · Protip · 5 hours · 355
Keyword Spotting on ESP32-S3 with INMP441 and MAX7219

Things used in this project

Hardware components

Seeed Studio XIAO ESP32S3 Plus
Seeed Studio XIAO ESP32S3 Plus
×1
MB102 Breadboard 830 Point Solderless PCB Bread Project Board
×1
MAX7219 8x32 LED Matrix
×1
INMP441 FRONT MIC
×1
Male/Female Jumper Wires
Male/Female Jumper Wires
×1

Software apps and online services

Arduino IDE
Arduino IDE
Edge Impulse Studio
Edge Impulse Studio

Story

Read more

Schematics

ESP32-S3 Voice Display System

Code

ESP32-S3 Edge Impulse Keyword Spotter with MAX7219 Display

Arduino
#include <kamaru123-project-1_inferencing.h>
#include <MD_Parola.h>
#include <MD_MAX72xx.h>
#include <SPI.h>
#include "driver/i2s.h"
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"

// MAX7219 LED matrix configuration
#define HARDWARE_TYPE MD_MAX72XX::FC16_HW
#define MAX_DEVICES 4
#define DATA_PIN   6  // DIN MAX7219
#define CLK_PIN    7  // CLK MAX7219
#define CS_PIN     5  // CS MAX7219

// Display object used for all text output in this sketch.
MD_Parola mx = MD_Parola(HARDWARE_TYPE, DATA_PIN, CLK_PIN, CS_PIN, MAX_DEVICES);

// Buffer state for inference, written by the audio callback and read by the main loop.
typedef struct {
    int16_t *buffer;              // sample storage, allocated in microphone_inference_start()
    volatile uint8_t buf_ready;   // set to 1 by the audio callback once buf_count reaches n_samples
    volatile uint32_t buf_count;  // number of samples currently stored in buffer
    uint32_t n_samples;           // number of samples buffer was allocated for
} inference_t;

// Single inference buffer instance used throughout the sketch.
static inference_t inference;
// Size of sampleBuffer in samples; the same value is also handed to the capture
// task, where it is used as the byte count for each i2s_read() call.
static const uint32_t sample_buffer_size = 2048;
// Scratch buffer that i2s_read() fills and the audio callback copies from.
static int16_t sampleBuffer[sample_buffer_size];
// The capture task keeps running while this is true.
static volatile bool record_status = false;
// Passed to run_classifier() as its debug flag.
static bool debug_nn = false;

// Detection threshold and smoothing
// Score the keyword label must exceed to count as a hit.
#define DETECTION_THRESHOLD 0.30f
// Value detection_streak must reach before a detection is reported.
#define DETECTION_STREAK_REQUIRED 3
// Running count of keyword hits, updated in loop().
static int detection_streak = 0;

// Show `teks` on the MAX7219 as a left-scrolling message.
// Blocks the caller until the scroll animation has finished.
void tampilkanTeks(const char* teks) {
    mx.displayClear();
    mx.displayScroll(teks, PA_LEFT, PA_SCROLL_LEFT, 100);

    // Keep stepping the animation; stop as soon as displayAnimate() reports true.
    for (;;) {
        if (mx.displayAnimate()) {
            return;
        }
    }
}

// Audio callback: copy freshly captured samples from sampleBuffer into the
// inference buffer and raise buf_ready once the inference buffer is full.
//
// n_bytes: number of valid bytes in sampleBuffer (two bytes per sample).
static void audio_inference_callback(uint32_t n_bytes) {
    const uint32_t sample_count = n_bytes / 2;
    uint32_t idx = 0;

    while (idx < sample_count) {
        if (inference.buf_count < inference.n_samples) {
            // Read the write position once, store the sample, then publish the new count.
            const uint32_t pos = inference.buf_count;
            inference.buf_count = pos + 1;
            inference.buffer[pos] = sampleBuffer[idx];
        }
        if (inference.buf_count >= inference.n_samples) {
            // Buffer is full: flag it and drop the rest of this chunk.
            inference.buf_ready = 1;
            return;
        }
        idx++;
    }
}

// FreeRTOS task that continuously reads samples from I2S_NUM_1 into
// sampleBuffer, applies a fixed software gain and forwards them to
// audio_inference_callback().
//
// arg: number of bytes to request from i2s_read() per iteration, passed as an
//      integer cast to void*.
//
// The task runs while record_status is true and deletes itself afterwards.
static void capture_samples(void* arg) {
    const int32_t bytes_to_read = (int32_t)arg;
    const int32_t gain = 6;  // software gain applied to every sample
    size_t bytes_read = 0;

    while (record_status) {
        esp_err_t err = i2s_read(I2S_NUM_1, (void*)sampleBuffer, bytes_to_read, &bytes_read, portMAX_DELAY);
        if (err == ESP_OK && bytes_read > 0) {
            for (size_t i = 0; i < bytes_read / 2; i++) {
                // Multiply in 32 bits and saturate: truncating the product straight
                // to int16_t would wrap loud samples around and distort the signal.
                int32_t amplified = (int32_t)sampleBuffer[i] * gain;
                if (amplified > INT16_MAX) {
                    amplified = INT16_MAX;
                } else if (amplified < INT16_MIN) {
                    amplified = INT16_MIN;
                }
                sampleBuffer[i] = (int16_t)amplified;
            }
            audio_inference_callback(bytes_read);
        }
    }
    vTaskDelete(NULL);
}

// Inisialisasi mikrofon untuk inferensi
static bool microphone_inference_start(uint32_t n_samples) {
    inference.buffer = (int16_t*)malloc(n_samples * sizeof(int16_t));
    if (!inference.buffer) {
        ei_printf("ERR: Failed to allocate buffer\n");
        return false;
    }

    inference.buf_count = 0;
    inference.n_samples = n_samples;
    inference.buf_ready = 0;

    if (i2s_init(EI_CLASSIFIER_FREQUENCY)) {
        ei_printf("ERR: Failed to init I2S\n");
        free(inference.buffer);
        return false;
    }

    ei_sleep(100);
    record_status = true;
    xTaskCreate(capture_samples, "CaptureSamples", 4096, (void*)sample_buffer_size, 1, NULL);
    return true;
}

// Block until the audio callback reports a full inference buffer, then
// re-arm the buffer for the next window.
//
// Always returns true; the wait has no timeout.
static bool microphone_inference_record(void) {
    while (!inference.buf_ready) {
        delay(10);
    }
    // Reset the count BEFORE clearing the ready flag. The audio callback raises
    // buf_ready whenever buf_count >= n_samples, so clearing the flag first
    // would let the capture task set it again in between, and the next call
    // would return immediately with a partly filled window.
    inference.buf_count = 0;
    inference.buf_ready = 0;
    return true;
}

// Signal accessor installed as signal.get_data in loop(): converts `length`
// int16 samples of the inference buffer, starting at `offset`, to float and
// writes them to out_ptr. Always returns 0.
static int microphone_audio_signal_get_data(size_t offset, size_t length, float *out_ptr) {
    int16_t *first_sample = inference.buffer + offset;
    numpy::int16_to_float(first_sample, out_ptr, length);
    return 0;
}

// Inisialisasi I2S ESP32 untuk INMP441
static int i2s_init(uint32_t sampling_rate) {
    i2s_config_t i2s_config = {
        .mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX),
        .sample_rate = sampling_rate,
        .bits_per_sample = I2S_BITS_PER_SAMPLE_16BIT,
        .channel_format = I2S_CHANNEL_FMT_ONLY_RIGHT,
        .communication_format = I2S_COMM_FORMAT_I2S,
        .intr_alloc_flags = ESP_INTR_FLAG_LEVEL1,
        .dma_buf_count = 8,
        .dma_buf_len = 512,
        .use_apll = false,
        .tx_desc_auto_clear = false,
        .fixed_mclk = 0
    };

    i2s_pin_config_t pin_config = {
        .bck_io_num = 2,    // D1 = GPIO2 (BCLK)
        .ws_io_num = 3,     // D2 = GPIO3 (WS)
        .data_out_num = -1,
        .data_in_num = 1    // D0 = GPIO1 (SD)
    };

    esp_err_t ret;
    ret = i2s_driver_install(I2S_NUM_1, &i2s_config, 0, NULL);
    if (ret != ESP_OK) return 1;
    ret = i2s_set_pin(I2S_NUM_1, &pin_config);
    if (ret != ESP_OK) return 1;
    ret = i2s_zero_dma_buffer(I2S_NUM_1);
    return (ret == ESP_OK) ? 0 : 1;
}

void setup() {
    Serial.begin(115200);
    ei_printf("Edge Impulse Keyword Spotting + MAX7219\n");

    mx.begin();
    mx.setIntensity(3);
    mx.displayClear();
    tampilkanTeks("READY");

    ei_printf("Inferencing settings:\n");
    ei_printf("\tInterval: "); ei_printf_float((float)EI_CLASSIFIER_INTERVAL_MS); ei_printf(" ms.\n");
    ei_printf("\tFrame size: %d\n", EI_CLASSIFIER_DSP_INPUT_FRAME_SIZE);
    ei_printf("\tSample length: %d ms.\n", EI_CLASSIFIER_RAW_SAMPLE_COUNT / 16);
    ei_printf("\tNo. of classes: %d\n", EI_CLASSIFIER_LABEL_COUNT);
    ei_sleep(2000);

    if (!microphone_inference_start(EI_CLASSIFIER_RAW_SAMPLE_COUNT)) {
        ei_printf("ERR: Could not allocate audio buffer.\n");
        while (true) delay(1000);  // Stop program
    }
}

// Arduino main loop: wait for one full audio window, classify it, print every
// class score and update the display.
//
// "HALO" is shown only after the "halo esp" label has exceeded
// DETECTION_THRESHOLD in DETECTION_STREAK_REQUIRED consecutive windows;
// otherwise "NO" is shown.
void loop() {
    if (!microphone_inference_record()) {
        ei_printf("ERR: Failed to record audio...\n");
        return;
    }

    signal_t signal;
    signal.total_length = EI_CLASSIFIER_RAW_SAMPLE_COUNT;
    signal.get_data = &microphone_audio_signal_get_data;

    ei_impulse_result_t result;
    EI_IMPULSE_ERROR r = run_classifier(&signal, &result, debug_nn);
    if (r != EI_IMPULSE_OK) {
        ei_printf("ERR: Failed to run classifier (%d)\n", r);
        return;
    }

    // Print all scores and note whether the keyword crossed the threshold
    // in this window.
    bool keyword_heard = false;
    for (size_t ix = 0; ix < EI_CLASSIFIER_LABEL_COUNT; ix++) {
        ei_printf("  %s: ", result.classification[ix].label);
        ei_printf_float(result.classification[ix].value);
        ei_printf("\n");

        if (strcmp(result.classification[ix].label, "halo esp") == 0 &&
            result.classification[ix].value > DETECTION_THRESHOLD) {
            keyword_heard = true;
        }
    }

    // Only a window WITHOUT the keyword breaks the streak. Resetting the
    // counter on every unconfirmed window would cap it at 1, so the required
    // streak could never be reached.
    bool detected = false;
    if (keyword_heard) {
        detection_streak++;
        if (detection_streak >= DETECTION_STREAK_REQUIRED) {
            detected = true;
            detection_streak = 0;
        }
    } else {
        detection_streak = 0;
    }

    tampilkanTeks(detected ? "HALO" : "NO");

    delay(100);
}

Credits

Adiel Baja Kelana 76884
1 project • 1 follower
Mifthahul Maulana Kamal
1 project • 1 follower
Yabes Henli Salem 90342
1 project • 1 follower
Hendra Kusumah
49 projects • 159 followers
Love hacking and making new things from IoT to robotics

Comments