Evan Rust
Published © GPL3+

Recognizing Keywords with the SparkFun MicroMod System

By combining a Teensy MicroMod processor board with an ML Carrier Board, keywords can be extracted from speech and shown on a TFT display.

Intermediate · Full instructions provided · 4 hours · 686
Recognizing Keywords with the SparkFun MicroMod System

Things used in this project

Hardware components

SparkFun MicroMod Teensy Processor
SparkFun MicroMod Teensy Processor
×1
SparkFun MicroMod Machine Learning Carrier Board
SparkFun MicroMod Machine Learning Carrier Board
×1
TFT LCD ILI9341 Driver
×1

Software apps and online services

Edge Impulse Studio
Edge Impulse Studio
Arduino IDE
Arduino IDE
Visual Studio Code Extension for Arduino
Microsoft Visual Studio Code Extension for Arduino

Hand tools and fabrication machines

3D Printer (generic)
3D Printer (generic)

Story

Read more

Custom parts and enclosures

MicroMod ML Carrier Board + TFT Mounts

Schematics

Wiring

Code

MicroMod_Keyword_Spotter.ino

C/C++
#include <Audio.h>
#include <Wire.h>
#include <SPI.h>
#include <SD.h>
#include <SerialFlash.h>
#include <ILI9341_t3.h>
#include <font_Arial.h>

// Export the model yourself from Edge Impulse and add it as an Arduino library
#include <micromod-keywords_inferencing.h>

// Audio graph: channel 0 of the I2S input is routed to a record queue (read by
// loop()) and to both channels of the USB audio output.
AudioInputI2S            i2s1;
AudioRecordQueue         queue1;
AudioOutputUSB           usb1;
AudioConnection          patchCord4(i2s1, 0, queue1, 0);
AudioConnection          patchCord5(i2s1, 0, usb1, 0);
AudioConnection          patchCord6(i2s1, 0, usb1, 1);

// Display size in pixels
#define TFT_WIDTH   320
#define TFT_HEIGHT  240

// Pins used to drive the ILI9341 display
#define TFT_DC  1       // TX1 on carrier board
#define TFT_CS  4       // D0 on carrier board
#define TFT_RST 0       // RX1 on carrier board
#define TFT_BL  5       // D1 on carrier board

// Colors used when drawing the prediction list
#define TFT_BACKGROUND_COLOR    ILI9341_WHITE
#define TFT_FOREGROUND_COLOR    ILI9341_BLUE
#define TFT_TEXT_COLOR          ILI9341_BLACK

// Display driver instance, shared by setup() and loop()
static ILI9341_t3 tft(TFT_CS, TFT_DC, TFT_RST);

// NOTE(review): COLUMN_COUNT / COLUMN_WIDTH are not referenced anywhere in the
// visible sketch.
#define COLUMN_COUNT        16  
#define COLUMN_WIDTH        (TFT_WIDTH / COLUMN_COUNT)

// Raw audio is sampled from queue1 at 44.1KHz
#define RAW_AUDIO_SAMPLE_RATE 44100

// Capture buffer sized to RAW_AUDIO_SAMPLE_RATE samples (one second of audio at
// the rate stated above); filled by loop() and read by downsampleAudio().
static int16_t rawAudioBuffer[RAW_AUDIO_SAMPLE_RATE] DMAMEM;
// Number of samples currently stored in rawAudioBuffer
static uint32_t rawSamplesCount = 0;
// Output of downsampleAudio(); setup() points inference.buffer at this array
static int16_t downsampledBuffer[EI_CLASSIFIER_RAW_SAMPLE_COUNT] DMAMEM;

// Describes the sample window that is handed to the classifier.
struct inference_t {
    int16_t *buffer;      // int16 samples that are passed to the model
    uint32_t buf_count;   // number of samples that make up a full buffer
};

// Single shared instance, filled in by setup()
static inference_t inference;

// One-time initialization: audio library memory, serial logging, the TFT
// display, the audio record queue and the inference descriptor.
void setup()
{
    // Audio library memory, in blocks (see the Teensy Audio library docs for
    // AudioMemory); must cover the connections declared at file scope.
    AudioMemory(16);

    Serial.begin(115200);

    // Initialize TFT
    // Drive the backlight pin high, then wait briefly before talking to the panel
    pinMode(TFT_BL, OUTPUT);
    digitalWrite(TFT_BL, HIGH);
    delay(100);

    // Configure rotation, text style and clear the screen to the background color
    tft.begin();
    tft.setRotation(1);
    tft.setTextColor(TFT_TEXT_COLOR);
    tft.setTextSize(1);
    tft.setFont(Arial_10);
    tft.fillScreen(TFT_BACKGROUND_COLOR);

    // Start the record queue that loop() drains
    queue1.begin();

    // The classifier reads its input from the downsampled buffer
    inference.buffer = downsampledBuffer;
    inference.buf_count = EI_CLASSIFIER_RAW_SAMPLE_COUNT;
}

// Main loop.
//
// Each call moves at most one audio block from queue1 into rawAudioBuffer.
// Once RAW_AUDIO_SAMPLE_RATE samples have been collected, the audio is
// downsampled, classified, and the per-label scores are written to the serial
// port and drawn on the TFT as a label, a bar and a numeric value.
void loop()
{
    // The record queue hands out blocks of 128 int16 samples (256 bytes)
    static const uint32_t SAMPLES_PER_BLOCK = 128;

    if(queue1.available() && rawSamplesCount < RAW_AUDIO_SAMPLE_RATE)
    {
        // Never copy past the end of the capture buffer
        const uint32_t remaining = RAW_AUDIO_SAMPLE_RATE - rawSamplesCount;
        const uint32_t toCopy = (remaining < SAMPLES_PER_BLOCK) ? remaining : SAMPLES_PER_BLOCK;

        // Copy straight from the queue block into the capture buffer. No
        // per-sample logging here: printing for every one of the 44100 samples
        // stalls the loop while the queue keeps filling.
        memcpy(&rawAudioBuffer[rawSamplesCount], queue1.readBuffer(), toCopy * sizeof(int16_t));
        queue1.freeBuffer();

        rawSamplesCount += toCopy;
    }

    // Nothing more to do until the capture buffer is full
    if(rawSamplesCount < RAW_AUDIO_SAMPLE_RATE)
        return;

    // Downsample the audio and pass it to the model for inferencing
    Serial.println("Downsampling...");
    downsampleAudio();
    Serial.println("Done");

    // Start collecting the next window on the following call
    rawSamplesCount = 0;

    // Set up inferencing
    signal_t signal;
    signal.total_length = EI_CLASSIFIER_RAW_SAMPLE_COUNT;
    signal.get_data = &signal_get_data;
    ei_impulse_result_t result = { 0 };

    EI_IMPULSE_ERROR r = run_classifier(&signal, &result, false);
    if (r != EI_IMPULSE_OK) {
        Serial.printf("ERR: Failed to run classifier (%d)\n", r);
        // The result is not valid; keep the previous screen contents instead
        // of drawing all-zero scores
        return;
    }

    // print the predictions
    Serial.printf("Predictions ");
    Serial.printf("(DSP: %d ms., Classification: %d ms., Anomaly: %d ms.)",
        result.timing.dsp, result.timing.classification, result.timing.anomaly);
    Serial.printf(": \n");

    uint16_t cursorY = 0;
    tft.fillScreen(TFT_BACKGROUND_COLOR);

    // One 40-pixel-tall row per label: name, bar scaled to 200 px, numeric score
    for (size_t ix = 0; ix < EI_CLASSIFIER_LABEL_COUNT; ix++) {
        Serial.printf("    %s: %.5f\n", result.classification[ix].label, result.classification[ix].value);
        tft.setCursor(0, cursorY);
        tft.print(result.classification[ix].label);
        tft.fillRect(0, cursorY + 15, result.classification[ix].value * 200, 20, TFT_FOREGROUND_COLOR);
        tft.setCursor(210, cursorY);
        tft.printf("%.4f", result.classification[ix].value);
        cursorY += 40;
    }
}

// Ratio between the raw capture rate and the rate of the downsampled output
#define DOWNSAMPLE_SCALE_FACTOR 2.756254

// Transform the 44.1KHz samples into 16KHz
//
// For every output sample, the nearest raw sample and its two neighbours are
// averaged. Neighbours that fall outside rawAudioBuffer are skipped and the
// average is taken over the samples that were actually used, so the first
// output sample is not attenuated. Output samples whose source position lies
// completely outside the raw buffer are written as 0.
void downsampleAudio()
{
    // 32-bit counters: a 16-bit index wraps for larger model windows
    for(uint32_t i = 0; i < EI_CLASSIFIER_RAW_SAMPLE_COUNT; i++)
    {
        // Grab the closest value in the 44.1KHz buffer
        const int32_t center = static_cast<int32_t>(i * DOWNSAMPLE_SCALE_FACTOR);

        int32_t sum = 0;
        int32_t used = 0;

        // Get the 3 closest values to average
        for(int32_t offset = -1; offset <= 1; offset++)
        {
            const int32_t source = center + offset;

            // Boundary checking (signed, so -1 is detected directly)
            if(source < 0 || source >= RAW_AUDIO_SAMPLE_RATE)
                continue;

            sum += rawAudioBuffer[source];
            used++;
        }

        downsampledBuffer[i] = (used > 0) ? static_cast<int16_t>(sum / used) : 0;
    }
}

// Data callback handed to the classifier through signal_t::get_data.
//
// Converts `length` int16 samples of inference.buffer, starting at `offset`,
// to float and writes them to `out_ptr`.
//
// Returns 0 on success, or -1 when the requested range does not fit inside the
// inference.buf_count samples of the buffer (nothing is written in that case).
static int signal_get_data(size_t offset, size_t length, float *out_ptr)
{
    // Written as two comparisons so that offset + length cannot overflow
    if(offset > inference.buf_count || length > inference.buf_count - offset)
        return -1;

    numpy::int16_to_float(&inference.buffer[offset], out_ptr, length);

    return 0;
}

Credits

Evan Rust
124 projects • 1121 followers
Embedded Software Engineer II @ Amazon's Project Kuiper. Contact me for product reviews or custom project requests.

Comments