Yarana Iot Guru
Published © MIT

Voice-Controlled Security Door Lock using ESP32 + Face

Build a secure, voice-controlled door lock using an ESP32 (or ESP32-CAM) for face detection and a voice-control front end — works offline

Beginner · Full instructions provided · 8 hours · 9
Voice-Controlled Security Door Lock using ESP32 + Face

Things used in this project

Software apps and online services

Arduino IDE
Arduino IDE

Story

Read more

Code

1) ESP32-CAM — Camera webserver and simple face-detection example (Arduino framework)

C/C++
This sketch starts a minimal camera web server with two endpoints: `/` (a plain-text banner) and `/capture` (a single JPEG frame). It does not perform face detection itself; `loop()` only contains a placeholder flag for a detector you add later. (Use the Arduino ESP32 board support with the AI THINKER camera module selected.)
This code provides the base camera webserver. For face detection/recognition:

Option 1 (local advanced): use ESP-WHO (Espressif) and build with ESP-IDF — this supports local face recognition and embedding matching.

Option 2 (cloud): fetch an image from /capture, POST it to your server for face recognition (OpenCV/FaceNet), and receive a boolean face_ok in the response.
// ESP32-CAM basic webserver + capture (Yarana IoT Guru)
// NOTE: For full face recognition use ESP-WHO (ESP-IDF) or a server side solution.

#include "esp_camera.h"
#include <WiFi.h>
#include <WebServer.h>

// Replace with your network credentials (for enrollment/testing UI).
// setup() passes both strings to WiFi.begin().
const char* ssid = "YOUR_SSID";
const char* password = "YOUR_PASS";

// HTTP server listening on port 80. Routes are registered in
// startCameraServer() and requests are serviced from loop() via handleClient().
WebServer server(80);

//
// Camera pin definitions for AI THINKER module (most ESP32-CAM boards)
//
// setup() copies these into camera_config_t: PWDN/RESET/XCLK are the control
// lines, SIOD/SIOC feed pin_sscb_sda/pin_sscb_scl, Y2..Y9 feed pin_d0..pin_d7
// (Y2 -> d0, Y9 -> d7), and VSYNC/HREF/PCLK are the frame timing signals.
// NOTE(review): RESET is -1, presumably meaning "not wired" to the driver —
// confirm against the esp32-camera documentation.
//
#define PWDN_GPIO_NUM     32
#define RESET_GPIO_NUM    -1
#define XCLK_GPIO_NUM     0
#define SIOD_GPIO_NUM     26
#define SIOC_GPIO_NUM     27
#define Y9_GPIO_NUM       35
#define Y8_GPIO_NUM       34
#define Y7_GPIO_NUM       39
#define Y6_GPIO_NUM       36
#define Y5_GPIO_NUM       21
#define Y4_GPIO_NUM       19
#define Y3_GPIO_NUM       18
#define Y2_GPIO_NUM       5
#define VSYNC_GPIO_NUM    25
#define HREF_GPIO_NUM     23
#define PCLK_GPIO_NUM     22

// Forward declaration; defined after the HTTP handlers further down.
void startCameraServer();

// Simple state variables used by main lock logic
// face_detected: flag consumed (and cleared) by loop(). Nothing in this sketch
// sets it to true yet — a face detector has to be added for that.
volatile bool face_detected = false; // set when face detected (simple)
// millis() value recorded by loop() the last time face_detected was consumed.
unsigned long face_detected_time = 0;

/**
 * One-time initialisation for the ESP32-CAM sketch.
 *
 * Order of operations:
 *   1. Open the serial console at 115200 baud.
 *   2. Join the WiFi network given by `ssid` / `password` (blocks until connected).
 *   3. Configure and start the camera driver (JPEG, VGA, two frame buffers).
 *   4. Register the HTTP routes and start the web server.
 *
 * If the camera driver fails to initialise, the error code is logged and the
 * web server is NOT started.
 */
void setup() {
  Serial.begin(115200);
  Serial.println("Yarana IoT Guru - ESP32-CAM Webserver");

  // wifi for enrollment UI (optional)
  // NOTE: this loop has no timeout, so setup() never returns without a network.
  WiFi.begin(ssid, password);
  Serial.print("Connecting to WiFi");
  while (WiFi.status() != WL_CONNECTED) { delay(500); Serial.print("."); }
  Serial.println("\nWiFi connected: " + WiFi.localIP().toString());

  // Camera config
  // Value-initialise the struct: any field this sketch does not assign below
  // (which fields exist depends on the installed esp32-camera version) is then
  // zero instead of whatever happened to be on the stack.
  camera_config_t config = {};
  config.ledc_channel = LEDC_CHANNEL_0;
  config.ledc_timer = LEDC_TIMER_0;
  config.pin_pwdn = PWDN_GPIO_NUM;
  config.pin_reset = RESET_GPIO_NUM;
  config.pin_xclk = XCLK_GPIO_NUM;
  config.pin_sscb_sda = SIOD_GPIO_NUM;
  config.pin_sscb_scl = SIOC_GPIO_NUM;
  // Sensor data bus: Y2 is the least significant line (d0), Y9 the most (d7).
  config.pin_d7 = Y9_GPIO_NUM;
  config.pin_d6 = Y8_GPIO_NUM;
  config.pin_d5 = Y7_GPIO_NUM;
  config.pin_d4 = Y6_GPIO_NUM;
  config.pin_d3 = Y5_GPIO_NUM;
  config.pin_d2 = Y4_GPIO_NUM;
  config.pin_d1 = Y3_GPIO_NUM;
  config.pin_d0 = Y2_GPIO_NUM;
  config.pin_vsync = VSYNC_GPIO_NUM;
  config.pin_href = HREF_GPIO_NUM;
  config.pin_pclk = PCLK_GPIO_NUM;
  config.xclk_freq_hz = 20000000;
  config.pixel_format = PIXFORMAT_JPEG;
  config.frame_size = FRAMESIZE_VGA;
  config.jpeg_quality = 10;
  config.fb_count = 2;

  esp_err_t err = esp_camera_init(&config);
  if (err != ESP_OK) {
    // Terminate the line so the next log message does not run into this one.
    Serial.printf("Camera init failed with error 0x%x\n", err);
    return;
  }

  startCameraServer();
}

/**
 * Main loop: service pending HTTP clients, then consume the face-detected flag.
 *
 * Placeholder behaviour: a face detector (not part of this sketch) is expected
 * to set `face_detected`; e.g. run it on a grayscale frame every n iterations.
 * When the flag is found set, the current millis() is stored in
 * `face_detected_time` for the lock logic and the flag is cleared.
 */
void loop() {
  server.handleClient();

  // Nothing pending from the detector: done for this iteration.
  if (!face_detected) {
    return;
  }

  // Timestamp to be consumed by the lock logic.
  face_detected_time = millis();
  // Reset — real code will track the face verification state itself.
  face_detected = false;
}

// --- minimal camera webserver endpoints ---
// GET / (any method): replies 200 with a fixed plain-text banner.
void handleRoot() {
  static const char kBanner[] = "ESP32-CAM - Yarana IoT Guru";
  server.send(200, "text/plain", kBanner);
}

/**
 * GET /capture: grab one frame from the camera driver and return it as JPEG.
 *
 * Replies 500 with a plain-text message when no frame buffer can be obtained.
 * The frame buffer is always handed back to the driver after the response is
 * sent so it can be reused for the next capture.
 */
void handleCapture(){
  camera_fb_t * fb = esp_camera_fb_get();
  if (!fb) {
    server.send(500, "text/plain", "Camera capture failed");
    return;
  }
  // The content type is passed to send_P() directly. Adding it again through
  // sendHeader() would put a second Content-Type header into the response.
  server.send_P(200, "image/jpeg", (const char*)fb->buf, fb->len);
  esp_camera_fb_return(fb);
}

// Registers the two HTTP routes ("/" and GET "/capture"), starts the server,
// and logs a confirmation line on the serial console.
void startCameraServer() {
  // Route table — the two URIs are distinct, so registration order is irrelevant.
  server.on("/capture", HTTP_GET, handleCapture);
  server.on("/", handleRoot);

  server.begin();
  Serial.println("Camera server started");
}

2) Elechouse Voice Recognition module — sample Arduino code (ESP32 side)

C/C++
Train Elechouse module using its example on a PC/Arduino UNO. After training, wire it to ESP32 serial and use this sketch to read recognized commands.
Use Elechouse library / PC tool to train phrases and get the correct parsing logic. This code is a placeholder showing serial read.
// Example: read Elechouse VR Module on ESP32 Serial2
#include <HardwareSerial.h>

// Serial link to the voice recognition module; opened in setup() at 9600 baud
// on pins 16 (RX) and 17 (TX).
// NOTE(review): confirm GPIO16/17 are free on your board variant before wiring.
HardwareSerial VRSerial(2); // use UART2 (RX2=16 TX2=17)

// Most recent command ID decoded by loop(); -1 until a digit has been received.
volatile int lastCommand = -1;
// millis() value at the moment lastCommand was last updated.
volatile unsigned long lastCmdTime = 0;

// Opens the debug console (115200 baud) and the voice-module UART
// (9600 baud, 8N1), then prints a banner.
void setup() {
  // Wiring: ESP32 pin 16 (RX) <- VR TX, ESP32 pin 17 (TX) -> VR RX.
  const int vrRxPin = 16;
  const int vrTxPin = 17;

  Serial.begin(115200);
  VRSerial.begin(9600, SERIAL_8N1, vrRxPin, vrTxPin);
  Serial.println("Voice module test (Yarana IoT Guru)");
}

/**
 * Polls the voice-module UART and handles at most one byte per iteration.
 *
 * Every received byte is echoed to the console in hex. A byte that is an ASCII
 * digit '0'..'9' is taken as a command ID and stored in `lastCommand` together
 * with the current millis() in `lastCmdTime`.
 *
 * This is a simplification: the Elechouse VR module sends binary frames
 * (something like 0xAA 0x37 0x01 0x01 0x00 0xFF — check the module docs), and
 * most users parse them with the vendor library. In production, parse per the
 * Elechouse protocol and include confidence checks.
 */
void loop() {
  if (!VRSerial.available()) {
    return;  // nothing received yet
  }

  const int rawByte = VRSerial.read();
  Serial.print("VR raw: ");
  Serial.println(rawByte, HEX);

  // Example mapping: the module sends ASCII '1' for command 1, and so on.
  const bool isAsciiDigit = (rawByte >= '0') && (rawByte <= '9');
  if (isAsciiDigit) {
    lastCommand = rawByte - '0';
    lastCmdTime = millis();
    Serial.print("Command ID: ");
    Serial.println(lastCommand);
  }
}

3) Lock control logic: combine face + voice verification (Arduino-style pseudocode)

C/C++
This is the main controller logic that you run on the ESP32. It checks timestamps for face & voice windows, and actuates the relay.
// Lock control pseudo-implementation (combine parts)
// Output pin driving the lock relay: setup() initialises it LOW and
// actuateLockOpen() pulses it HIGH.
// NOTE(review): on many ESP32 modules GPIO12 is a boot strapping pin — confirm
// against the ESP32 datasheet that the relay input cannot pull it high at reset.
#define RELAY_PIN 12
// Manual override button, configured INPUT_PULLUP in setup(); reads LOW when pressed.
#define MANUAL_BUTTON_PIN 13

// Authentication factors. Each *_ok flag is paired with the millis() value at
// which it was set; loop() clears a flag 30 s after its timestamp.
bool face_ok = false;
unsigned long face_time = 0;
bool voice_ok = false;
unsigned long voice_time = 0;

// Config: window in ms to accept second factor (voice after face or face after voice)
const unsigned long auth_window = 8000; // 8 seconds

// Configures the lock I/O: the manual button as an input with pull-up and the
// relay as an output driven LOW (assumed to mean "locked").
// NOTE(review): loop() and logEvent() print to Serial, but no Serial.begin()
// appears in this setup() — add it when merging with the camera/voice code.
void setup() {
  // Button idles HIGH through the internal pull-up; pressed reads LOW.
  pinMode(MANUAL_BUTTON_PIN, INPUT_PULLUP);

  // Relay output, starting in the locked state.
  pinMode(RELAY_PIN, OUTPUT);
  digitalWrite(RELAY_PIN, LOW);

  // initialize camera server and voice serial (see earlier code)
}

// Returns true when two millis() timestamps lie within `window_ms` of each other.
// Both differences are taken in unsigned (modular) arithmetic and the smaller
// one is used, so the result stays correct in either order and across a
// millis() rollover, with no signed casts that could overflow.
static bool factorsWithinWindow(unsigned long a, unsigned long b, unsigned long window_ms) {
  const unsigned long forward = a - b;
  const unsigned long backward = b - a;
  const unsigned long gap = (forward < backward) ? forward : backward;
  return gap <= window_ms;
}

/**
 * Lock controller main loop.
 *
 * 1. Manual override: a LOW level on MANUAL_BUTTON_PIN opens the lock for 5 s,
 *    followed by a 1 s blocking debounce delay.
 * 2. Two-factor unlock: when both `face_ok` and `voice_ok` are set and their
 *    timestamps are within `auth_window` of each other, the lock is opened for
 *    5 s, the event is logged, and both flags are cleared.
 * 3. Expiry: a factor older than 30 s is cleared.
 *
 * The flags and timestamps are set elsewhere (face module / voice module):
 *   face_ok = true;  face_time = millis();
 *   voice_ok = true; voice_time = millis();
 *
 * Note that actuateLockOpen() and the debounce delay block this loop.
 */
void loop() {
  // Manual override
  if (digitalRead(MANUAL_BUTTON_PIN) == LOW) {
    actuateLockOpen(5000); // open for 5 sec
    delay(1000); // debounce
  }

  // If both happened within auth window, unlock
  if (face_ok && voice_ok) {
    if (factorsWithinWindow(face_time, voice_time, auth_window)) {
      Serial.println("AUTH SUCCESS — unlocking");
      actuateLockOpen(5000);
      logEvent("unlock_success");
      face_ok = voice_ok = false;
    }
  }

  // Optional: if a factor is too old, reset it.
  // (now - stamp) in unsigned arithmetic is rollover-safe.
  if (face_ok && millis() - face_time > 30000) face_ok = false;
  if (voice_ok && millis() - voice_time > 30000) voice_ok = false;

  // rest of loop...
}

// Drives RELAY_PIN HIGH for `duration_ms` milliseconds, then back to LOW.
// Blocks the caller for the whole duration (uses delay()).
void actuateLockOpen(unsigned long duration_ms) {
  digitalWrite(RELAY_PIN, HIGH); // energize strike/activate servo
  delay(duration_ms);            // hold the lock open
  digitalWrite(RELAY_PIN, LOW);  // release: back to the initial level set in setup()
}

// Writes one event line to the serial console in the form "LOG: <s>".
void logEvent(String s) {
  Serial.print("LOG: ");
  Serial.println(s);
  // Optionally send to MQTT / cloud
}

4) Optional: MQTT listener for online assistant unlocking

C/C++
If you use Google Assistant → IFTTT → webhook → your server → MQTT publish, ESP32 can subscribe to a topic and receive unlock command. Example using PubSubClient:
If you use public brokers, add auth and TLS. IFTTT + Adafruit IO or a small Node-RED server can translate assistant triggers into MQTT messages.
#include <WiFi.h>
#include <PubSubClient.h>

// WiFi credentials used by setup_mqtt().
// NOTE(review): `ssid` is also defined in the camera sketch — keep only one
// definition if the sketches are merged into a single file.
const char* ssid = "YOUR_SSID";
const char* pass = "YOUR_PASS";
// Broker host name; setup_mqtt() connects to it on port 1883.
const char* mqtt_server = "broker.hivemq.com"; // use secure private broker in prod

// Network client used by the MQTT client below.
WiFiClient espClient;
PubSubClient client(espClient);

/**
 * MQTT message handler (registered via client.setCallback in setup_mqtt()).
 *
 * Copies the payload into a String, logs it, and — when the text is exactly
 * "unlock_request" — marks the voice factor as satisfied at the current time.
 *
 * @param topic   topic the message arrived on (not inspected here)
 * @param payload message bytes; only the first `length` bytes are read
 * @param length  number of payload bytes
 *
 * NOTE(review): any publisher able to write to the subscribed topic can set the
 * voice factor this way; use an authenticated broker as the sketch advises.
 */
void callback(char* topic, byte* payload, unsigned int length) {
  String msg;
  msg.reserve(length);  // one allocation instead of growing byte by byte
  // Index type matches `length` to avoid a signed/unsigned comparison.
  for (unsigned int i = 0; i < length; ++i) {
    msg += (char)payload[i];
  }
  Serial.print("MQTT msg: "); Serial.println(msg);
  if (msg == "unlock_request") {
    voice_ok = true; voice_time = millis(); // treat incoming as voice factor
  }
}

/**
 * Brings up WiFi and the MQTT session, then subscribes to "home/lock/esp1".
 *
 * Both wait loops poll every 500 ms and have no timeout, so this function
 * blocks until the network and the broker are reachable.
 *
 * NOTE(review): no client.loop() call is visible in these sketches; PubSubClient
 * presumably needs it called regularly for callback() to fire — confirm and add
 * it to the main loop.
 */
void setup_mqtt() {
  // Join the access point.
  WiFi.begin(ssid, pass);
  while (WiFi.status() != WL_CONNECTED) {
    delay(500);
  }

  // Broker endpoint and message handler.
  client.setCallback(callback);
  client.setServer(mqtt_server, 1883);

  // Keep trying to connect, pausing 500 ms after each attempt.
  for (; !client.connected(); delay(500)) {
    client.connect("esp_lock_001");
  }

  client.subscribe("home/lock/esp1");
}

Credits

Yarana Iot Guru
41 projects • 11 followers
Yarana Iot Guru Yarana IoT Guru: Arduino, ESP32, GSM, NodeMCU & more. Projects, Tutorials & App Development. Innovate with us!
Thanks to Yarana IoT Guru.

Comments