Hardware components
After spending hours immersed in electronics projects, have you ever found yourself too exhausted to clean up your desk, wishing you could just collapse into bed?
But if you leave your workspace in disarray, it can be hard to find what you need when you want to get back to work—and that can dampen your creative motivation.
This robotic arm is designed to support makers like you, automatically tidying up your desk so you can focus on what you love.
How it works:
- Using a camera, the system searches the desk for tools that have been pre-registered in its database.
- Once the target tool is identified, the robotic arm moves to its location and securely grasps it.
- The arm then moves the selected tool to its designated location.
- After releasing the tool at the designated location, the robotic arm returns to its original position.
The tool detection AI model built into the UnitV2 performs inference on images captured by the camera and outputs the identification results via serial communication.
Model specifications
- base model: YOLOv8n
- target tools: ['driver', 'nipper', 'needle-nose plier', 'wire stripper']
- input image size: 320 x 320 x 3
- format: TFLite (float32)
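To sanity-check the exported model, its tensor shapes can be inspected with a short script like the following (a minimal sketch; the model file name is a placeholder, and the output shape assumes a 4-class YOLOv8 head at 320 x 320):

import tensorflow as tf

# load the exported TFLite model (file name is a placeholder)
interpreter = tf.lite.Interpreter(model_path="best_float32.tflite")
interpreter.allocate_tensors()
print(interpreter.get_input_details()[0]["shape"])   # expected: [1, 320, 320, 3]
print(interpreter.get_output_details()[0]["shape"])  # expected: [1, 8, 2100] = 4 box coords + 4 class scores, 2100 proposals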
Collecting training images
- number: about 5 photos per tool
- content: tool closed, tool open, different shooting angles, etc.
- labeling: labelImg (Python package)
Data augmentation
- augmentation: albumentations (Python package)
- augmentation types: rotation, scaling, blur, brightness, contrast
- magnification: x50 (each source image is expanded into 50 augmented variants, as sketched after the code below)
import albumentations as A
transform = A.Compose(
[
A.Affine(
rotate=(-15, 15),
scale=(0.8, 1.2),
p=1,
),
A.RandomBrightnessContrast(p=1),
A.MotionBlur(p=0.2),
],
bbox_params=A.BboxParams(format="yolo", label_fields=["class_labels"]),
)
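A minimal usage sketch for the transform above, producing the x50 magnification (the file name, box, and label are placeholders; each call yields one augmented image plus its adjusted YOLO boxes):

import cv2

image = cv2.imread("driver_01.jpg")
bboxes = [[0.5, 0.5, 0.3, 0.2]]  # YOLO format: cx, cy, w, h (normalized)
class_labels = ["driver"]
for i in range(50):  # 50 augmented variants per source image
    out = transform(image=image, bboxes=bboxes, class_labels=class_labels)
    cv2.imwrite(f"driver_01_aug{i:02d}.jpg", out["image"])
    # out["bboxes"] and out["class_labels"] feed the matching YOLO label file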
Transfer learning and exporting to TFLite format
- training: ultralytics (Python package)
- train/val = 0.8/0.2, 300 epochs
- performance: mAP50-95 = 0.99
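For reference, the training step with the ultralytics package comes down to a few lines like these (a sketch; the dataset YAML name is an assumption, and the 0.8/0.2 split lives in that dataset definition):

from ultralytics import YOLO

model = YOLO("yolov8n.pt")  # COCO-pretrained base model for transfer learning
model.train(data="tools.yaml", imgsz=320, epochs=300)

The trained weights are then exported to TFLite: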
yolo export model=tools_inference_model/yolov8n_320_custom3/weights/best.pt format=tflite device=0
Capture and inference application (camera.cpp)
- cross-compile environment: built on WSL (Linux) with reference to https://www.hackster.io/naveenbskumar/object-detection-using-m5stack-unitv2-and-edge-impulse-650a2f
- source code: written with reference to the same article
Serial communication between the M5Stack Core and the UnitV2 (start.py)
- receives instructions from the M5Stack over the serial port and passes an inference request to the inference code (camera.cpp).
- reads the command-line output of camera.cpp and forwards it to the M5Stack.
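A sample exchange looks like this (the values are illustrative; the formats follow camera.cpp's printf output and the parser in main.cpp):

M5Stack -> UnitV2: START,0.7
UnitV2 -> M5Stack: RESP,1716000000000,1,0.700000
UnitV2 -> M5Stack: nipper,0.920000,102,54,48,36
UnitV2 -> M5Stack: END

Note that start.py strips spaces from each line, so a label such as 'needle-nose plier' arrives as 'needle-noseplier'.

Robot arm control application (main.cpp)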
#include <M5Unified.h>
#include "config.h"
#include "modules/arm-controller.h"
#include <math.h>
// arm controller object
ArmController arm(ARM1_SHOULDER_PIN, ARM2_ELBOW_PIN, GRIPPER_PIN);
/*
Calculate the inverse kinematics of a two-joint robot arm:
the joint angles needed to move the arm tip to the target position.
Args:
x1, y1 (int): target position of the arm tip
theta1 (int*): output, angle of the first joint (degrees)
theta2 (int*): output, angle of the second joint (degrees)
*/
void calculate_joint_angles(int x1, int y1, int *theta1, int *theta2)
{
int dx = x1 - PLACE_X0;
int dy = y1 - PLACE_Y0;
int D_squared = dx * dx + dy * dy;
double D = sqrt((double)D_squared); // distance from the arm origin to the target
double rad_to_deg = 180. / PI;
double cos_theta2 = (D_squared - LINK1_LENGTH * LINK1_LENGTH - LINK2_LENGTH * LINK2_LENGTH) / (2 * LINK1_LENGTH * LINK2_LENGTH);
cos_theta2 = max(-1.0, min(1.0, cos_theta2));
*theta2 = (int) (acos(cos_theta2) * rad_to_deg);
double alpha = atan2(dy, dx);
double cos_beta = (D_squared + LINK1_LENGTH * LINK1_LENGTH - LINK2_LENGTH * LINK2_LENGTH) / (2 * D * LINK1_LENGTH);
cos_beta = max(-1.0, min(1.0, cos_beta));
double beta = acos(cos_beta);
*theta1 = (int) ((alpha - beta) * rad_to_deg);
}
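/*
Worked example (illustrative target, using LINK1_LENGTH=100, LINK2_LENGTH=80,
PLACE_X0=PLACE_Y0=100 from config.h): target (180, 100) gives dx=80, dy=0, D=80.
cos(theta2) = (6400 - 10000 - 6400) / (2*100*80) = -0.625 -> theta2 ~ 129 deg
alpha = atan2(0, 80) = 0
cos(beta) = (6400 + 10000 - 6400) / (2*80*100) = 0.625 -> beta ~ 51 deg
theta1 = 0 - 51 deg ~ -51 deg
Check: link 1 ends at 100*(cos -51, sin -51) ~ (62.5, -78.1); adding
80*(cos(-51+129), sin(-51+129)) ~ (17.5, 78.1) recovers (80, 0) = (dx, dy).
*/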
// initialization
void setup()
{
M5.begin(); // initialize the M5Stack
M5.Lcd.clear();
M5.Lcd.setTextSize(2);
M5.Lcd.setCursor(0, 0);
M5.Lcd.print("ARM Demo");
M5.Lcd.setCursor(0, 20);
M5.Lcd.print("begin...");
Serial.begin(115200);
Serial.println("setup: begin...");
arm.begin(); // set up the PWM channels for the servos
M5.Lcd.setCursor(0, 40);
M5.Lcd.print("neutral...");
Serial.println("setup: neutral...");
arm.neutral(); // move all joints to the neutral position
}
// current arm pose (joint angles); set when Button A builds a motion queue
static int current_x;
static int current_y;
static int current_z;
// queue a single motion step (x, y, z map to shoulder, elbow, gripper angles)
void moveToXYZ(int x, int y, int z, bool move_now)
{ // single-step action queue
static int shoulder_angles[1];
static int elbow_angles[1];
static int gripper_angles[1];
static int timeouts[1] = {300}; // step wait time (ms)
shoulder_angles[0] = x;
elbow_angles[0] = y;
gripper_angles[0] = z;
M5.Lcd.setCursor(0, 60);
M5.Lcd.printf("XYZ:%d,%d,%d", x, y, z);
M5.Lcd.setCursor(0, 80);
M5.Lcd.print("arbitrary...");
Serial.printf("moveToXYZ: x=%d, y=%d, z=%d\n", x, y, z);
arm.arbitrary(shoulder_angles, elbow_angles, gripper_angles, timeouts, 1); // queue one step
Serial.println("moveToXYZ: arbitrary called");
// note: current_x, current_y, current_z are not updated here
}
// main loop
void loop()
{
static bool arm_enabled = false; // true while a queued motion is running
static int x = ARM_INIT_SHOULDER; // shoulder angle
static int y = ARM_INIT_ELBOW; // elbow angle
static int z = ARM_INIT_GRIPPER; // gripper angle
M5.update(); // update button states
// debug:
// Serial.println("loop: start");
// Button A: detect a tool and run the pick-and-place sequence
if (M5.BtnA.wasPressed())
{
M5.Lcd.fillRect(0, 100, 320, 140, BLACK); // clear the status area
Serial.println("loop: BtnA wasPressed");
arm_enabled = false; // stop any running motion
bool emergency = false;
// --- request a detection from the camera ---
Serial.println("loop: (BtnA) Camera request start");
Serial2.begin(CAMERA_SERIAL_BAUDRATE, SERIAL_8N1, CAMERA_SERIAL_RX_PIN, CAMERA_SERIAL_TX_PIN);
String send_cmd = "START,0.7\n";
Serial2.print(send_cmd);
Serial.printf("camera TX: %s\n", send_cmd.c_str());
M5.Lcd.setCursor(0, LCD_STATUS_Y);
M5.Lcd.setTextColor(YELLOW, BLACK);
M5.Lcd.printf("TX: %s", send_cmd.c_str());
unsigned long start_time = millis();
String lines[10];
int line_count = 0;
bool found_pliers = false;
int pliers_x = x, pliers_y = y;
// wait up to 3 seconds for the camera response
while (millis() - start_time < 3000)
{
M5.update();
if (M5.BtnB.isPressed())
{
emergency = true;
break;
}
if (Serial2.available())
{
String line = Serial2.readStringUntil('\n');
line.trim();
if (line.length() == 0)
continue;
Serial.print("camera RX: ");
Serial.println(line);
if (line == "END")
break;
lines[line_count < 10 ? line_count++ : 9] = line; // keep at most 10 lines
}
}
if (line_count == 0)
{
M5.Lcd.setCursor(0, 140);
M5.Lcd.setTextColor(RED, BLACK);
M5.Lcd.print("RX Timeout");
}
// parse the detection lines (lines[0] is the RESP header, so start at 1)
for (int i = 1; i < line_count; ++i)
{
int idx1 = lines[i].indexOf(",");
if (idx1 < 0)
continue;
String label = lines[i].substring(0, idx1); // note: label is logged but not filtered on
Serial.printf("camera parse: %s\n", lines[i].c_str());
int idx2 = lines[i].indexOf(",", idx1 + 1);
int idx3 = lines[i].indexOf(",", idx2 + 1);
int idx4 = lines[i].indexOf(",", idx3 + 1);
int idx5 = lines[i].indexOf(",", idx4 + 1);
if (idx2 > 0 && idx3 > 0 && idx4 > 0 && idx5 > 0)
{
float conf = lines[i].substring(idx1 + 1, idx2).toFloat();
int left = lines[i].substring(idx2 + 1, idx3).toInt();
int top = lines[i].substring(idx3 + 1, idx4).toInt();
int width = lines[i].substring(idx4 + 1, idx5).toInt();
int height = lines[i].substring(idx5 + 1).toInt();
pliers_x = left + width / 2; // bounding-box center
pliers_y = top + height / 2;
Serial.printf("camera parse: pliers found conf=%.2f x=%d y=%d\n", conf, pliers_x, pliers_y);
found_pliers = true;
break;
}
}
// --- build the motion queue ---
const int max_total_steps = 300; // queue capacity
int shoulder_angles[max_total_steps];
int elbow_angles[max_total_steps];
int gripper_angles[max_total_steps];
int timeouts[max_total_steps];
int total_steps = 0;
// --- 1. move from the current pose to the step-0 goal ---
if (!found_pliers)
{
current_x = ARM_INIT_SHOULDER;
current_y = ARM_INIT_ELBOW;
current_z = ARM_INIT_GRIPPER;
}
int start_x = current_x, start_y = current_y, start_z = current_z;
int goal_x, goal_y, goal_z;
if (found_pliers)
{
calculate_joint_angles(pliers_x, pliers_y, &goal_x, &goal_y); // pixel coordinates are used directly as arm coordinates here
M5.Lcd.setCursor(0, 120);
M5.Lcd.setTextColor(GREEN, BLACK);
}
else
{
goal_x = ARM_INIT_SHOULDER + ARM_STEP_SHOULDER_DIFF[0];
goal_y = ARM_INIT_ELBOW + ARM_STEP_ELBOW_DIFF[0];
M5.Lcd.setCursor(0, 120);
M5.Lcd.setTextColor(RED, BLACK);
Serial.println("camera parse: pliers not found");
}
goal_z = ARM_INIT_GRIPPER + ARM_STEP_GRIPPER_DIFF[0];
M5.Lcd.printf("STEP0: %d,%d,%d", goal_x, goal_y, goal_z);
// interpolate from the current pose to the step-0 goal
const int steps1 = 50;
for (int i = 1; i <= steps1; ++i)
{
if (total_steps >= max_total_steps)
break;
shoulder_angles[total_steps] = (int)(start_x + (goal_x - start_x) * (float)i / steps1);
elbow_angles[total_steps] = (int)(start_y + (goal_y - start_y) * (float)i / steps1);
gripper_angles[total_steps] = (int)(start_z + (goal_z - start_z) * (float)i / steps1);
timeouts[total_steps] = 20;
total_steps++;
}
current_x = goal_x;
current_y = goal_y;
current_z = goal_z;
// 2. play back the motion pattern (Button B aborts)
for (int step = 0; step < ARM_STEP_NUM; ++step)
{
if (emergency)
break;
int goal_shoulder = ARM_INIT_SHOULDER + ARM_STEP_SHOULDER_DIFF[step];
int goal_elbow = ARM_INIT_ELBOW + ARM_STEP_ELBOW_DIFF[step];
int goal_gripper = ARM_INIT_GRIPPER + ARM_STEP_GRIPPER_DIFF[step];
M5.Lcd.setCursor(0, 120);
M5.Lcd.setTextColor(YELLOW, BLACK);
M5.Lcd.printf("STEP%d: %d,%d,%d ", step, goal_shoulder, goal_elbow, goal_gripper);
int s_x = current_x, s_y = current_y, s_z = current_z;
const int steps2 = 30;
for (int j = 1; j <= steps2; ++j)
{
if (total_steps >= max_total_steps)
break;
shoulder_angles[total_steps] = (int)(s_x + (goal_shoulder - s_x) * (float)j / steps2);
elbow_angles[total_steps] = (int)(s_y + (goal_elbow - s_y) * (float)j / steps2);
gripper_angles[total_steps] = (int)(s_z + (goal_gripper - s_z) * (float)j / steps2);
timeouts[total_steps] = 20;
total_steps++;
}
current_x = goal_shoulder;
current_y = goal_elbow;
current_z = goal_gripper;
if (total_steps >= max_total_steps)
break;
shoulder_angles[total_steps] = current_x;
elbow_angles[total_steps] = current_y;
gripper_angles[total_steps] = current_z;
timeouts[total_steps] = ARM_STEP_TIMEOUTS[step];
total_steps++;
}
// 3. return to the initial pose
if (!emergency)
{
int back_start_x = current_x, back_start_y = current_y, back_start_z = current_z;
int back_goal_x = ARM_INIT_SHOULDER;
int back_goal_y = ARM_INIT_ELBOW;
int back_goal_z = ARM_INIT_GRIPPER;
// number of interpolation steps scales with the largest joint movement
int diff_x = abs(back_goal_x - back_start_x);
int diff_y = abs(back_goal_y - back_start_y);
int diff_z = abs(back_goal_z - back_start_z);
int max_diff = diff_x;
if (diff_y > max_diff)
max_diff = diff_y;
if (diff_z > max_diff)
max_diff = diff_z;
int back_steps = max_diff / 4; // one interpolation step per 4 degrees
if (back_steps < 2)
back_steps = 2; // at least 2 steps
if (back_start_x == back_goal_x && back_start_y == back_goal_y && back_start_z == back_goal_z)
{
back_steps = 1;
}
int prev_shoulder = 0, prev_elbow = 0, prev_gripper = 0;
for (int k = 0; k < back_steps; ++k)
{
if (total_steps >= max_total_steps)
break;
float ratio = (back_steps == 1) ? 1.0f : (float)k / (back_steps - 1);
int s = (int)(back_start_x + (back_goal_x - back_start_x) * ratio);
int e = (int)(back_start_y + (back_goal_y - back_start_y) * ratio);
int g = (int)(back_start_z + (back_goal_z - back_start_z) * ratio);
shoulder_angles[total_steps] = s;
elbow_angles[total_steps] = e;
gripper_angles[total_steps] = g;
timeouts[total_steps] = (k == back_steps - 1) ? 100 : 20;
// remember the second-to-last step for logging
if (k == back_steps - 2)
{
prev_shoulder = s;
prev_elbow = e;
prev_gripper = g;
}
total_steps++;
}
// force the final step to be exactly the initial pose
if (total_steps > 0)
{
shoulder_angles[total_steps - 1] = ARM_INIT_SHOULDER;
elbow_angles[total_steps - 1] = ARM_INIT_ELBOW;
gripper_angles[total_steps - 1] = ARM_INIT_GRIPPER;
timeouts[total_steps - 1] = 100;
// log the last queued change
Serial.printf("[BackToInit] Last change: (%d,%d,%d) -> (%d,%d,%d)\n", prev_shoulder, prev_elbow, prev_gripper, ARM_INIT_SHOULDER, ARM_INIT_ELBOW, ARM_INIT_GRIPPER);
// log the actual servo state before the move
Serial.printf("[BackToInit] Actual before: (%d,%d,%d)\n", arm.getCurrentShoulder(), arm.getCurrentElbow(), arm.getCurrentGripper());
// update the tracked pose
current_x = ARM_INIT_SHOULDER;
current_y = ARM_INIT_ELBOW;
current_z = ARM_INIT_GRIPPER;
}
}
// start the queued motion
if (!emergency)
{
// log the last step in the queue
if (total_steps > 0)
{
Serial.printf("[ArbQ] Last step: (%d,%d,%d)\n", shoulder_angles[total_steps - 1], elbow_angles[total_steps - 1], gripper_angles[total_steps - 1]);
}
arm.arbitrary(shoulder_angles, elbow_angles, gripper_angles, timeouts, total_steps);
arm_enabled = true;
M5.Lcd.setCursor(0, 100);
M5.Lcd.setTextColor(GREEN, BLACK);
M5.Lcd.print("ARM MOVING...");
}
}
// Button B: emergency stop
if (M5.BtnB.wasPressed())
{
M5.Lcd.fillRect(0, 100, 320, 140, BLACK); // clear the status area
arm_enabled = false; // cancel the running motion
arm.neutral(); // return the servos to neutral
M5.Lcd.setCursor(0, 100);
M5.Lcd.setTextColor(RED, BLACK);
M5.Lcd.print("EMERGENCY STOP");
}
// advance the motion queue while enabled
if (arm_enabled)
{
// show the current joint angles
M5.Lcd.setCursor(0, 160);
M5.Lcd.setTextColor(WHITE, BLACK);
M5.Lcd.printf("cur: %3d,%3d,%3d", arm.getCurrentShoulder(), arm.getCurrentElbow(), arm.getCurrentGripper());
bool done = arm.action();
// debug logging:
Serial.printf("[Action] Actual: (%d,%d,%d)\n", arm.getCurrentShoulder(), arm.getCurrentElbow(), arm.getCurrentGripper());
Serial.printf("[Action] current_x,y,z: (%d,%d,%d)\n", current_x, current_y, current_z);
Serial.printf("[Action] Target: (%d,%d,%d)\n", arm.getTargetShoulder(), arm.getTargetElbow(), arm.getTargetGripper());
Serial.printf("[Action] QueueIndex: %d / %d\n", arm.getActionIndex(), arm.getActionCount());
// the same getters, logged once more as queue head/tail
Serial.printf("[Action] Queue head: (%d,%d,%d)\n", arm.getTargetShoulder(), arm.getTargetElbow(), arm.getTargetGripper());
Serial.printf("[Action] Queue tail: (%d,%d,%d)\n", arm.getCurrentShoulder(), arm.getCurrentElbow(), arm.getCurrentGripper());
// finished: queue empty and the arm is back at the initial pose
if (done && arm.getCurrentShoulder() == ARM_INIT_SHOULDER && arm.getCurrentElbow() == ARM_INIT_ELBOW && arm.getCurrentGripper() == ARM_INIT_GRIPPER)
{
arm_enabled = false;
M5.Lcd.setCursor(0, 100);
M5.Lcd.setTextColor(YELLOW, BLACK);
M5.Lcd.print("ARM DONE ");
}
}
}
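Servo control module (modules/arm-controller.h)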
/*
ArmController
- drives three SG90 servos via the ESP32 LEDC PWM peripheral
- PWM: 50 Hz -> 20 ms period, 8-bit resolution (duty 0-255)
- -90 deg: 0.5 ms pulse = duty 6.4 / 256
- 0 deg: 1.45 ms pulse = duty 18.56 / 256
- 90 deg: 2.4 ms pulse = duty 30.72 / 256
*/
#pragma once
#include <M5Unified.h>
class ArmController {
public:
static const uint16_t MAX_ACTIONS = 1000;
double PWM_FREQUENCY = 50;
double PWM_RESOLUTION = 8;
uint8_t SHOULDER_CHANNEL = 0;
uint8_t ELBOW_CHANNEL = 1;
uint8_t GRIPPER_CHANNEL = 2;
int getActionIndex() const { return _action_index; }
int getActionCount() const { return _action_count; }
int getTargetShoulder() const {
if (_action_index == 0) return _shoulder_action_queue[0];
return _shoulder_action_queue[_action_index - 1];
}
int getTargetElbow() const {
if (_action_index == 0) return _elbow_action_queue[0];
return _elbow_action_queue[_action_index - 1];
}
int getTargetGripper() const {
if (_action_index == 0) return _gripper_action_queue[0];
return _gripper_action_queue[_action_index - 1];
}
ArmController(uint8_t pin_shoulder, uint8_t pin_elbow, uint8_t pin_gripper)
{
_pin_shoulder = pin_shoulder;
_pin_elbow = pin_elbow;
_pin_gripper = pin_gripper;
_pwm_resolution_pulse = pow(2, PWM_RESOLUTION);
}
void begin()
{
ledcSetup(SHOULDER_CHANNEL, PWM_FREQUENCY, PWM_RESOLUTION);
ledcSetup(ELBOW_CHANNEL, PWM_FREQUENCY, PWM_RESOLUTION);
ledcSetup(GRIPPER_CHANNEL, PWM_FREQUENCY, PWM_RESOLUTION);
ledcAttachPin(_pin_shoulder, SHOULDER_CHANNEL);
ledcAttachPin(_pin_elbow, ELBOW_CHANNEL);
ledcAttachPin(_pin_gripper, GRIPPER_CHANNEL);
}
void arbitrary(int shoulder_angles[], int elbow_angles[], int gripper_angles[], int timeouts[], int action_num)
{
unsigned long now = millis();
unsigned long cumulative_timeout = 0;
_action_count = action_num > MAX_ACTIONS ? MAX_ACTIONS : action_num;
for (uint16_t i = 0; i < _action_count; i++)
{
_shoulder_action_queue[i] = shoulder_angles[i];
_elbow_action_queue[i] = elbow_angles[i];
_gripper_action_queue[i] = gripper_angles[i];
cumulative_timeout += timeouts[i];
_action_timeout_queue[i] = now + cumulative_timeout;
}
_action_index = 0;
}
void neutral()
{
// move all joints to 0 degrees in one step
int shoulder_angles[1] = {0};
int elbow_angles[1] = {0};
int gripper_angles[1] = {0};
int timeouts[1] = {100};
arbitrary(shoulder_angles, elbow_angles, gripper_angles, timeouts, 1);
}
bool action()
{
if (_action_index >= _action_count)
{
return true;
}
if (millis() >= _action_timeout_queue[_action_index])
{
ledcWrite(SHOULDER_CHANNEL, _angle2pulse(_shoulder_action_queue[_action_index]));
ledcWrite(ELBOW_CHANNEL, _angle2pulse(_elbow_action_queue[_action_index]));
ledcWrite(GRIPPER_CHANNEL, _angle2pulse(_gripper_action_queue[_action_index]));
_action_index++;
}
return false;
}
int getCurrentShoulder() {
if (_action_index < _action_count) return _shoulder_action_queue[_action_index];
return _shoulder_action_queue[_action_count > 0 ? _action_count - 1 : 0];
}
int getCurrentElbow() {
if (_action_index < _action_count) return _elbow_action_queue[_action_index];
return _elbow_action_queue[_action_count > 0 ? _action_count - 1 : 0];
}
int getCurrentGripper() {
if (_action_index < _action_count) return _gripper_action_queue[_action_index];
return _gripper_action_queue[_action_count > 0 ? _action_count - 1 : 0];
}
private:
uint8_t _pin_shoulder = 19;
uint8_t _pin_elbow = 5;
uint8_t _pin_gripper = 22;
uint32_t _pwm_resolution_pulse = 256;
int32_t _shoulder_action_queue[MAX_ACTIONS] = {0};
int32_t _elbow_action_queue[MAX_ACTIONS] = {0};
int32_t _gripper_action_queue[MAX_ACTIONS] = {0};
uint32_t _action_timeout_queue[MAX_ACTIONS] = {0};
uint16_t _action_index = 0; // next action to execute; uint16_t so queues larger than 255 steps work
uint16_t _action_count = 0; // number of queued actions
// convert an angle (-90..90 deg) into an 8-bit LEDC duty value:
// pulse [ms] = 0.0105556 * (angle + 90) + 0.5; duty = pulse / 20 ms * 256
uint32_t _angle2pulse(int angle)
{
return (uint32_t)((0.0105555555 * (angle + 90) + 0.5) / 20.0 * _pwm_resolution_pulse);
}
};
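Relay script running on the UnitV2 (start.py)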
import serial
import time
import traceback
import subprocess
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("-d", "--debug", action="store_true", help="Whether to enable debug print.")
parser.add_argument("-s", "--streaming", action="store_true", help="Enable live-streaming.")
args = parser.parse_args()
ser = serial.Serial("/dev/ttyS1", 115200, timeout=0.1)
if args.debug:
    print("Debug Print enabled.")
if args.streaming:
    print("Streaming enabled.")
# build the camera command line, passing flags only when enabled
camera_args = ["./camera", "0", "--on-demand"]
if args.debug:
    camera_args.append("--debug")
if args.streaming:
    camera_args.append("--streaming")
try:
    with subprocess.Popen(camera_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) as proc:
        while True:
            # wait for a START command from the M5Stack
            while True:
                try:
                    data = ser.readline().decode("utf-8")
                    if not data.startswith("START,"):
                        continue
                    print(f"serial>> {data}")
                    # hand the requested confidence threshold to camera.cpp
                    on_demand = f"{data.split(',')[1].strip()}\n"
                    with open("/home/m5stack/on_demand.txt", "w") as fo:
                        fo.write(on_demand)
                    break
                except Exception:
                    traceback.print_exc()
            # read result lines from the camera process and forward them
            while True:
                # strip spaces so each record is one comma-separated token
                line = proc.stdout.readline().decode("utf-8").strip().replace(" ", "")
                print(f"camera>> {line}")
                ser.write((line + "\n").encode("utf-8"))
                ser.flush()
                time.sleep(0.1)
                if "END" in line:
                    break
except Exception:
    traceback.print_exc()
ser.close()
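Capture and inference application source (camera.cpp)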
#include <unistd.h>
#include <cstdio>
#include <cstring>
#include <ctime>
#include <iostream>
#include <fstream>
#include "opencv2/opencv.hpp"
#include "opencv2/videoio/videoio_c.h"
#include "nadjieb/mjpeg_streamer.hpp"
#include "../tensorflow-lite/tensorflow/lite/interpreter.h"
#include "../tensorflow-lite/tensorflow/lite/kernels/register.h"
#include "../tensorflow-lite/tensorflow/lite/model.h"
#include "get_result.h"
#define CLASSIFIER_INPUT_WIDTH 320 // input image size
#define CLASSIFIER_INPUT_HEIGHT 320 // input image size
#define INFERENCE_INTERVAL_MS 200 // inference interval
#define CONF_THRESH 0.5f // confidence threshold
using MJPEGStreamer = nadjieb::MJPEGStreamer;
static bool use_debug = false;
static bool use_streaming = false;
static bool on_demand = false;
static const char* CLASS_NAMES[] = { "driver", "nipper", "needle-nose plier", "wire stripper" };
static const char* MODEL_PATH = "yolov8n.tflite"; // model file name
/*
* Returns the current time in milliseconds
*/
long long get_current_time() {
struct timespec ts;
clock_gettime(CLOCK_REALTIME, &ts);
return ts.tv_sec * 1000LL + ts.tv_nsec / 1000000;
}
/*
* set image to input tensor
*/
void preprocess(const cv::Mat& src, float* dst) {
cv::Mat resized, rgb, float_img;
cv::resize(src, resized, cv::Size(CLASSIFIER_INPUT_WIDTH, CLASSIFIER_INPUT_HEIGHT));
cv::cvtColor(resized, rgb, cv::COLOR_BGR2RGB);
rgb.convertTo(float_img, CV_32FC3, 1.0 / 255.0);
std::memcpy(dst, float_img.data, sizeof(float) * CLASSIFIER_INPUT_WIDTH * CLASSIFIER_INPUT_HEIGHT * 3);
}
int main(int argc, char** argv) {
printf("OPENCV VERSION: %d_%d\n", CV_MAJOR_VERSION, CV_MINOR_VERSION);
if (argc < 2) {
printf("Requires one parameter (ID of the webcam).\n");
printf("You can find these via `v4l2-ctl --list-devices`.\n");
printf("E.g. for:\n");
printf(" C922 Pro Stream Webcam (usb-70090000.xusb-2.1):\n");
printf(" /dev/video0\n");
printf("The ID of the webcam is 0\n");
exit(1);
}
for (int ix = 2; ix < argc; ix++) {
if (strcmp(argv[ix], "--debug") == 0) {
printf("Enabling debug mode\n");
use_debug = true;
}
if (strcmp(argv[ix], "--streaming") == 0) {
printf("Enabling streaming mode\n");
use_streaming = true;
}
if (strcmp(argv[ix], "--on-demand") == 0) {
printf("Enabling on-demand mode\n");
on_demand = true;
}
}
MJPEGStreamer streamer;
streamer.start(80);
std::vector<int> params = {cv::IMWRITE_JPEG_QUALITY, 90};
// open the camera...
cv::VideoCapture camera(atoi(argv[1]));
if (!camera.isOpened()) {
std::cerr << "ERROR: Could not open camera" << std::endl;
return 1;
}
std::cout << "Resolution: " << camera.get(CV_CAP_PROP_FRAME_WIDTH)
<< "x" << camera.get(CV_CAP_PROP_FRAME_HEIGHT) << std::endl;
// tensorflow ---------------------------------------------
// read classifier model
auto model = tflite::FlatBufferModel::BuildFromFile(MODEL_PATH);
if (!model) {
std::cerr << "ERROR: failed to load model." << std::endl;
return -1;
}
// create interpreter
tflite::ops::builtin::BuiltinOpResolver resolver;
std::unique_ptr<tflite::Interpreter> interpreter;
tflite::InterpreterBuilder(*model, resolver)(&interpreter);
// allocate memory
interpreter->AllocateTensors();
// get input tensor information
int input = interpreter->inputs()[0];
TfLiteIntArray* dims = interpreter->tensor(input)->dims;
int height = dims->data[1]; // CLASSIFIER_INPUT_HEIGHT
int width = dims->data[2]; // CLASSIFIER_INPUT_WIDTH
int channels = dims->data[3];
std::cout << "Input tensor shape: " << height << "x" << width << "x" << channels << std::endl;
// ------------------------------------------------------------
// this will contain the image from the webcam
cv::Mat frame;
int64_t next_frame = (int64_t)(get_current_time());
float score_thresh = CONF_THRESH;
std::vector<Detection> detections;
int64_t t0;
// main capture-and-inference loop
while (1) {
// schedule the next inference (INFERENCE_INTERVAL_MS between runs)
next_frame = (int64_t)get_current_time() + INFERENCE_INTERVAL_MS;
// capture the next frame from the webcam
camera >> frame;
if (on_demand) {
// in on-demand mode, infer only when start.py has written a threshold file
const char* filename = "on_demand.txt";
std::ifstream file(filename);
if (file.is_open()) {
std::string line;
std::getline(file, line);
score_thresh = std::stof(line);
file.close();
std::remove(filename);
// printf("on_demand.txt exists: score_thresh=%f\n", score_thresh);
} else {
goto jump_publish;
}
}
{ // inference block
// set image to input tensor
float* input_data = interpreter->typed_tensor<float>(input);
preprocess(frame, input_data);
// inference
t0 = (int64_t)(get_current_time());
interpreter->Invoke();
// printf("Inferenence time %lldms\n", (int64_t)(get_current_time()) - t0);
// get output tensor
int output_index = interpreter->outputs()[0];
TfLiteTensor* output_tensor = interpreter->tensor(output_index);
const float* output = output_tensor->data.f;
int num_classes = output_tensor->dims->data[1] - 4; // data[1] = 4 box coords + num_classes
int num_proposals = output_tensor->dims->data[2]; // number of candidate boxes
// printf("num_classes: %d, num_proposals: %d\n", num_classes, num_proposals);
// decode the proposals and apply NMS
detections = get_result(
frame,
width,
height,
output,
num_classes,
num_proposals,
score_thresh,
use_debug
);
// print results to stdout (start.py forwards them over serial)
printf("RESP,%lld,%d,%f\n", t0, (int)detections.size(), score_thresh);
for (size_t k = 0; k < detections.size(); k++) {
Detection detection = detections[k];
cv::Rect bb = detection.box;
printf("%s,%f,%d,%d,%d,%d\n",
CLASS_NAMES[detection.class_id],
detection.confidence,
bb.x,
bb.y,
bb.width,
bb.height
);
}
printf("END\n");
fflush(stdout);
}
jump_publish:
// draw bounding boxes on the frame
for (size_t k = 0; k < detections.size(); k++) {
Detection detection = detections[k];
cv::Rect bb = detection.box;
cv::rectangle(frame, cv::Point(bb.x, bb.y), cv::Point(bb.x + bb.width, bb.y + bb.height), cv::Scalar(255, 0, 255), 2);
cv::putText(frame, CLASS_NAMES[detection.class_id], cv::Point(bb.x, bb.y-5), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(255, 0, 255), 1);
cv::putText(frame, std::to_string(detection.confidence), cv::Point(bb.x, bb.y-15), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(255, 0, 255), 1);
}
// publish the annotated frame to the MJPEG stream
if (use_streaming) {
if (streamer.isRunning()) {
std::vector<uchar> buff_bgr;
cv::imencode(".jpg", frame, buff_bgr, params);
streamer.publish("/stream", std::string(buff_bgr.begin(), buff_bgr.end()));
} else {
printf("streamer is not alive!\n");
}
}
// wait until the next scheduled inference time
int64_t now = (int64_t)get_current_time();
if (now < next_frame) {
usleep((useconds_t)((next_frame - now) * 1000));
}
}
streamer.stop();
return 0;
}
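If built as above, the binary can also be started by hand for testing, the same way start.py launches it (webcam ID 0 is an example):

./camera 0 --on-demand --streaming

With --streaming enabled, the annotated frames are published as an MJPEG stream on port 80 under /stream.

Detection post-processing (get_result.h)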
#include "opencv2/opencv.hpp"
const float NMS_THRESH = 0.5f;
struct Detection {
cv::Rect box; // (x, y, width, height)
float confidence;
int class_id;
};
// IoU (Intersection over Union) of two rectangles
float iou(const cv::Rect2f& a, const cv::Rect2f& b) {
float inter = (a & b).area();
float union_ = a.area() + b.area() - inter;
return union_ > 0.0f ? inter / union_ : 0.0f;
}
/*
 * m: num_classes
 * n: num_proposals
 * output tensor = [[
 *   [cx_0, cx_1, ..., cx_{n-1}],
 *   [cy_0, cy_1, ..., cy_{n-1}],
 *   [w_0, w_1, ..., w_{n-1}],
 *   [h_0, h_1, ..., h_{n-1}],
 *   [p_{0,0}, p_{0,1}, ..., p_{0,n-1}],
 *   :
 *   [p_{m-1,0}, p_{m-1,1}, ..., p_{m-1,n-1}]
 * ]]
 * where p_{j,i} is the score of class j for proposal i.
 */
std::vector<Detection> get_result(
cv::Mat frame,
const int input_width,
const int input_height,
const float* output,
int num_classes,
int num_proposals,
const float score_thresh,
bool use_debug = false
) {
// temporary vector for NMS
std::vector<cv::Rect> boxes;
std::vector<float> confidences;
std::vector<int> class_ids;
// image size to input size ratio
float x_factor = frame.cols / (input_width * 1.0f);
float y_factor = frame.rows / (input_height * 1.0f);
// printf("x_factor: %f, y_factor: %f\n", x_factor, y_factor);
for (int i = 0; i < num_proposals; ++i) {
const float* data = output + (4 * num_proposals) + i;
// find the class with the highest confidence score
float max_score = 0.0f;
int best_class_id = -1;
for (int j = 0; j < num_classes; ++j) {
if (data[j * num_proposals] > max_score) {
max_score = data[j * num_proposals];
best_class_id = j;
}
}
if (max_score < score_thresh) continue;
// get the bounding box (cx, cy, w, h)
float cx = output[0 * num_proposals + i];
float cy = output[1 * num_proposals + i];
float w = output[2 * num_proposals + i];
float h = output[3 * num_proposals + i];
// printf("cx: %f, cy: %f, w: %f, h: %f\n", cx, cy, w, h);
// (cx, cy, w, h) to (x, y, w, h)
int left = static_cast<int>((cx - 0.5 * w) * x_factor * input_width);
int top = static_cast<int>((cy - 0.5 * h) * y_factor * input_height);
int width = static_cast<int>(w * x_factor * input_width);
int height = static_cast<int>(h * y_factor * input_height);
boxes.emplace_back(left, top, width, height);
confidences.push_back(max_score);
class_ids.push_back(best_class_id);
if (use_debug) printf("class=%d, score=%f, bb=[%d, %d, %d, %d]\n", best_class_id, max_score, left, top, width, height);
}
// NMS (Non-Maximum Suppression):
// if the IoU of two boxes exceeds the threshold, drop the one with the lower confidence.
std::vector<bool> nms(boxes.size(), false); // true = suppressed
for (int index = 0; index < (int)boxes.size(); index++) {
if (nms[index]) continue;
cv::Rect& a = boxes[index];
for (int other = index + 1; other < boxes.size(); other++) {
cv::Rect& b = boxes[other];
float _iou = iou(a, b);
if (use_debug) printf("NMS %d vs %d: score=%f vs %f, iou=%f\n", index, other, confidences[index], confidences[other], _iou);
if (_iou > NMS_THRESH) {
if (confidences[index] > confidences[other]) {
nms[other] = true;
if (use_debug) printf("NMS: %d removed\n", other);
} else {
nms[index] = true;
if (use_debug) printf("NMS: %d removed\n", index);
break;
}
}
}
}
std::vector<Detection> detections;
for (int index = 0; index < boxes.size(); index++) {
if (nms[index]) continue;
Detection det;
det.box = boxes[index];
det.confidence = confidences[index];
det.class_id = class_ids[index];
detections.push_back(det);
}
return detections;
}
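Robot arm configuration (config.h)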
#pragma once
// --- arm initial position ---
#define ARM_INIT_SHOULDER -80 // shoulder initial angle
#define ARM_INIT_ELBOW -90 // elbow initial angle
#define ARM_INIT_GRIPPER 80 // gripper initial angle
#define ARM_INIT_TIMEOUTS 1500 // default step wait time (ms)
// --- arm motion pattern (difference from initial value) ---
#define ARM_STEP_NUM 5
// --- GPIO ---
#define BUTTON_PIN 15 // button
#define STAND_LIGHT_PIN 21 // stand light
#define ARM1_SHOULDER_PIN 19 // robot arm 1 shoulder
#define ARM2_ELBOW_PIN 5 // robot arm 2 elbow
#define GRIPPER_PIN 22 // gripper
#define CAMERA_SERIAL_BAUDRATE 115200
#define CAMERA_SERIAL_RX_PIN 16 // object recognition camera RX
#define CAMERA_SERIAL_TX_PIN 17 // object recognition camera TX
const int action_num = 5; // number of motion steps (matches ARM_STEP_NUM)
static const int ARM_STEP_SHOULDER_DIFF[ARM_STEP_NUM] = {80, 80, 320, 320, 0}; // shoulder each step: difference from initial value
static const int ARM_STEP_ELBOW_DIFF[ARM_STEP_NUM] = {0, 0, 0, 0, 0}; // elbow each step: difference from initial value
static const int ARM_STEP_GRIPPER_DIFF[ARM_STEP_NUM] = {0, -130, -130, 0, 0}; // gripper each step: difference from initial value
static const int ARM_STEP_TIMEOUTS[ARM_STEP_NUM] = {ARM_INIT_TIMEOUTS, ARM_INIT_TIMEOUTS, ARM_INIT_TIMEOUTS, ARM_INIT_TIMEOUTS, ARM_INIT_TIMEOUTS}; // each step wait time (ms)
// tool detection system arm origin offset (mm)
#define TOOL_OFFSET_X 10.0f
#define TOOL_OFFSET_Y 5.0f
#define TOOL_OFFSET_Z 0.0f
// robot arm link lengths (mm)
#define LINK1_LENGTH 100.0f
#define LINK2_LENGTH 80.0f
#define LINK3_LENGTH 60.0f
// servo angle range (degrees)
#define SERVO_BASE_MIN 0.0f
#define SERVO_BASE_MAX 180.0f
#define SERVO_SHOULDER_MIN 0.0f
#define SERVO_SHOULDER_MAX 180.0f
#define SERVO_ELBOW_MIN 0.0f
#define SERVO_ELBOW_MAX 180.0f
// servo pulse widths (microseconds)
#define SERVO_PWM_MIN 500
#define SERVO_PWM_MAX 2500
// working area (mm)
#define WORKSPACE_X_MIN 0.0f
#define WORKSPACE_X_MAX 200.0f
#define WORKSPACE_Y_MIN 0.0f
#define WORKSPACE_Y_MAX 200.0f
#define WORKSPACE_Z_MIN 0.0f
#define WORKSPACE_Z_MAX 200.0f
// arm origin (shoulder position) used by the inverse kinematics (mm)
#define PLACE_X0 100.0f
#define PLACE_Y0 100.0f
// tool type specific placement coordinates (mm)
#define PLACE_X_WRENCH 150.0f
#define PLACE_Y_WRENCH 50.0f
#define PLACE_Z_WRENCH 0.0f
#define PLACE_X_SCREWDRIVER 170.0f
#define PLACE_Y_SCREWDRIVER 60.0f
#define PLACE_Z_SCREWDRIVER 0.0f
#define AI_CONFIDENCE_THRESHOLD 0.7f
// tool specific gripping height (mm)
#define GRIPPER_Z_WRENCH 20.0f
#define GRIPPER_Z_SCREWDRIVER 15.0f
// --- main.cpp ---
// LCD
#define LCD_TEXTSIZE 2
#define LCD_STATUS_Y 120
#define LCD_WIFI_Y 30 // unused
#define LCD_TIME_Y 60 // unused
#define LCD_EMGSTOP_Y 95
#define LCD_ACTIONTIME_Y 150
#define LCD_IO_Y 180
#define LCD_STATUS_WIDTH 320
#define LCD_STATUS_HEIGHT 20
#define LCD_ACTIONTIME_WIDTH 240
#define LCD_ACTIONTIME_HEIGHT 20
// IO pin assignments
#define START_PIN 36
#define STOP_PIN 39
#define EMG_PIN 34
// AB simultaneous press enable/disable toggle time (ms)
#define AB_ENABLE_MIN 2000
#define AB_ENABLE_MAX 5000
#define AB_DISABLE_MIN 5000