TD_ICT
Published

DeskSweeper: A robot that cleans your desk automatically

We developed "DeskSweeper, " a robotic arm that automatically tidies up your desk after crafting, keeping your workspace neat with no effort.

AdvancedProtip10 hours112
DeskSweeper: A robot that cleans your desk automatically

Things used in this project

Hardware components

ESP32 Basic Core IoT Development Kit
M5Stack ESP32 Basic Core IoT Development Kit
×1
SG90 Micro-servo motor
SG90 Micro-servo motor
×1
TSU-04 Servo Motor(TAMIYA)
×2
UnitV2
M5Stack UnitV2
×1

Story

Read more

Schematics

CircuitDiagram

Code

Program for operating a robotic arm(main.cpp)

C/C++
PlatformIO
#include <M5Unified.h>
#include "config.h"
#include "modules/arm-controller.h"
#include <math.h>

// arm controller object
ArmController arm(ARM1_SHOULDER_PIN, ARM2_ELBOW_PIN, GRIPPER_PIN);

/*
    Calculate the inverse kinematics of a two-joint robot arm and 
    the rotation angle from the initial position to the target position.
    
    Args:
        x1, y1 (float): target position of arm
        theta1 (float): calculated angle of first joint
        theta2 (float): calculated angle of second joint
*/
void calculate_joint_angles(int x1, int y1, int *theta1, int *theta2)
{

    int dx = x1 - PLACE_X0;
    int dy = y1 - PLACE_Y0;
    int D_squared = dx * dx + dy * dy;
    int D = sqrt(D_squared);
    double rad_to_deg = 180. / PI;

    double cos_theta2 = (D_squared - LINK1_LENGTH * LINK1_LENGTH - LINK2_LENGTH * LINK2_LENGTH) / (2 * LINK1_LENGTH * LINK2_LENGTH);
    cos_theta2 = max(-1.0, min(1.0, cos_theta2));
    *theta2 = (int) (acos(cos_theta2) * rad_to_deg);

    double alpha = atan2(dy, dx);

    double cos_beta = (D_squared + LINK1_LENGTH * LINK1_LENGTH - LINK2_LENGTH * LINK2_LENGTH) / (2 * D * LINK1_LENGTH);
    cos_beta = max(-1.0, min(1.0, cos_beta));
    double beta = acos(cos_beta);

    *theta1 = (int) ((alpha - beta) * rad_to_deg);
}

//
void setup()
{                             // ---
    M5.begin();               // M5Stack
    M5.Lcd.clear();           // LCD
    M5.Lcd.setTextSize(2);    // LCD
    M5.Lcd.setCursor(0, 0);   // LCD
    M5.Lcd.print("ARM Demo"); //
    M5.Lcd.setCursor(0, 20);
    M5.Lcd.print("begin...");
    Serial.begin(115200);
    Serial.println("setup: begin...");
    arm.begin(); // PWM
    M5.Lcd.setCursor(0, 40);
    M5.Lcd.print("neutral...");
    Serial.println("setup: neutral...");
    arm.neutral(); //
} // ---

//
static int current_x; // setup()
static int current_y; // setup()
static int current_z; // setup()
// x, y, z
void moveToXYZ(int x, int y, int z, bool move_now)
{                                   // ---
    static int shoulder_angles[1];  //
    static int elbow_angles[1];     // ---
    static int gripper_angles[1];   // ---
    static int timeouts[1] = {300}; //
    shoulder_angles[0] = x;
    elbow_angles[0] = y;
    gripper_angles[0] = z;
    M5.Lcd.setCursor(0, 60);
    M5.Lcd.printf("XYZ:%d,%d,%d", x, y, z);
    M5.Lcd.setCursor(0, 80);
    M5.Lcd.print("arbitrary...");
    Serial.printf("moveToXYZ: x=%d, y=%d, z=%d\n", x, y, z);
    arm.arbitrary(shoulder_angles, elbow_angles, gripper_angles, timeouts, 1); // 1
    Serial.println("moveToXYZ: arbitrary called");
    // current_x, current_y, current_z
} // ---

//
void loop()
{                                     // ---
    static bool arm_enabled = false;  //
    static int x = ARM_INIT_SHOULDER; //
    static int y = ARM_INIT_ELBOW;    //
    static int z = ARM_INIT_GRIPPER;  //

    M5.update(); // M5Stack
    // :
    // Serial.println("loop: start");

    // AB
    if (M5.BtnA.wasPressed())
    {
        M5.Lcd.fillRect(0, 100, 320, 140, BLACK); //
        Serial.println("loop: BtnA wasPressed");
        arm_enabled = false; //
        bool emergency = false;
        // ---  ---
        Serial.println("loop: (BtnA) Camera request start"); //
        Serial2.begin(CAMERA_SERIAL_BAUDRATE, SERIAL_8N1, CAMERA_SERIAL_RX_PIN, CAMERA_SERIAL_TX_PIN);
        String send_cmd = "START,0.7\n";
        Serial2.print(send_cmd);
        Serial.printf("camera TX: %s\n", send_cmd.c_str());
        M5.Lcd.setCursor(0, LCD_STATUS_Y);
        M5.Lcd.setTextColor(YELLOW, BLACK);
        M5.Lcd.printf("TX: %s", send_cmd.c_str());
        unsigned long start_time = millis();
        String lines[10];
        int line_count = 0;
        bool found_pliers = false;
        int pliers_x = x, pliers_y = y;
        // 3
        while (millis() - start_time < 3000)
        {
            M5.update();
            if (M5.BtnB.isPressed())
            {
                emergency = true;
                break;
            }
            if (Serial2.available())
            {
                String line = Serial2.readStringUntil('\n');
                line.trim();
                if (line.length() == 0)
                    continue;
                Serial.print("camera RX: ");
                Serial.println(line);
                if (line == "END")
                    break;
                lines[line_count < 10 ? line_count++ : 9] = line;
            }
        }
        if (line_count == 0)
        {
            M5.Lcd.setCursor(0, 140);
            M5.Lcd.setTextColor(RED, BLACK);
        }
        //
        for (int i = 1; i < line_count; ++i)
        {
            int idx1 = lines[i].indexOf(",");
            if (idx1 < 0)
                continue;
            String label = lines[i].substring(0, idx1);
            Serial.printf("camera parse: %s\n", lines[i].c_str());
            int idx2 = lines[i].indexOf(",", idx1 + 1);
            int idx3 = lines[i].indexOf(",", idx2 + 1);
            int idx4 = lines[i].indexOf(",", idx3 + 1);
            int idx5 = lines[i].indexOf(",", idx4 + 1);
            if (idx2 > 0 && idx3 > 0 && idx4 > 0)
            {
                float conf = lines[i].substring(idx1 + 1, idx2).toFloat();
                int left = lines[i].substring(idx2 + 1, idx3).toInt();
                int top = lines[i].substring(idx3 + 1, idx4).toInt();
                int width = lines[i].substring(idx3 + 1, idx4).toInt();
                int height = lines[i].substring(idx3 + 1, idx4).toInt();  
                pliers_x = left + width / 2;
                pliers_y = top + height / 2;
                Serial.printf("camera parse: pliers found conf=%.2f x=%d y=%d z=%d\n", conf, pliers_x, pliers_y);
                found_pliers = true;
                break;
            }
            M5.Lcd.print("RX Timeout");
        }

        // ---  ---
        const int max_total_steps = 300; //
        int shoulder_angles[max_total_steps];
        int elbow_angles[max_total_steps];
        int gripper_angles[max_total_steps];
        int timeouts[max_total_steps];
        int total_steps = 0;

        // --- 1.  ---
        if (!found_pliers)
        {
            current_x = ARM_INIT_SHOULDER;
            current_y = ARM_INIT_ELBOW;
            current_z = ARM_INIT_GRIPPER;
        }
        int start_x = current_x, start_y = current_y, start_z = current_z;
        int goal_x, goal_y, goal_z;
        if (found_pliers)
        {
            calculate_joint_angles(pliers_x, pliers_y, &goal_x, &goal_y);
            M5.Lcd.setCursor(0, 120);
            M5.Lcd.setTextColor(GREEN, BLACK);
        }
        else
        {
            goal_x = ARM_INIT_SHOULDER + ARM_STEP_SHOULDER_DIFF[0];
            goal_y = ARM_INIT_ELBOW + ARM_STEP_ELBOW_DIFF[0];
            M5.Lcd.setCursor(0, 120);
            M5.Lcd.setTextColor(RED, BLACK);
            Serial.println("camera parse: pliers not found");
        }
        goal_z = ARM_INIT_GRIPPER + ARM_STEP_GRIPPER_DIFF[0];
        M5.Lcd.printf("STEP0: %d,%d,%d", goal_x, goal_y, goal_z);

        // 1. 0
        const int steps1 = 50;
        for (int i = 1; i <= steps1; ++i)
        {
            if (total_steps >= max_total_steps)
                break;
            shoulder_angles[total_steps] = (int)(start_x + (goal_x - start_x) * (float)i / steps1);
            elbow_angles[total_steps] = (int)(start_y + (goal_y - start_y) * (float)i / steps1);
            gripper_angles[total_steps] = (int)(start_z + (goal_z - start_z) * (float)i / steps1);
            timeouts[total_steps] = 20;
            total_steps++;
        }
        current_x = goal_x;
        current_y = goal_y;
        current_z = goal_z;

        // 2. B
        for (int step = 0; step < ARM_STEP_NUM; ++step)
        {
            if (emergency)
                break;
            int goal_shoulder = ARM_INIT_SHOULDER + ARM_STEP_SHOULDER_DIFF[step];
            int goal_elbow = ARM_INIT_ELBOW + ARM_STEP_ELBOW_DIFF[step];
            int goal_gripper = ARM_INIT_GRIPPER + ARM_STEP_GRIPPER_DIFF[step];

            M5.Lcd.setCursor(0, 120);
            M5.Lcd.setTextColor(YELLOW, BLACK);
            M5.Lcd.printf("STEP%d: %d,%d,%d   ", step, goal_shoulder, goal_elbow, goal_gripper);

            int s_x = current_x, s_y = current_y, s_z = current_z;
            const int steps2 = 30;
            for (int j = 1; j <= steps2; ++j)
            {
                if (total_steps >= max_total_steps)
                    break;
                shoulder_angles[total_steps] = (int)(s_x + (goal_shoulder - s_x) * (float)j / steps2);
                elbow_angles[total_steps] = (int)(s_y + (goal_elbow - s_y) * (float)j / steps2);
                gripper_angles[total_steps] = (int)(s_z + (goal_gripper - s_z) * (float)j / steps2);
                timeouts[total_steps] = 20;
                total_steps++;
            }
            current_x = goal_shoulder;
            current_y = goal_elbow;
            current_z = goal_gripper;

            if (total_steps >= max_total_steps)
                break;
            shoulder_angles[total_steps] = current_x;
            elbow_angles[total_steps] = current_y;
            gripper_angles[total_steps] = current_z;
            timeouts[total_steps] = ARM_STEP_TIMEOUTS[step];
            total_steps++;
        }

        // 3.
        if (!emergency)
        {
            int back_start_x = current_x, back_start_y = current_y, back_start_z = current_z;
            int back_goal_x = ARM_INIT_SHOULDER;
            int back_goal_y = ARM_INIT_ELBOW;
            int back_goal_z = ARM_INIT_GRIPPER;
            //
            int diff_x = abs(back_goal_x - back_start_x);
            int diff_y = abs(back_goal_y - back_start_y);
            int diff_z = abs(back_goal_z - back_start_z);
            int max_diff = diff_x;
            if (diff_y > max_diff)
                max_diff = diff_y;
            if (diff_z > max_diff)
                max_diff = diff_z;
            int back_steps = max_diff / 4; // 41
            if (back_steps < 2)
                back_steps = 2; // 2
            if (back_start_x == back_goal_x && back_start_y == back_goal_y && back_start_z == back_goal_z)
            {
                back_steps = 1;
            }
            int prev_shoulder = 0, prev_elbow = 0, prev_gripper = 0;
            for (int k = 0; k < back_steps; ++k)
            {
                if (total_steps >= max_total_steps)
                    break;
                float ratio = (back_steps == 1) ? 1.0f : (float)k / (back_steps - 1);
                int s = (int)(back_start_x + (back_goal_x - back_start_x) * ratio);
                int e = (int)(back_start_y + (back_goal_y - back_start_y) * ratio);
                int g = (int)(back_start_z + (back_goal_z - back_start_z) * ratio);
                shoulder_angles[total_steps] = s;
                elbow_angles[total_steps] = e;
                gripper_angles[total_steps] = g;
                timeouts[total_steps] = (k == back_steps - 1) ? 100 : 20;
                //
                if (k == back_steps - 2)
                {
                    prev_shoulder = s;
                    prev_elbow = e;
                    prev_gripper = g;
                }
                total_steps++;
            }
            // current_x
            if (total_steps > 0)
            {
                shoulder_angles[total_steps - 1] = ARM_INIT_SHOULDER;
                elbow_angles[total_steps - 1] = ARM_INIT_ELBOW;
                gripper_angles[total_steps - 1] = ARM_INIT_GRIPPER;
                timeouts[total_steps - 1] = 100;
                // Serial
                Serial.printf("[BackToInit] Last change: (%d,%d,%d) -> (%d,%d,%d)\n", prev_shoulder, prev_elbow, prev_gripper, ARM_INIT_SHOULDER, ARM_INIT_ELBOW, ARM_INIT_GRIPPER);
                //
                Serial.printf("[BackToInit] Actual before: (%d,%d,%d)\n", arm.getCurrentShoulder(), arm.getCurrentElbow(), arm.getCurrentGripper());
                //
                current_x = ARM_INIT_SHOULDER;
                current_y = ARM_INIT_ELBOW;
                current_z = ARM_INIT_GRIPPER;
            }
        }

        //
        if (!emergency)
        {
            // Serial
            if (total_steps > 0)
            {
                Serial.printf("[ArbQ] Last step: (%d,%d,%d)\n", shoulder_angles[total_steps - 1], elbow_angles[total_steps - 1], gripper_angles[total_steps - 1]);
            }
            arm.arbitrary(shoulder_angles, elbow_angles, gripper_angles, timeouts, total_steps);
            arm_enabled = true;
            M5.Lcd.setCursor(0, 100);
            M5.Lcd.setTextColor(GREEN, BLACK);
            M5.Lcd.print("ARM MOVING...");
        }
    }

    // B
    if (M5.BtnB.wasPressed())
    {
        M5.Lcd.fillRect(0, 100, 320, 140, BLACK); //
        arm_enabled = false;                      //
        arm.neutral();                            //
        M5.Lcd.setCursor(0, 100);
        M5.Lcd.setTextColor(RED, BLACK);
        M5.Lcd.print("EMERGENCY STOP");
    }

    //
    if (arm_enabled)
    {
        //
        M5.Lcd.setCursor(0, 160);
        M5.Lcd.setTextColor(WHITE, BLACK);
        M5.Lcd.printf("cur: %3d,%3d,%3d", arm.getCurrentShoulder(), arm.getCurrentElbow(), arm.getCurrentGripper());

        bool done = arm.action();
        // :
        Serial.printf("[Action] Actual: (%d,%d,%d)\n", arm.getCurrentShoulder(), arm.getCurrentElbow(), arm.getCurrentGripper());
        Serial.printf("[Action] current_x,y,z: (%d,%d,%d)\n", current_x, current_y, current_z);
        Serial.printf("[Action] Target: (%d,%d,%d)\n", arm.getTargetShoulder(), arm.getTargetElbow(), arm.getTargetGripper());
        Serial.printf("[Action] QueueIndex: %d / %d\n", arm.getActionIndex(), arm.getActionCount());
        //
        Serial.printf("[Action] Queue head: (%d,%d,%d)\n", arm.getTargetShoulder(), arm.getTargetElbow(), arm.getTargetGripper());
        Serial.printf("[Action] Queue tail: (%d,%d,%d)\n", arm.getCurrentShoulder(), arm.getCurrentElbow(), arm.getCurrentGripper());

        //
        if (done && arm.getCurrentShoulder() == ARM_INIT_SHOULDER && arm.getCurrentElbow() == ARM_INIT_ELBOW && arm.getCurrentGripper() == ARM_INIT_GRIPPER)
        {
            arm_enabled = false;
            M5.Lcd.setCursor(0, 100);
            M5.Lcd.setTextColor(YELLOW, BLACK);
            M5.Lcd.print("ARM DONE      ");
        }
    }
}

Program for operating a robotic arm(arm-controller.h)

C/C++
PlatformIO
/*
ArmController

- SG90
- 50Hz -> 20msec
- -90deg: 0.5msec = 6.4 / 256
- 0deg: 1.45msec = 18.56 / 256
- 90deg: 2.4msec = 30.72 / 256

*/
#include <M5Unified.h>

class ArmController {
public:
    static const uint16_t MAX_ACTIONS = 1000;
    double PWM_FREQUENCY = 50;
    double PWM_RESOLUTION = 8;
    uint8_t SHOULDER_CHANNEL = 0;
    uint8_t ELBOW_CHANNEL = 1;
    uint8_t GRIPPER_CHANNEL = 2;

    int getActionIndex() const { return _action_index; }
    int getActionCount() const { return _action_count; }
    int getTargetShoulder() const {
    if (_action_index == 0) return _shoulder_action_queue[0];
    return _shoulder_action_queue[_action_index - 1];
    }
    int getTargetElbow() const {
    if (_action_index == 0) return _elbow_action_queue[0];
    return _elbow_action_queue[_action_index - 1];
    }
    int getTargetGripper() const {
    if (_action_index == 0) return _gripper_action_queue[0];
    return _gripper_action_queue[_action_index - 1];
    }

    ArmController(uint8_t pin_shoulder, uint8_t pin_elbow, uint8_t pin_gripper)
    {
        _pin_shoulder = pin_shoulder;
        _pin_elbow = pin_elbow;
        _pin_gripper = pin_gripper;
        _pwm_resolution_pulse = pow(2, PWM_RESOLUTION);
    }
    void begin()
    {
        ledcSetup(SHOULDER_CHANNEL, PWM_FREQUENCY, PWM_RESOLUTION);
        ledcSetup(ELBOW_CHANNEL, PWM_FREQUENCY, PWM_RESOLUTION);
        ledcSetup(GRIPPER_CHANNEL, PWM_FREQUENCY, PWM_RESOLUTION);
        ledcAttachPin(_pin_shoulder, SHOULDER_CHANNEL);
        ledcAttachPin(_pin_elbow, ELBOW_CHANNEL);
        ledcAttachPin(_pin_gripper, GRIPPER_CHANNEL);
    }
    void arbitrary(int shoulder_angles[], int elbow_angles[], int gripper_angles[], int timeouts[], int action_num)
    {
        unsigned long now = millis();
        unsigned long cumulative_timeout = 0;
        _action_count = action_num > MAX_ACTIONS ? MAX_ACTIONS : action_num;
        for (uint8_t i = 0; i < _action_count; i++)
        {
            _shoulder_action_queue[i] = shoulder_angles[i];
            _elbow_action_queue[i] = elbow_angles[i];
            _gripper_action_queue[i] = gripper_angles[i];
            cumulative_timeout += timeouts[i];
            _action_timeout_queue[i] = now + cumulative_timeout;
        }
        _action_index = 0;
    }
    void neutral()
    {
        unsigned long now = millis();
        int shoulder_angles[1] = {0};
        int elbow_angles[1]    = {0};
        int gripper_angles[1]  = {0};
        int timeouts[1]        = {100};
        arbitrary(shoulder_angles, elbow_angles, gripper_angles, timeouts, 1);
    }
    bool action()
    {
        if (_action_index >= _action_count)
        {
            return true;
        }
        if (millis() >= _action_timeout_queue[_action_index])
        {
            ledcWrite(SHOULDER_CHANNEL, _angle2pulse(_shoulder_action_queue[_action_index]));
            ledcWrite(ELBOW_CHANNEL, _angle2pulse(_elbow_action_queue[_action_index]));
            ledcWrite(GRIPPER_CHANNEL, _angle2pulse(_gripper_action_queue[_action_index]));
            _action_index++;
        }
        return false;
    }
    int getCurrentShoulder() {
        if (_action_index < _action_count) return _shoulder_action_queue[_action_index];
        return _shoulder_action_queue[_action_count > 0 ? _action_count - 1 : 0];
    }
    int getCurrentElbow() {
        if (_action_index < _action_count) return _elbow_action_queue[_action_index];
        return _elbow_action_queue[_action_count > 0 ? _action_count - 1 : 0];
    }
    int getCurrentGripper() {
        if (_action_index < _action_count) return _gripper_action_queue[_action_index];
        return _gripper_action_queue[_action_count > 0 ? _action_count - 1 : 0];
    }

private:
    uint8_t _pin_shoulder = 19;
    uint8_t _pin_elbow = 5;
    uint8_t _pin_gripper = 22;
    uint32_t _pwm_resolution_pulse = 256;
    int32_t _shoulder_action_queue[MAX_ACTIONS] = {0};
    int32_t _elbow_action_queue[MAX_ACTIONS] = {0};
    int32_t _gripper_action_queue[MAX_ACTIONS] = {0};
    uint32_t _action_timeout_queue[MAX_ACTIONS] = {0};
    uint8_t _action_index = 0;
    uint8_t _action_count = 0;

    uint32_t _angle2pulse(int angle)
    {
        return (uint32_t)((0.0105555555 * (angle + 90) + 0.5) / 20.0 * _pwm_resolution_pulse);
    }
};

Program for object detection (start.py)

Python
Serial communication
import serial
import time
import traceback
import subprocess
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("-d", "--debug", action="store_true", help="Whether to enable debug print.")
parser.add_argument("-s", "--streaming", action="store_true", help="Enable live-streaming.")

args = parser.parse_args()

ser = serial.Serial("/dev/ttyS1", 115200, timeout=0.1)

if args.debug:
    print("Debug Print enabled.")
if args.streaming:
    print("Streaming enabled.")

try:
    with subprocess.Popen(["./camera", "0", "--on-demand", "--debug" if args.debug else "", "--streaming" if args.streaming else ""], stdout=subprocess.PIPE, stderr=subprocess.PIPE) as proc:
        while True:

            # waiting for serial command
            while True:
                try:
                    data = ser.readline().decode("utf-8")
                    if not data.startswith("START,"):
                        continue
                    print(f"serial>> {data}")
                    on_demand = f"{data.split(',')[1]}\n"
                    fo = open("/home/m5stack/on_demand.txt", "w")
                    fo.write(on_demand)
                    fo.close()
                    break
                except Exception:
                    traceback.print_exc()

            # Read lines from the camera process
            while True:
                line = proc.stdout.readline().decode("utf-8").replace(" ", "").encode("utf-8")
                print(f"camera>> {line.decode('utf-8')}")
                # send to serial
                ser.write(line + b"\n")
                ser.flush()
                time.sleep(0.1)
                if b"END" in line:
                    break


except Exception:
    traceback.print_exc()

ser.close()

Program for object detection (camera.cpp)

C/C++
capture an image and inference
#include <unistd.h>
#include "opencv2/opencv.hpp"
#include "iostream"
#include "opencv2/videoio/videoio_c.h"
#include "nadjieb/mjpeg_streamer.hpp"

#include <iostream>
#include <fstream>
#include <cstdio>

#include <time.h>
#include "../tensorflow-lite/tensorflow/lite/interpreter.h"
#include "../tensorflow-lite/tensorflow/lite/kernels/register.h"
#include "../tensorflow-lite/tensorflow/lite/model.h"
#include "get_result.h"

#define CLASSIFIER_INPUT_WIDTH 320 // input image size
#define CLASSIFIER_INPUT_HEIGHT 320 // input image size
#define INFERENCE_INTERVAL_MS 200 // inference interval
#define CONF_THRESH 0.5f // confidence threshold

using MJPEGStreamer = nadjieb::MJPEGStreamer;
static bool use_debug = false;
static bool use_streaming = false;
static bool on_demand = false;
static const char* CLASS_NAMES[] = { "driver", "nipper", "needle-nose plier", "wire stripper" };
static const char* MODEL_PATH = "yolov8n.tflite"; // model file name

/*
 * Returns the current time in milliseconds
 */
long long get_current_time() {
    struct timespec ts;
    clock_gettime(CLOCK_REALTIME, &ts);
    return ts.tv_sec * 1000LL + ts.tv_nsec / 1000000;
}

/*
 * set image to input tensor
 */
void preprocess(const cv::Mat& src, float* dst) {
    cv::Mat resized, rgb, float_img;
    cv::resize(src, resized, cv::Size(CLASSIFIER_INPUT_WIDTH, CLASSIFIER_INPUT_HEIGHT));
    cv::cvtColor(resized, rgb, cv::COLOR_BGR2RGB);
    rgb.convertTo(float_img, CV_32FC3, 1.0 / 255.0);
    std::memcpy(dst, float_img.data, sizeof(float) * CLASSIFIER_INPUT_WIDTH * CLASSIFIER_INPUT_HEIGHT * 3);
}

int main(int argc, char** argv) {

    printf("OPENCV VERSION: %d_%d\n", CV_MAJOR_VERSION, CV_MINOR_VERSION);

    if (argc < 2) {
        printf("Requires one parameter (ID of the webcam).\n");
        printf("You can find these via `v4l2-ctl --list-devices`.\n");
        printf("E.g. for:\n");
        printf("    C922 Pro Stream Webcam (usb-70090000.xusb-2.1):\n");
	    printf("            /dev/video0\n");
        printf("The ID of the webcam is 0\n");
        exit(1);
    }

    for (int ix = 2; ix < argc; ix++) {
        if (strcmp(argv[ix], "--debug") == 0) {
            printf("Enabling debug mode\n");
            use_debug = true;
        }
        if (strcmp(argv[ix], "--streaming") == 0) {
            printf("Enabling streaming mode\n");
            use_streaming = true;
        }
        if (strcmp(argv[ix], "--on-demand") == 0) {
            printf("Enabling on-demand mode\n");
            on_demand = true;
        }
    }

    MJPEGStreamer streamer;
    streamer.start(80);
    std::vector<int> params = {cv::IMWRITE_JPEG_QUALITY, 90};

    // open the camera...
    cv::VideoCapture camera(atoi(argv[1]));
    if (!camera.isOpened()) {
        std::cerr << "ERROR: Could not open camera" << std::endl;
        return 1;
    } 

    std::cout << "Resolution: " <<  camera.get(CV_CAP_PROP_FRAME_WIDTH) 
	    << "x" << camera.get(CV_CAP_PROP_FRAME_HEIGHT) << std::endl;

    // tensorflow ---------------------------------------------
    // read classifier model
    auto model = tflite::FlatBufferModel::BuildFromFile(MODEL_PATH);
    if (!model) {
        std::cerr << "ERROR: failed to load model." << std::endl;
        return -1;
    }

    // create interpreter
    tflite::ops::builtin::BuiltinOpResolver resolver;
    std::unique_ptr<tflite::Interpreter> interpreter;
    tflite::InterpreterBuilder(*model, resolver)(&interpreter);

    // allocate memory
    interpreter->AllocateTensors();

    // get input tensor information
    int input = interpreter->inputs()[0];
    TfLiteIntArray* dims = interpreter->tensor(input)->dims;
    int height = dims->data[1]; // CLASSIFIER_INPUT_HEIGHT
    int width = dims->data[2]; // CLASSIFIER_INPUT_WIDTH
    int channels = dims->data[3];
    std::cout << "Input tensor shape: " << height << "x" << width << "x" << channels << std::endl;

    // ------------------------------------------------------------

    // this will contain the image from the webcam
    cv::Mat frame;
    int64_t next_frame = (int64_t)(get_current_time());
    float score_thresh = CONF_THRESH;
    std::vector<Detection> detections;
    int64_t t0;

    // display the frame until you press a key
    while (1) {
        // 1000ms. between inference
        next_frame = (int64_t)((int64_t)(get_current_time()) + INFERENCE_INTERVAL_MS);

        // capture the next frame from the webcam
        camera >> frame;

        if (on_demand) {
            // 
            const char*filename = "on_demand.txt";
            std::ifstream file(filename);
            if (file.is_open()) {
                std::string line;
                std::getline(file, line);
                score_thresh = std::stof(line);
                file.close();
                std::remove(filename);
                // printf("on_demand.txt exists: score_thresh=%f\n", score_thresh);
            } else {
                goto jump_publish;
            }
        }

        { // block

            // set image to input tensor
            float* input_data = interpreter->typed_tensor<float>(input);
            preprocess(frame, input_data);

            // inference
            t0 = (int64_t)(get_current_time());
            interpreter->Invoke();
            // printf("Inferenence time %lldms\n", (int64_t)(get_current_time()) - t0);

            // get output tensor
            int output_index = interpreter->outputs()[0];
            TfLiteTensor* output_tensor = interpreter->tensor(output_index);
            const float* output = output_tensor->data.f;
            int num_classes = output_tensor->dims->data[1] - 4; //  = data[1] - BB4
            int num_proposals = output_tensor->dims->data[2]; // BB: 
            // printf("num_classes: %d, num_proposals: %d\n", num_classes, num_proposals);

            // 
            detections = get_result(
                frame, 
                width, 
                height, 
                output, 
                num_classes, 
                num_proposals,
                score_thresh,
                use_debug
            );

            // 
            printf("RESP,%lld,%d,%f\n", t0, detections.size(), score_thresh);
            for (int k = 0; k < detections.size(); k++) {
                Detection detection = detections[k];
                cv::Rect bb = detection.box;
                printf("%s,%f,%d,%d,%d,%d\n", 
                    CLASS_NAMES[detection.class_id], 
                    detection.confidence, 
                    bb.x, 
                    bb.y, 
                    bb.width, 
                    bb.height
                );
            }
            printf("END\n");
            fflush(stdout);
        }

jump_publish:

        // draw bounding box on frame
        for (int k = 0; k < detections.size(); k++) {
            Detection detection = detections[k];
            cv::Rect bb = detection.box;
            cv::rectangle(frame, cv::Point(bb.x, bb.y), cv::Point(bb.x + bb.width, bb.y + bb.height), cv::Scalar(255, 0, 255), 2);
            cv::putText(frame, CLASS_NAMES[detection.class_id], cv::Point(bb.x, bb.y-5), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(255, 0, 255), 1);
            cv::putText(frame, std::to_string(detection.confidence), cv::Point(bb.x, bb.y-15), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(255, 0, 255), 1);
        }
        
        // show the image on the window
        if (use_streaming) {
            if (streamer.isRunning()) {
                std::vector<uchar> buff_bgr;
                cv::imencode(".jpg", frame, buff_bgr, params);
                streamer.publish("/stream", std::string(buff_bgr.begin(), buff_bgr.end()));
            } else {
                printf("streamer is not alive!\n");
            }
        }

    }

    streamer.stop();

    return 0;
}

Program for object detection (get_result.h)

C/C++
helper functions
#include "opencv2/opencv.hpp"

const float NMS_THRESH = 0.5f;

struct Detection {
    cv::Rect box;       // (x, y, width, height)
    float confidence;
    int class_id;
};

// IOUIntersection over Union
float iou(const cv::Rect2f& a, const cv::Rect2f& b) {
    float inter = (a & b).area();
    float union_ = a.area() + b.area() - inter;
    return inter / union_;
}

/*
 * m: num_classes
 * n: num_proposals
 * output tensor = [[
 *  [cx_0, cx_1, ..., cx_n-1],
 *  [cy_0, cy_1, ..., cy_n-1],
 *  [w_0, w_1, ..., w_n-1],
 *  [h_0, h_1, ..., h_n-1],
 *  [p_0_0, p_1_0, ..., p_m_0],
 *  :
 *  [p_0_m-1, p_1_m-1, ..., p_m-1_n-1]
 * ]]
*/

std::vector<Detection> get_result(
    cv::Mat frame,
    const int input_width,
    const int input_height,
    const float* output,
    int num_classes,
    int num_proposals,
    const float score_thresh,
    bool use_debug = false
) {

    // temporary vector for NMS
    std::vector<cv::Rect> boxes;
    std::vector<float> confidences;
    std::vector<int> class_ids;

    // image size to input size ratio
    float x_factor = frame.cols / (input_width * 1.0f);
    float y_factor = frame.rows / (input_height * 1.0f);
    // printf("x_factor: %f, y_factor: %f\n", x_factor, y_factor);
    
    for (int i = 0; i < num_proposals; ++i) {
        const float* data = output + (4 * num_proposals) + i;

        // find the class with the highest confidence score
        float max_score = 0.0f;
        int best_class_id = -1;
        for (int j = 0; j < num_classes; ++j) {
            if (data[j * num_proposals] > max_score) {
                max_score = data[j * num_proposals];
                best_class_id = j;
            }
        }

        if (max_score < score_thresh) continue;

        // get the bounding box (cx, cy, w, h)
        float cx = output[0 * num_proposals + i];
        float cy = output[1 * num_proposals + i];
        float w = output[2 * num_proposals + i];
        float h = output[3 * num_proposals + i];
        // printf("cx: %f, cy: %f, w: %f, h: %f\n", cx, cy, w, h);

        // (cx, cy, w, h) to (x, y, w, h)
        int left = static_cast<int>((cx - 0.5 * w) * x_factor * input_width);
        int top = static_cast<int>((cy - 0.5 * h) * y_factor * input_height);
        int width = static_cast<int>(w * x_factor * input_width);
        int height = static_cast<int>(h * y_factor * input_height);

        boxes.emplace_back(left, top, width, height);
        confidences.push_back(max_score);
        class_ids.push_back(best_class_id);

        if (use_debug) printf("class=%d, score=%f, bb=[%d, %d, %d, %d]\n", best_class_id, max_score, left, top, width, height);
    }

    // NMS(Non-Maximum Suppression) process
    // If the IOU of two BBs is greater than or equal to a threshold, remove the BB with the lowest reliability.
    bool nms[boxes.size()] = {false};
    for (int index = 0; index < boxes.size(); index++) {
        if (nms[index]) continue;
        cv::Rect& a = boxes[index];
        for (int other = index + 1; other < boxes.size(); other++) {
            cv::Rect& b = boxes[other];
            float _iou = iou(a, b);
            if (use_debug) printf("NMS %d vs %d: score=%f vs %f, iou=%f\n", index, other, confidences[index], confidences[other], _iou);
            if (_iou > NMS_THRESH) {
                if (confidences[index] > confidences[other]) {
                    nms[other] = true;
                    if (use_debug) printf("NMS: %d removed\n", other);
                } else {
                    nms[index] = true;
                    if (use_debug) printf("NMS: %d removed\n", index);
                    break;
                }
            }
        }
    }

    std::vector<Detection> detections;

    for (int index = 0; index < boxes.size(); index++) {
        if (nms[index]) continue;
        Detection det;
        det.box = boxes[index];
        det.confidence = confidences[index];
        det.class_id = class_ids[index];
        detections.push_back(det);
    }

    return detections;

}

Program for operating a robotic arm(config.h)

C/C++
PlatformIO
// --- arm initial position ---
#define ARM_INIT_SHOULDER -80   // shoulder initial angle
#define ARM_INIT_ELBOW     -90    // elbow initial angle
#define ARM_INIT_GRIPPER   80  // gripper initial angle
#define ARM_INIT_TIMEOUTS  1500  // gripper release angle

// --- arm motion pattern (difference from initial value) ---
#define ARM_STEP_NUM 5
#pragma once

// --- GPIO ---
#define BUTTON_PIN         15    // button
#define STAND_LIGHT_PIN    21    // stand light
#define ARM1_SHOULDER_PIN  19    // robot arm 1 shoulder
#define ARM2_ELBOW_PIN      5    // robot arm 2 elbow
#define GRIPPER_PIN        22    // gripper

#define CAMERA_SERIAL_BAUDRATE 115200
#define CAMERA_SERIAL_RX_PIN   16    // object recognition camera RX
#define CAMERA_SERIAL_TX_PIN   17    // object recognition camera TX

// tool detection system  arm origin offset (mm)
const int action_num = 5;
static const int ARM_STEP_SHOULDER_DIFF[ARM_STEP_NUM] = {80, 80, 320, 320, 0};   // shoulder each step: difference from initial value
static const int ARM_STEP_ELBOW_DIFF[ARM_STEP_NUM]    = {0, 0, 0, 0, 0};       // elbow each step: difference from initial value
static const int ARM_STEP_GRIPPER_DIFF[ARM_STEP_NUM]  = {0, -130, -130, 0, 0};   // gripper each step: difference from initial value
static const int ARM_STEP_TIMEOUTS[ARM_STEP_NUM]      = {ARM_INIT_TIMEOUTS, ARM_INIT_TIMEOUTS, ARM_INIT_TIMEOUTS, ARM_INIT_TIMEOUTS, ARM_INIT_TIMEOUTS}; // each step wait time (ms)
#define TOOL_OFFSET_X  10.0f
#define TOOL_OFFSET_Y  5.0f
#define TOOL_OFFSET_Z  0.0f

// robot arm lengthmm
#define LINK1_LENGTH   100.0f
#define LINK2_LENGTH   80.0f
#define LINK3_LENGTH   60.0f

// servo angle rangedegree
#define SERVO_BASE_MIN     0.0f
#define SERVO_BASE_MAX   180.0f
#define SERVO_SHOULDER_MIN  0.0f
#define SERVO_SHOULDER_MAX 180.0f
#define SERVO_ELBOW_MIN     0.0f
#define SERVO_ELBOW_MAX   180.0f

// servo pulse widths
#define SERVO_PWM_MIN   500
#define SERVO_PWM_MAX  2500

// working areamm
#define WORKSPACE_X_MIN   0.0f
#define WORKSPACE_X_MAX 200.0f
#define WORKSPACE_Y_MIN   0.0f
#define WORKSPACE_Y_MAX 200.0f
#define WORKSPACE_Z_MIN   0.0f
#define WORKSPACE_Z_MAX 200.0f

// arm 
#define PLACE_X0 100.0f
#define PLACE_Y0 100.0f

// tool type specific placement coordinates (mm)
#define PLACE_X_WRENCH      150.0f
#define PLACE_Y_WRENCH       50.0f
#define PLACE_Z_WRENCH        0.0f

#define PLACE_X_SCREWDRIVER 170.0f
#define PLACE_Y_SCREWDRIVER  60.0f
#define PLACE_Z_SCREWDRIVER   0.0f

#define AI_CONFIDENCE_THRESHOLD 0.7f

// tool specific gripping height (mm)
#define GRIPPER_Z_WRENCH      20.0f
#define GRIPPER_Z_SCREWDRIVER 15.0f

// --- main.cpp ---
// LCD
#define LCD_TEXTSIZE 2
#define LCD_STATUS_Y 120
#define LCD_WIFI_Y   30 // unused
#define LCD_TIME_Y   60 // unused
#define LCD_EMGSTOP_Y 95
#define LCD_ACTIONTIME_Y 150
#define LCD_IO_Y 180
#define LCD_STATUS_WIDTH 320
#define LCD_STATUS_HEIGHT 20
#define LCD_ACTIONTIME_WIDTH 240
#define LCD_ACTIONTIME_HEIGHT 20

// IO pin assignments
#define START_PIN 36
#define STOP_PIN 39
#define EMG_PIN 34

// AB simultaneous press enable/disable toggle time (ms)
#define AB_ENABLE_MIN 2000
#define AB_ENABLE_MAX 5000
#define AB_DISABLE_MIN 5000

Credits

TD_ICT
2 projects • 0 followers
We are a team developing IoT devices and related products in Japan.

Comments