Now that your BallyBot can detect faces, it's time to take it to the next level! Imagine you're at a mall and you want your robot to draw the attention of passersby. With face following, your BallyBot can track and follow a person's face, creating a much more engaging experience.
In this lesson, you'll learn how to program your BallyBot to follow a face using its motors. Your robot will pivot to keep faces centered in view, which makes it perfect for applications like security, surveillance, or even social robotics.
By combining face detection with motor control, you'll create a more dynamic robot that responds to its environment. Get ready to bring your BallyBot to life!
Step 0: Starting Code
Begin with your working code from Lesson 9: Make ESP32 React to Face Detection. This already has offline face detection that we can build on.
#include "esp_camera.h"
#include "human_face_detect_mnp01.hpp"
#include "human_face_detect_msr01.hpp"

#define CAMERA_MODEL_AI_THINKER
#define PWDN_GPIO_NUM 32
#define RESET_GPIO_NUM -1
#define XCLK_GPIO_NUM 0
#define SIOD_GPIO_NUM 26
#define SIOC_GPIO_NUM 27
#define Y9_GPIO_NUM 35
#define Y8_GPIO_NUM 34
#define Y7_GPIO_NUM 39
#define Y6_GPIO_NUM 36
#define Y5_GPIO_NUM 21
#define Y4_GPIO_NUM 19
#define Y3_GPIO_NUM 18
#define Y2_GPIO_NUM 5
#define VSYNC_GPIO_NUM 25
#define HREF_GPIO_NUM 23
#define PCLK_GPIO_NUM 22
#define LED_GPIO_NUM 4 // LED connected to GPIO 4

HumanFaceDetectMSR01 s1(0.1F, 0.5F, 10, 0.2F);
HumanFaceDetectMNP01 s2(0.5F, 0.3F, 5);

void setup() {
  Serial.begin(115200);
  Serial.setDebugOutput(true);
  pinMode(LED_GPIO_NUM, OUTPUT);
  digitalWrite(LED_GPIO_NUM, LOW);
  CameraSetup();
}

void loop() {
  camera_fb_t *fb = esp_camera_fb_get();
  if (!fb) {
    Serial.println("Camera capture failed");
    return;
  }
  std::list<dl::detect::result_t> results =
      s1.infer((uint16_t *)fb->buf, {(int)fb->height, (int)fb->width, 3});
  results = s2.infer((uint16_t *)fb->buf, {(int)fb->height, (int)fb->width, 3},
                     results);
  if (!results.empty()) {
    Serial.println("Face detected!");
    digitalWrite(LED_GPIO_NUM, HIGH);
  } else {
    digitalWrite(LED_GPIO_NUM, LOW);
  }
  esp_camera_fb_return(fb);
  delay(50);
}

void CameraSetup() {
  camera_config_t config;
  config.ledc_channel = LEDC_CHANNEL_0;
  config.ledc_timer = LEDC_TIMER_0;
  config.pin_d0 = Y2_GPIO_NUM;
  config.pin_d1 = Y3_GPIO_NUM;
  config.pin_d2 = Y4_GPIO_NUM;
  config.pin_d3 = Y5_GPIO_NUM;
  config.pin_d4 = Y6_GPIO_NUM;
  config.pin_d5 = Y7_GPIO_NUM;
  config.pin_d6 = Y8_GPIO_NUM;
  config.pin_d7 = Y9_GPIO_NUM;
  config.pin_xclk = XCLK_GPIO_NUM;
  config.pin_pclk = PCLK_GPIO_NUM;
  config.pin_vsync = VSYNC_GPIO_NUM;
  config.pin_href = HREF_GPIO_NUM;
  config.pin_sscb_sda = SIOD_GPIO_NUM;
  config.pin_sscb_scl = SIOC_GPIO_NUM;
  config.pin_pwdn = PWDN_GPIO_NUM;
  config.pin_reset = RESET_GPIO_NUM;
  config.xclk_freq_hz = 10000000;
  config.pixel_format = PIXFORMAT_RGB565;
  /* init with high specs to pre-allocate larger buffers */
  config.frame_size = FRAMESIZE_QVGA;
  config.jpeg_quality = 40;
  config.fb_count = 2;
  /* camera init */
  esp_err_t err = esp_camera_init(&config);
  if (err != ESP_OK) {
    Serial.printf("Camera init failed with error 0x%x", err);
    return;
  }
}
Step 1: Find Face Center
Our first goal is to get access to the face objects (result_t) that our inference call s1.infer() returns. They come back as a std::list of result_t.
Get access to the individual face objects by looping through the faces in results:
if (!results.empty()) {
  Serial.println("Face detected!");
  digitalWrite(LED_GPIO_NUM, HIGH);
  for (auto &face : results) {
  }
} else {
  digitalWrite(LED_GPIO_NUM, LOW);
}
Now let's take a small detour to really understand what these face objects are.
We need the face coordinates from the result_t face object returned by the face detection call s1.infer(). So we need to find out what exactly the type dl::detect::result_t consists of. To do this, ctrl+click on it in the Arduino IDE to jump to its definition.
Given this info, result_t.box is our most intuitive way to find the face center: the box vector holds the left_up and right_down corner coordinates, so all we need to do is average the left_up.x and right_down.x values.
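To sanity-check that math outside the ESP32 toolchain, here is a minimal plain-C++ sketch. The box layout {left_up.x, left_up.y, right_down.x, right_down.y} follows the struct above; the function name faceCenterX is ours, for illustration only:

```cpp
#include <cassert>

// box layout as in dl::detect::result_t:
// {left_up.x, left_up.y, right_down.x, right_down.y}
int faceCenterX(const int box[4]) {
  int x = box[0];           // left edge
  int w = box[2] - box[0];  // width = right edge - left edge
  return x + w / 2;         // same as averaging box[0] and box[2]
}
```

For a face box spanning x = 40 to x = 120, this returns 80, the horizontal midpoint.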
Now let's go back to our code.
Create a new variable for each of the box's attributes x, y, w, h:
if (!results.empty()) {
  Serial.println("Face detected!");
  digitalWrite(LED_GPIO_NUM, HIGH);
  for (auto &face : results) {
    int x = face.box[0];
    int y = face.box[1];
    int w = face.box[2] - x;
    int h = face.box[3] - y;
  }
} else {
  digitalWrite(LED_GPIO_NUM, LOW);
}
Add a new variable called xMid for the x coordinate of the face center. We can place it in our loop just after the box is set up:
if (!results.empty()) {
  Serial.println("Face detected!");
  digitalWrite(LED_GPIO_NUM, HIGH);
  for (auto &face : results) {
    int x = face.box[0];
    int y = face.box[1];
    int w = face.box[2] - x;
    int h = face.box[3] - y;
    int xMid = x + (w / 2);
  }
} else {
  digitalWrite(LED_GPIO_NUM, LOW);
}
Print out the value of the face center xMid:
if (!results.empty()) {
  Serial.println("Face detected!");
  digitalWrite(LED_GPIO_NUM, HIGH);
  for (auto &face : results) {
    int x = face.box[0];
    int y = face.box[1];
    int w = face.box[2] - x;
    int h = face.box[3] - y;
    int xMid = x + (w / 2);
    Serial.println(xMid);
  }
} else {
  digitalWrite(LED_GPIO_NUM, LOW);
}
We now have tracking of the face on the x axis, which is all we need to track the face with wheels.
Step 2: Get Left/Right of Face
We have the coordinates of the face, but how do we know if it is left or right of center? If your face is dead center in the camera, it isn't obvious: the number is simply the resolution width / 2.
To make it easier to work with, we will convert it so that a centered face reads 0, with one side of the frame positive and the other negative.
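The conversion is just a subtraction from the frame midpoint. Here is a minimal plain-C++ sketch of it (320 assumes QVGA frames; the function name xOffset is ours, not from the lesson code):

```cpp
#include <cassert>

// Convert an absolute face-center x coordinate into a signed offset
// from the middle of the frame. 0 means centered; the sign tells you
// which half of the frame the face is in.
int xOffset(int frameWidth, int xMid) {
  return frameWidth / 2 - xMid;
}
```

With a 320-pixel-wide frame, a face centered at x = 160 gives 0, at x = 100 gives +60, and at x = 220 gives -60.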
Before iterating through the faces, create a variable rfbwidth to hold the frame width:
...
if (!results.empty()) {
  int rfbwidth = fb->width;
  Serial.println("Face detected!");
  digitalWrite(LED_GPIO_NUM, HIGH);
...
Then create some useful variables:
- xOff for the offset from the new middle
- xdist for the distance left or right from the middle
if (!results.empty()) {
  int rfbwidth = fb->width;
  Serial.println("Face detected!");
  digitalWrite(LED_GPIO_NUM, HIGH);
  for (auto &face : results) {
    int x = face.box[0];
    int y = face.box[1];
    int w = face.box[2] - x;
    int h = face.box[3] - y;
    int xMid = x + (w / 2);
    int xOff = rfbwidth / 2 - xMid;
    int xdist = abs(xOff);
    Serial.println(xOff);
  }
} else {
  digitalWrite(LED_GPIO_NUM, LOW);
}
Print out the value of xOff too; you will see it go positive when the face is on one side of the frame and negative on the other.
We now have everything we need to know about the face: which side of the camera it is on, and how far it is from the center.
Step 3: Connect Motors
To connect the motors there are 3 main parts:
- Setting up the motor pins
- Turning the motors based on the face
- Adjusting motor power based on the distance from center
To set up the motor pins we will use the sigma-delta functions (sigmaDeltaSetup and sigmaDeltaWrite). They are better than digitalWrite here because they deliver fractions of full power to the motor instead of only completely on or off.
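To build intuition for what sigma-delta output does, here is a conceptual plain-C++ model, not the ESP32 driver itself: a first-order modulator emits a stream of on/off pulses whose on-fraction approaches duty/255, which is how fractional power reaches the motor.

```cpp
#include <cassert>

// Conceptual first-order sigma-delta modulator. Given a target duty
// (0-255), count how many of `samples` output pulses are high.
// The high fraction approaches duty/255 - an illustration only,
// not the actual ESP32 hardware implementation.
int pulsesHigh(int duty, int samples) {
  int acc = 0, high = 0;
  for (int i = 0; i < samples; ++i) {
    acc += duty;
    if (acc >= 255) {  // accumulator overflow -> emit a 1
      acc -= 255;
      ++high;
    }
  }
  return high;
}
```

A duty of 255 gives an always-on output, 0 gives always-off, and 128 gives roughly half the pulses high, so the motor averages about half power.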
Add this to the setup function to initialize the motor pins:
void setup() {
  Serial.begin(115200);
  Serial.setDebugOutput(true);
  pinMode(14, OUTPUT); /* must set pin as it is on by default */
  sigmaDeltaSetup(4, 0, 312500); // LED
  sigmaDeltaSetup(12, 1, 1220);  // motor right
  sigmaDeltaSetup(2, 2, 1220);   // motor left
  sigmaDeltaWrite(0, 100);
  sigmaDeltaWrite(1, 0); // .6 is as low as can go without stalling
  sigmaDeltaWrite(2, 0);
  CameraSetup();
}
Notice this code also added sigmaDeltaSetup to the LED pin 4. This gives us some practice to help understand sigma-delta.
Replace the LED's usage of digitalWrite so it works with sigmaDelta:
...
if (!results.empty()) {
  int rfbwidth = fb->width;
  Serial.println("Face detected!");
  sigmaDeltaWrite(0, 30); // <---- LED change
  for (auto &face : results) {
    int x = face.box[0];
    int y = face.box[1];
    int w = face.box[2] - x;
    int h = face.box[3] - y;
    int xMid = x + (w / 2);
    int xOff = rfbwidth / 2 - xMid;
    int xdist = abs(xOff);
    Serial.println(xOff);
  }
} else {
  sigmaDeltaWrite(0, 0); // <---- LED change
}
...
The first input of sigmaDeltaWrite is the channel of the LED. You might think it'd be 4 like the pin, but when we called sigmaDeltaSetup(4, 0, 312500); we assigned pin 4 to channel 0. The second input is the amount of power, in a range of 0-255.
The next part is motor control: add an if statement checking whether xOff is positive or negative, then call sigmaDeltaWrite to power the matching motor.
...
if (!results.empty()) {
  int rfbwidth = fb->width;
  Serial.println("Face detected!");
  sigmaDeltaWrite(0, 30);
  for (auto &face : results) {
    int x = face.box[0];
    int y = face.box[1];
    int w = face.box[2] - x;
    int h = face.box[3] - y;
    int xMid = x + (w / 2);
    int xOff = rfbwidth / 2 - xMid;
    int xdist = abs(xOff);
    Serial.println(xOff);
    if (xOff > 0) {
      sigmaDeltaWrite(1, 150);
    } else {
      sigmaDeltaWrite(2, 150);
    }
  }
} else {
  sigmaDeltaWrite(0, 0);
  sigmaDeltaWrite(1, 0);
  sigmaDeltaWrite(2, 0);
}
...
This code will now work, but it is not ideal: the robot turns at the same speed whether your face is an inch from the camera center or two feet away. The last part is to make the turn speed depend on how far your face is from the center.
Step 4: Smooth Face Tracking
To make the motors proportional to the distance of your face from center, you will need to use the distance to decide motor power.
Here is a first approach:
sigmaDeltaWrite(1, xdist);
This is proportional, but we are directly using the pixel distance of the face from the center as the power. With a different camera resolution the motors would behave differently, so it is not ideal. What we want is to map the distance from the center of the frame to its edge onto the motor range of 0-255. Fortunately the Arduino IDE comes with the map() command that lets us do just that.
Here is how its inputs work:
map(value, fromLow, fromHigh, toLow, toHigh)
So in our case it looks like:
sigmaDeltaWrite(1, map(xdist, 0, rfbwidth/2, 0, 255));
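Under the hood, map() is a simple integer linear rescale. Here is a standalone sketch of that formula (the name mapRange is ours, to avoid clashing with the Arduino built-in):

```cpp
#include <cassert>

// Linear rescale from one range to another, using integer math
// like Arduino's map() does.
long mapRange(long x, long fromLow, long fromHigh, long toLow, long toHigh) {
  return (x - fromLow) * (toHigh - toLow) / (fromHigh - fromLow) + toLow;
}
```

For a QVGA frame (rfbwidth/2 = 160): a distance of 0 maps to 0 power, 160 maps to 255, and 80 maps to about half power. Note the integer division truncates, so 80 gives 127 rather than 127.5.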
Now let's place that into the context of our loop:
...
if (!results.empty()) {
  int rfbwidth = fb->width;
  Serial.println("Face detected!");
  sigmaDeltaWrite(0, 30);
  for (auto &face : results) {
    int x = face.box[0];
    int y = face.box[1];
    int w = face.box[2] - x;
    int h = face.box[3] - y;
    int xMid = x + (w / 2);
    int xOff = rfbwidth / 2 - xMid;
    int xdist = abs(xOff);
    Serial.println(xOff);
    if (xOff > 0) {
      sigmaDeltaWrite(1, map(xdist, 0, rfbwidth / 2, 0, 255));
    } else {
      sigmaDeltaWrite(2, map(xdist, 0, rfbwidth / 2, 0, 255));
    }
  }
} else {
  sigmaDeltaWrite(0, 0);
  sigmaDeltaWrite(1, 0);
  sigmaDeltaWrite(2, 0);
}
...
The motors still may not move for small offsets, because there is a dead zone: they will not even start to turn until the power reaches about 100. So change the output range of map() from 0-255 to 100-255, since 100 is the real number where the motors start to move:
sigmaDeltaWrite(1, map(xdist, 0, rfbwidth/2, 100, 255));
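The dead-zone fix can be checked in isolation with the same rescale formula. A sketch, assuming QVGA frames (half-width 160) and the 100-power stall threshold described above; motorPower is our illustrative name:

```cpp
#include <cassert>

// Pixel distance -> motor power, skipping the 0-99 dead zone.
// Equivalent to map(xdist, 0, halfWidth, 100, 255): even the smallest
// correction starts at 100, where the motors actually begin to turn.
long motorPower(long xdist, long halfWidth) {
  return xdist * (255 - 100) / halfWidth + 100;
}
```

Now a face barely off center still produces 100 power (enough to move), while a face at the edge of the frame produces the full 255.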
Step 5: Upload & Test
This section assumes you are using the BallyBot's breakout board to upload:
- Connect FTDI uploader
- Make sure Arduino IDE has AI Thinker ESP32-CAM board selected
- Select the correct port for the ESP32-CAM
- Click the Upload Button
- Turn the BallyBot upload switch up, then flip the power switch off -> on
- Once the Arduino IDE finishes uploading, turn the BallyBot upload switch down and flip power off -> on
If you see human_face_detect_mnp01.hpp not found: this can happen if you installed a version of the esp32 board package that is >3.0.7 and <3.2.0. To fix it, install the older esp32 board package version 3.0.7.
Your BallyBot now actively follows faces! This breakthrough enables:
- Social robotics that engage with people
- Intelligent surveillance tracking
- Interactive competition entries
Resources
- Previous Lesson: lesson-9-make-esp32-react-to-face-detection
- Full Code for Lesson 10
This is the last lesson of the BallyBots course, but it is not the end! There are many more interesting projects to be done pushing these ideas further with the BallyBot.
Here are some potential ideas to pursue that haven't been tried before:
- Surveillance with highlighted moments saved
- Person tracking and following with the BallyBot
- Facial Recognition from the client side of a video stream
- 3D reconstruction from the client side of a video stream
- Automatic navigation from the client side of a video stream
You can come up with some of your own too. I hope these projects have given you the start, and a confident understanding of the basics, to continue pursuing projects of your own.