Demo 53: ESP32 Camera as a Webcam for Skype/Zoom video call

1. Introduction

Today I will make a very interesting demo: turning your ESP32 Cam into a webcam for Skype/Zoom video calls.

This demo only works on Linux, since it uses the v4l2loopback device.

This demo can be applied to deep learning projects with the ESP32 Camera. You can apply a filter before forwarding the image to Skype/Zoom. For example, in this demo I will use A.I. to hide the eyes behind red circles.

2. Hardware

- A Linux (Ubuntu) PC

- An ESP32 Camera

3. Software

- Install the software on Linux PC:

+ sudo apt install python3-pip

+ pip3 install --user opencv-python (this is the package that provides the cv2 module)

+ pip3 install --user numpy

+ pip3 install git+https://github.com/antmicro/python3-v4l2

+ git clone https://github.com/umlaeute/v4l2loopback.git (then make && sudo make install)

In a terminal, type:

sudo modprobe v4l2loopback video_nr=6

This will create the virtual device /dev/video6. You can check that it exists with the command:

ls /dev/video*

Skype/Zoom will read images from the /dev/video6 device.

The ESP32 Cam and the PC communicate over TCP: the ESP32 Cam is the server and the PC is the client.

The ESP32 Cam Arduino code:

#include "esp_camera.h"
#include <WiFi.h>

#define PWDN_GPIO_NUM     32
#define RESET_GPIO_NUM    -1
#define XCLK_GPIO_NUM      0
#define SIOD_GPIO_NUM     26
#define SIOC_GPIO_NUM     27

#define Y9_GPIO_NUM       35
#define Y8_GPIO_NUM       34
#define Y7_GPIO_NUM       39
#define Y6_GPIO_NUM       36
#define Y5_GPIO_NUM       21
#define Y4_GPIO_NUM       19
#define Y3_GPIO_NUM       18
#define Y2_GPIO_NUM        5
#define VSYNC_GPIO_NUM    25
#define HREF_GPIO_NUM     23
#define PCLK_GPIO_NUM     22

// TCP server the PC connects to; listens on port 8088.
WiFiServer server(8088);
// True while live_client is a connected client that should receive frames.
bool connected = false;
// The client currently being streamed to (assigned in loop()).
WiFiClient live_client;

// Fill in the camera configuration (pins, clock, JPEG format, QVGA frame
// size) and initialise the camera driver. On failure the error code is
// printed to Serial and the function returns without further action.
void configCamera(){
  // Value-initialise the struct so that every field this function does not
  // set explicitly is zero instead of leftover stack garbage.
  camera_config_t config = {};
  config.ledc_channel = LEDC_CHANNEL_0;
  config.ledc_timer = LEDC_TIMER_0;

  // 8-bit parallel data bus.
  config.pin_d0 = Y2_GPIO_NUM;
  config.pin_d1 = Y3_GPIO_NUM;
  config.pin_d2 = Y4_GPIO_NUM;
  config.pin_d3 = Y5_GPIO_NUM;
  config.pin_d4 = Y6_GPIO_NUM;
  config.pin_d5 = Y7_GPIO_NUM;
  config.pin_d6 = Y8_GPIO_NUM;
  config.pin_d7 = Y9_GPIO_NUM;

  // Clock, sync, control-bus, power-down and reset pins.
  config.pin_xclk = XCLK_GPIO_NUM;
  config.pin_pclk = PCLK_GPIO_NUM;
  config.pin_vsync = VSYNC_GPIO_NUM;
  config.pin_href = HREF_GPIO_NUM;
  config.pin_sscb_sda = SIOD_GPIO_NUM;
  config.pin_sscb_scl = SIOC_GPIO_NUM;
  config.pin_pwdn = PWDN_GPIO_NUM;
  config.pin_reset = RESET_GPIO_NUM;

  config.xclk_freq_hz = 20000000;
  config.pixel_format = PIXFORMAT_JPEG;

  // QVGA matches the 320x240 size the PC-side script configures.
  config.frame_size = FRAMESIZE_QVGA;
  config.jpeg_quality = 9;
  config.fb_count = 1;

  esp_err_t err = esp_camera_init(&config);
  if (err != ESP_OK) {
    Serial.printf("Camera init failed with error 0x%x", err);
    return;
  }
}

// Capture a single camera frame and send it to `client`, followed by a
// "\r\n" marker. If no frame buffer can be obtained, a message is printed
// to Serial and nothing is sent.
void liveCam(WiFiClient &client){
  camera_fb_t *frame = esp_camera_fb_get();
  if (frame != nullptr) {
    // Frame payload first, then the end-of-frame marker; flush after each.
    client.write(frame->buf, frame->len);
    client.flush();
    client.print("\r\n");
    client.flush();

    // Give the buffer back to the driver so it can be reused.
    esp_camera_fb_return(frame);
  } else {
    Serial.println("Frame buffer could not be acquired");
  }
}

// One-time initialisation: serial port, Wi-Fi connection, TCP server and
// camera, in that order.
void setup() {
  Serial.begin(115200);

  // Join the Wi-Fi network, printing a dot every 500 ms until connected.
  WiFi.begin("I3.41", "xxx");
  Serial.println("");
  for (;;) {
    if (WiFi.status() == WL_CONNECTED) {
      break;
    }
    delay(500);
    Serial.print(".");
  }
  Serial.println("");

  // Report the address the PC-side script has to connect to.
  Serial.println("IP address: " + WiFi.localIP().toString());

  server.begin();
  configCamera();
}

void loop() {
  WiFiClient client = server.available(); 
  if (client.connected()) {
    live_client = client;
    connected = true;
  }
  if(live_client.connected() == false) {
    connected = false;
  }
  if(connected) {
    liveCam(live_client);
  }
}

The Python code on PC:

The ".xml" files can be downloaded here:

https://github.com/opencv/opencv/tree/master/data/haarcascades

import socket
import os
import fcntl
import numpy as np
import cv2
from v4l2 import *
import time

ESP32_SERVER_IP = '192.168.1.3'
PORT = 8088
VID_WIDTH = 320
VID_HEIGHT = 240
VIDEO_DEVICE = '/dev/video6'

# The camera sends each JPEG followed by "\r\n". A bare "\r\n" can also occur
# inside the compressed data, so the JPEG end-of-image marker (FF D9) is
# required directly in front of it before a frame is considered complete.
FRAME_END = b'\xff\xd9\r\n'
RECV_SIZE = 4096
SOCKET_TIMEOUT_S = 5.0
RETRY_DELAY_S = 0.5


def find_frame_end(buffer, start=0):
    """Return the length of the first complete JPEG in *buffer*, or -1.

    The search for the end-of-frame marker begins at offset *start*. The
    returned length includes the JPEG end-of-image marker but not the
    trailing "\\r\\n".
    """
    pos = buffer.find(FRAME_END, start)
    if pos < 0:
        return -1
    return pos + 2


def open_loopback(path, width, height):
    """Open the video output device at *path* and configure it for I420 frames.

    Returns the open file descriptor; the caller must close it.
    """
    fd = os.open(path, os.O_RDWR)
    try:
        fmt = v4l2_format()
        fmt.type = V4L2_BUF_TYPE_VIDEO_OUTPUT
        fcntl.ioctl(fd, VIDIOC_G_FMT, fmt)
        fmt.fmt.pix.width = width
        fmt.fmt.pix.height = height
        fmt.fmt.pix.pixelformat = V4L2_PIX_FMT_YUV420
        # I420 is a full-resolution Y plane plus two quarter-size chroma
        # planes: 1.5 bytes per pixel.
        fmt.fmt.pix.bytesperline = width
        fmt.fmt.pix.sizeimage = width * height * 3 // 2
        fmt.fmt.pix.field = V4L2_FIELD_NONE
        fcntl.ioctl(fd, VIDIOC_S_FMT, fmt)
    except Exception:
        os.close(fd)
        raise
    return fd


def load_cascade(path):
    """Load a Haar cascade file, raising FileNotFoundError if it cannot be loaded."""
    cascade = cv2.CascadeClassifier(path)
    if cascade.empty():
        raise FileNotFoundError("Could not load Haar cascade: %s" % path)
    return cascade


def receive_jpeg(host, port):
    """Connect to the camera, read exactly one JPEG frame and return its bytes.

    One connection is used per frame. Raises ConnectionError if the peer
    closes the connection before a complete frame has arrived; other socket
    failures (including timeouts) propagate as OSError.
    """
    buffer = bytearray()
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.settimeout(SOCKET_TIMEOUT_S)
        s.connect((host, port))
        while True:
            data = s.recv(RECV_SIZE)
            if not data:
                raise ConnectionError("camera closed the connection mid-frame")
            # Re-scan the last few old bytes so a marker split across two
            # recv() calls is still found.
            search_from = max(0, len(buffer) - (len(FRAME_END) - 1))
            buffer += data
            end = find_frame_end(buffer, search_from)
            if end >= 0:
                return bytes(buffer[:end])


def hide_eyes(frame, face_cascade, eyes_cascade):
    """Draw a filled red circle over every eye detected in the BGR *frame*.

    The frame is modified in place and also returned.
    """
    gray = cv2.equalizeHist(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY))
    for (x, y, w, h) in face_cascade.detectMultiScale(gray):
        # Eyes are searched for only inside each detected face.
        face_roi = gray[y:y + h, x:x + w]
        for (x2, y2, w2, h2) in eyes_cascade.detectMultiScale(face_roi):
            eye_center = (int(x + x2 + w2 // 2), int(y + y2 + h2 // 2))
            radius = int(round((w2 + h2) * 0.25))
            # The frame is BGR, so red is (0, 0, 255).
            cv2.circle(frame, eye_center, radius, (0, 0, 255), -1)
    return frame


def main():
    """Forward camera frames, with eyes masked, to the video output device."""
    face_cascade = load_cascade('haarcascade_frontalface_default.xml')
    eyes_cascade = load_cascade('haarcascade_eye_tree_eyeglasses.xml')
    vd = open_loopback(VIDEO_DEVICE, VID_WIDTH, VID_HEIGHT)
    try:
        while True:
            try:
                jpeg = receive_jpeg(ESP32_SERVER_IP, PORT)
            except OSError as err:
                print("Camera connection failed (%s); retrying" % err)
                time.sleep(RETRY_DELAY_S)
                continue

            image_bytes = np.frombuffer(jpeg, dtype=np.uint8)
            frame = cv2.imdecode(image_bytes, flags=cv2.IMREAD_COLOR)
            if frame is None:
                # Corrupt or incomplete JPEG: skip it and fetch the next one.
                continue
            if frame.shape[:2] != (VID_HEIGHT, VID_WIDTH):
                # The output device was configured for a fixed frame size.
                frame = cv2.resize(frame, (VID_WIDTH, VID_HEIGHT))

            frame = hide_eyes(frame, face_cascade, eyes_cascade)
            # imdecode yields BGR, so convert from BGR (not RGB) to keep the
            # red and blue channels from being swapped in the output.
            yuv = cv2.cvtColor(frame, cv2.COLOR_BGR2YUV_I420)
            os.write(vd, yuv.tobytes())
    finally:
        os.close(vd)


if __name__ == "__main__":
    main()