Mar-31-2024, 11:35 AM
Hello all
I am trying to piece together some Python code to track 2 faces. All going OK so far, considering first effort and I have no idea what I am doing!
Basically, I am lifting code I find on the web and trying to understand how it works. Not easy, as I have not really found a clear explanation of how Mediapipe works.
So, I am sure my code is horrible, but I working my way through. I am sure some of my annotation is incorrect (like explaining Normalisation)
There is a lot of redundancy in the code at the moment... please excuse that.
ANYWAY. It picks up multiple faces fine. I am now trying to work out how close the faces are to the screen.
Lines 131-143 are grabbing the width of the eyes and that is working. The print statement prints the face number and then the width of the eye (line 143), but the eye width is only ever the one closest to the camera? (the face number changes correctly)
Can someone point to what is incorrect?
I though it might be the statement to_check = results.multi_face_landmarks[0].landmark was only looking at the first face (multi_face_landmarks[0]), but I might be wrong.
I hope I have formatted this first question correctly!
I am trying to piece together some Python code to track 2 faces. All going OK so far, considering first effort and I have no idea what I am doing!
Basically, I am lifting code I find on the web and trying to understand how it works. Not easy, as I have not really found a clear explanation of how Mediapipe works.
So, I am sure my code is horrible, but I working my way through. I am sure some of my annotation is incorrect (like explaining Normalisation)
There is a lot of redundancy in the code at the moment... please excuse that.
ANYWAY. It picks up multiple faces fine. I am now trying to work out how close the faces are to the screen.
Lines 131-143 are grabbing the width of the eyes and that is working. The print statement prints the face number and then the width of the eye (line 143), but the eye width is only ever the one closest to the camera? (the face number changes correctly)
Can someone point to what is incorrect?
I though it might be the statement to_check = results.multi_face_landmarks[0].landmark was only looking at the first face (multi_face_landmarks[0]), but I might be wrong.
I hope I have formatted this first question correctly!
import cv2 import mediapipe as mp import serial # Serial for comms to the Arduino import time # For delays etc import platform # Details of the system we are running import atexit # Clean method of exiting import serial.tools.list_ports # Serial port information import sys import numpy as np # Processes numbers, strings and arrays import keyboard # Allows use of the keyboard from vpython import * #import pickle # Allows you to store data print(platform.system(), platform.release()) print("Python version " + platform.python_version()) print("") mp_drawing = mp.solutions.drawing_utils mp_drawing_styles = mp.solutions.drawing_styles mp_face_mesh = mp.solutions.face_mesh showvideo = True faces = 0 #--------------------- Arduino communications ----------------------- def findArduinoUnoPort(): # Check all the comm ports for an Arduino portList = list(serial.tools.list_ports.comports()) for port in portList: if "VID:PID=2341:0043" in port[0]\ or "VID:PID=2341:0043" in port[1]\ or "VID:PID=2341:0043" in port[2]: print(port) print(port[0]) print(port[1]) print(port[2]) return port[0] def doAtExit(): if serialUno.isOpen(): serialUno.close() print("Close serial") print("serialUno.isOpen() = " + str(serialUno.isOpen())) atexit.register(doAtExit) unoPort = findArduinoUnoPort() if not unoPort: print("No Arduino found") #sys.exit("No Arduino found - Exiting system") #print("Arduino found: " + unoPort) #print() serialArduino = serial.Serial(unoPort, 9600) print("serialUno.isOpen() = " + str(serialArduino.isOpen())) #----------------------------------------------------------------------- # For webcam input: drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1) #-- Select a video source - #cap = cv2.VideoCapture('dance2.mp4') # Read local video file cap = cv2.VideoCapture(0) # Laptop built-in webcam #cap = cv2.VideoCapture(1, cv2.CAP_DSHOW) # External USB camera #cap = cv2.VideoCapture('rtsp://192.168.1.64/1') # Capture from an IP camera #cap = cv2.VideoCapture('rtsp://username:[email protected]/1') # Capture from an IP camera with password + username with mp_face_mesh.FaceMesh( max_num_faces=20, # Max amount of faces detectable refine_landmarks=True, # Whether it uses all the face land marks min_detection_confidence=0.5, min_tracking_confidence=0.5) as face_mesh: while cap.isOpened(): success, image = cap.read() width = 1280 # Resize the webcam feed height = 720 dim = (width, height) image = cv2.resize(image, dim, interpolation=cv2.INTER_AREA) if not success: print("Ignoring empty camera frame.") # If loading a video, use 'break' instead of 'continue'. continue # To improve performance, optionally mark the image as not writeable to # pass by reference. image.flags.writeable = False image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) results = face_mesh.process(image) faces = 0 # Reset the faces count black = cv2.imread("black.png") # Create a blank page for the 'video off' feed if showvideo == True: # Show live video feed image.flags.writeable = True # Draw the face mesh annotations on the image. image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) if results.multi_face_landmarks: for face_landmarks in results.multi_face_landmarks: #mp_drawing.draw_landmarks( # Draw the entire face (mesh) #image=image, #landmark_list=face_landmarks, #connections=mp_face_mesh.FACEMESH_TESSELATION, #landmark_drawing_spec=None, #connection_drawing_spec=mp_drawing_styles #.get_default_face_mesh_tesselation_style()) mp_drawing.draw_landmarks( # Draw just the outlines image=image, landmark_list=face_landmarks, connections=mp_face_mesh.FACEMESH_CONTOURS, landmark_drawing_spec=None, connection_drawing_spec=mp_drawing_styles .get_default_face_mesh_contours_style()) mp_drawing.draw_landmarks( # Draw the eyes image=image, landmark_list=face_landmarks, connections=mp_face_mesh.FACEMESH_IRISES, landmark_drawing_spec=None, connection_drawing_spec=mp_drawing_styles .get_default_face_mesh_iris_connections_style()) faces+=1 # Increase the faces count for every face identified #---------- Track the width of the eyes (to determine range) ------------- def invert_normalization(x, y, w, h): # Re-correct the Z value (depth) for the new scale of the image. Z = Smaller = Nearer the camera return int(x * w), int(y * h) # Normalization is the conversion of larger variables of detail to a smaller range for easier processing. E.G. 0-255 becomes 0-1 to_check = results.multi_face_landmarks[0].landmark image_width = image.shape[1] image_height = image.shape[0] #up_left_x, up_left_y = invert_normalization(x=to_check[71].x, y=to_check[71].y, w=image_width, h=image_height) # https://github.com/edge7/Eye-Region-Extraction-Toolbox/blob/main/runner/main.py #up_right_x, up_right_y = invert_normalization(x=to_check[301].x, y=to_check[71].y, w=image_width, h=image_height) down_left_x, down_left_y = invert_normalization(x=to_check[71].x, y=to_check[123].y, w=image_width, h=image_height) down_right_x, down_right_y = invert_normalization(x=to_check[301].x, y=to_check[123].y, w=image_width, h=image_height) width_eyes = (down_right_x - down_left_x) print(faces,width_eyes) #---------- Determine action ----------- cv2.putText(image, str("Face recognition (live video)"), (25,50), cv2.FONT_HERSHEY_SIMPLEX,1,(0,255,0), 2) # Text cv2.putText(image, str("Number of faces: "+ str(faces)), (25,80), cv2.FONT_HERSHEY_SIMPLEX,1,(255,255,255), 2) # Text if (faces == 1): cv2.putText(image, str("You need another friend!"), (25,110), cv2.FONT_HERSHEY_SIMPLEX,1,(255,255,255), 2) # Text if (faces == 2): cv2.putText(image, str("Hello you two!"), (25,110), cv2.FONT_HERSHEY_SIMPLEX,1,(255,255,255), 2) # Text cv2.rectangle(image, (12, 12), (1266, 706), (0, 255, 0), 20) if (faces > 2): cv2.putText(image, str("TOO MANY PEOPLE!"), (25,110), cv2.FONT_HERSHEY_SIMPLEX,1,(255,0,255), 2) # Text else: # Show black screen image.flags.writeable = True image = cv2.cvtColor(black, cv2.COLOR_RGB2BGR) if results.multi_face_landmarks: for face_landmarks in results.multi_face_landmarks: mp_drawing.draw_landmarks( # Draw the entire face (mesh) image=black, landmark_list=face_landmarks, connections=mp_face_mesh.FACEMESH_TESSELATION, landmark_drawing_spec=None, connection_drawing_spec=mp_drawing_styles .get_default_face_mesh_tesselation_style()) mp_drawing.draw_landmarks( # Draw just the outlines image=black, landmark_list=face_landmarks, connections=mp_face_mesh.FACEMESH_CONTOURS, landmark_drawing_spec=None, connection_drawing_spec=mp_drawing_styles .get_default_face_mesh_contours_style()) mp_drawing.draw_landmarks( # Draw the eyes image=black, landmark_list=face_landmarks, connections=mp_face_mesh.FACEMESH_IRISES, landmark_drawing_spec=None, connection_drawing_spec=mp_drawing_styles .get_default_face_mesh_iris_connections_style()) faces+=1 #---------- Determine action ----------- cv2.putText(black, str("Face recognition (stealth)"), (25,50), cv2.FONT_HERSHEY_SIMPLEX,1,(0,0,255), 2) # Text cv2.putText(black, str("Number of faces: "+ str(faces)), (25,80), cv2.FONT_HERSHEY_SIMPLEX,1,(255,255,255), 2) # Text if (faces == 1): cv2.putText(black, str("You need another friend!"), (25,110), cv2.FONT_HERSHEY_SIMPLEX,1,(255,255,255), 2) # Text if (faces == 2): cv2.putText(black, str("Hello you two!"), (25,110), cv2.FONT_HERSHEY_SIMPLEX,1,(255,255,255), 2) # Text cv2.rectangle(black, (12, 12), (1266, 706), (0, 255, 0), 20) if (faces > 2): cv2.putText(black, str("TOO MANY PEOPLE!"), (25,110), cv2.FONT_HERSHEY_SIMPLEX,1,(255,255,255), 2) # Text #------------ Toggle video mode --------------- if keyboard.is_pressed('v') and showvideo == True: showvideo = False while keyboard.is_pressed('v'): cv2.waitKey(1) if keyboard.is_pressed('v') and showvideo == False: showvideo = True while keyboard.is_pressed('v'): cv2.waitKey(1) if showvideo == True: cv2.imshow('MediaPipe Face Mesh', image) # Display the live video feed (window name, image) #cv2.imshow('MediaPipe Face Mesh', cv2.flip(image, 1)) # Horizontally flip the image (selfie mode) else: cv2.imshow('MediaPipe Face Mesh', black) # Display the blank background image #cv2.imshow('MediaPipe Face Mesh', cv2.flip(black, 1)) # Horizontally flip the image (selfie mode) if cv2.waitKey(5) & 0xFF == 27: break cap.release()