From 569879193ab0dae23ef796775da7f1649c317343 Mon Sep 17 00:00:00 2001
From: Srinadh Vura <83588454+SrinadhVura@users.noreply.github.com>
Date: Sat, 21 Oct 2023 12:57:21 +0530
Subject: [PATCH 1/4] Create Readme.md

---
 Volume_hand_controller/Readme.md | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 Volume_hand_controller/Readme.md

diff --git a/Volume_hand_controller/Readme.md b/Volume_hand_controller/Readme.md
new file mode 100644
index 00000000..fc7ab3e6
--- /dev/null
+++ b/Volume_hand_controller/Readme.md
@@ -0,0 +1,4 @@
+## Volume controller
+This project uses the MediaPipe framework along with OpenCV to identify 21 landmarks on a hand and control the system volume based on the distance between the index and middle fingertips.
+
+MediaPipe is an open-source framework from Google that bundles pretrained deep-learning models for detecting pose, hands, objects, faces, and more.

From 12d9acd75250ee77d95b87b929cbc8f1d33f856f Mon Sep 17 00:00:00 2001
From: Srinadh Vura <83588454+SrinadhVura@users.noreply.github.com>
Date: Sat, 21 Oct 2023 12:58:17 +0530
Subject: [PATCH 2/4] Create requirements.txt

---
 Volume_hand_controller/requirements.txt | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 Volume_hand_controller/requirements.txt

diff --git a/Volume_hand_controller/requirements.txt b/Volume_hand_controller/requirements.txt
new file mode 100644
index 00000000..e169d94b
--- /dev/null
+++ b/Volume_hand_controller/requirements.txt
@@ -0,0 +1,5 @@
+pycaw==20230407
+opencv-python==4.7.0.72
+numpy==1.24.3
+comtypes==1.2.0
+mediapipe==0.10.3
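For reference, a minimal sketch of the MediaPipe Hands API that the next patch wraps, assuming the mediapipe==0.10.3 pin above; the image path and variable names here are illustrative, not part of any patch:

    import cv2
    import mediapipe as mp

    hands = mp.solutions.hands.Hands(static_image_mode=True, max_num_hands=1)
    frame = cv2.imread("hand.jpg")  # any BGR image containing a hand (hypothetical file)
    res = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))  # MediaPipe expects RGB input
    if res.multi_hand_landmarks:
        lm = res.multi_hand_landmarks[0].landmark  # 21 landmarks with normalized x, y, z
        print(lm[8].x, lm[8].y)  # landmark 8 is the index fingertip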
From d539d614c50d12f59204e715ef96bde0101ee21a Mon Sep 17 00:00:00 2001
From: Srinadh Vura <83588454+SrinadhVura@users.noreply.github.com>
Date: Sat, 21 Oct 2023 12:59:00 +0530
Subject: [PATCH 3/4] Create HTrack.py

---
 Volume_hand_controller/HTrack.py | 61 ++++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)
 create mode 100644 Volume_hand_controller/HTrack.py

diff --git a/Volume_hand_controller/HTrack.py b/Volume_hand_controller/HTrack.py
new file mode 100644
index 00000000..969560c0
--- /dev/null
+++ b/Volume_hand_controller/HTrack.py
@@ -0,0 +1,61 @@
+"""
+This file uses MediaPipe, a framework by Google, to detect the 21 landmarks of a hand
+in images captured from the webcam with OpenCV.
+The functionality is wrapped in a handDetector class with two methods:
+drawHands()    --> runs the detection and draws the landmarks of detected hands
+getPositions() --> returns the landmarks found by the last drawHands() call as a list
+"""
+import cv2
+import mediapipe as mp
+import time
+
+class handDetector():
+    def __init__(self, mode=False, maxHands=2, detectionCon=0.5, trackCon=0.5):
+        self.mode = mode
+        self.maxHands = maxHands
+        self.detectionCon = detectionCon
+        self.trackCon = trackCon
+        self.mpDraw = mp.solutions.drawing_utils
+        self.hands = mp.solutions.hands.Hands(static_image_mode=self.mode, max_num_hands=self.maxHands, min_detection_confidence=self.detectionCon, min_tracking_confidence=self.trackCon)
+    def drawHands(self, img, draw=True):
+        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # MediaPipe expects RGB; OpenCV delivers BGR
+        self.res = self.hands.process(imgRGB)
+        if self.res.multi_hand_landmarks:
+            for handLms in self.res.multi_hand_landmarks:
+                if draw:
+                    self.mpDraw.draw_landmarks(img, handLms, mp.solutions.hands.HAND_CONNECTIONS)
+        return img
+
+    def getPositions(self, img, handId=0):
+        # call drawHands() first: it runs the detection and stores the result in self.res
+        self.xList = []
+        self.yList = []
+        self.lmList = []
+        if self.res.multi_hand_landmarks:
+            decHand = self.res.multi_hand_landmarks[handId]
+            for index, lmark in enumerate(decHand.landmark):
+                h, w, c = img.shape
+                cx, cy = int(lmark.x * w), int(lmark.y * h)  # convert normalized coords to pixels
+                self.xList.append(cx)
+                self.yList.append(cy)
+                self.lmList.append([index, cx, cy])
+        return self.lmList
+
+def main():
+    prevTime = time.time()
+    cap = cv2.VideoCapture(0)
+    detector = handDetector()
+    while True:
+        _, img = cap.read()
+        img = detector.drawHands(img)
+        lms = detector.getPositions(img)
+        print(lms)
+        currTime = time.time()
+        fps = 1 / (currTime - prevTime)
+        prevTime = currTime
+        cv2.putText(img, str(int(fps)), (10, 70), cv2.FONT_HERSHEY_PLAIN, 3, (51, 0, 255), 3)
+        cv2.imshow("Image", img)
+        cv2.waitKey(1)
+
+if __name__ == "__main__":
+    main()
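The lmList returned by getPositions() holds one [id, x_px, y_px] triple per landmark. A small usage sketch of the handDetector class, roughly what Controller.py in the next patch does (grab one frame, measure the index-to-middle fingertip distance in pixels):

    import math
    import cv2
    import HTrack as ht

    cap = cv2.VideoCapture(0)
    detector = ht.handDetector(maxHands=1)
    ok, img = cap.read()
    if ok:
        img = detector.drawHands(img)  # runs the detection; must precede getPositions()
        lms = detector.getPositions(img)
        if lms:  # 21 [id, x_px, y_px] entries when a hand is visible
            x1, y1 = lms[8][1], lms[8][2]    # index fingertip
            x2, y2 = lms[12][1], lms[12][2]  # middle fingertip
            print(math.dist([x1, y1], [x2, y2]))
    cap.release()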
From 45e35280d044d5552e418a097841968e16373b2f Mon Sep 17 00:00:00 2001
From: Srinadh Vura <83588454+SrinadhVura@users.noreply.github.com>
Date: Sat, 21 Oct 2023 12:59:26 +0530
Subject: [PATCH 4/4] Create Controller.py

---
 Volume_hand_controller/Controller.py | 59 ++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)
 create mode 100644 Volume_hand_controller/Controller.py

diff --git a/Volume_hand_controller/Controller.py b/Volume_hand_controller/Controller.py
new file mode 100644
index 00000000..55112141
--- /dev/null
+++ b/Volume_hand_controller/Controller.py
@@ -0,0 +1,59 @@
+""" I have used pycaw by Andre Miras (https://github.com/AndreMiras/pycaw) to control
+    the audio. It is an open-source module that can be installed with --> pip install pycaw
+"""
+import math
+import cv2
+import time
+import numpy as np
+import HTrack as ht
+from comtypes import CLSCTX_ALL
+from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume
+
+height = 720  # height of the webcam window
+width = 1080  # width of the webcam window
+cap = cv2.VideoCapture(0)  # using the default system webcam (device 0)
+cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)  # property index 3
+cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)  # property index 4
+prevTime = 0
+detector = ht.handDetector(detectionCon=0.7)  # higher confidence so the hand is detected more reliably
+devices = AudioUtilities.GetSpeakers()  # get the system speakers as a COM device
+interface = devices.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)  # activate the volume-control interface
+volume = interface.QueryInterface(IAudioEndpointVolume)
+rangeVol = volume.GetVolumeRange()  # volume range in dB; about -96 to 0 on the author's system
+volLeast = rangeVol[0]
+volHigh = rangeVol[1]
+vol = volLeast  # initialise volume to the minimum
+volBar = 400  # initialise the volume-bar position
+while True:
+    success, img = cap.read()  # read a frame
+    img = detector.drawHands(img)  # detect the hands
+    lmList = detector.getPositions(img)  # get landmarks of the detected hand
+    if len(lmList) != 0:
+        # print(lmList[8], lmList[12])
+        x1, y1 = lmList[8][1], lmList[8][2]
+        x2, y2 = lmList[12][1], lmList[12][2]
+        cx, cy = (x1 + x2) // 2, (y1 + y2) // 2
+        cv2.circle(img, (x1, y1), 10, (170, 153, 255), cv2.FILLED)  # circle at the index fingertip
+        cv2.circle(img, (x2, y2), 10, (170, 153, 255), cv2.FILLED)  # circle at the middle fingertip
+        cv2.circle(img, (cx, cy), 10, (170, 153, 255), cv2.FILLED)  # circle at the midpoint of the two fingertips
+        cv2.line(img, (x1, y1), (x2, y2), (25, 102, 180), 2)
+        distance = math.dist([x1, y1], [x2, y2])
+        # print(distance)  # the distance was found to range between 25 and 150 pixels
+        vol = np.interp(distance, [25, 150], [volLeast, volHigh])  # interpolate fingertip distance to system volume
+        volBar = np.interp(distance, [25, 150], [400, 100])  # interpolate fingertip distance to volume-bar length
+        volume.SetMasterVolumeLevel(vol, None)
+        print(int(distance), vol)
+        if distance < 25:
+            cv2.circle(img, (cx, cy), 10, (0, 0, 120), cv2.FILLED)  # indicate volume is 0 by recolouring the centre circle
+    # drawing the volume bar
+    cv2.rectangle(img, (80, 100), (60, 400), (230, 153, 0), 2)  # outer rectangle
+    cv2.rectangle(img, (80, int(volBar)), (60, 400), (150, 153, 0), cv2.FILLED)  # current volume fill
+    cv2.putText(img, "Volume", (40, 420), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (149, 100, 149), 2)
+
+    currTime = time.time()
+    fps = 1 / (currTime - prevTime)  # frames per second from the previous and current timestamps
+    prevTime = currTime
+    cv2.putText(img, str(int(fps)), (30, 40), cv2.FONT_HERSHEY_PLAIN, 2, (255, 255, 0), 3)  # write FPS on screen
+    cv2.imshow("Image", img)
+    cv2.waitKey(1)
+
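One caveat with the mapping above: GetVolumeRange() returns values in decibels, so interpolating the fingertip distance linearly across that range concentrates most of the audible change near the top. A hedged alternative sketch using pycaw's scalar API (SetMasterVolumeLevelScalar takes a 0.0 to 1.0 fraction, matching the Windows mixer slider); the distance value here is illustrative:

    import numpy as np
    from comtypes import CLSCTX_ALL
    from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume

    devices = AudioUtilities.GetSpeakers()
    interface = devices.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
    volume = interface.QueryInterface(IAudioEndpointVolume)

    distance = 90  # example fingertip distance in pixels, mid-range between 25 and 150
    frac = np.interp(distance, [25, 150], [0.0, 1.0])  # map distance to a 0-1 volume fraction
    volume.SetMasterVolumeLevelScalar(float(frac), None)  # set volume as a slider fraction, not dB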