Add files via upload

TemugeB · Sep 14, 2021 · 88bc80c · 88bc80c
1 parent a621a65
commit 88bc80c
Show file tree

Hide file tree

Showing 13 changed files with 369 additions and 0 deletions.
diff --git a/bodypose3d.py b/bodypose3d.py
@@ -0,0 +1,162 @@
+import cv2 as cv
+import mediapipe as mp
+import numpy as np
+import sys
+from utils import DLT, get_projection_matrix, write_keypoints_to_disk
+
+mp_drawing = mp.solutions.drawing_utils
+mp_drawing_styles = mp.solutions.drawing_styles
+mp_pose = mp.solutions.pose
+
+frame_shape = [720, 1280]
+
+#add here if you need more keypoints
+pose_keypoints = [16, 14, 12, 11, 13, 15, 24, 23, 25, 26, 27, 28]
+
+def run_mp(input_stream1, input_stream2, P0, P1):
+    #input video stream
+    cap0 = cv.VideoCapture(input_stream1)
+    cap1 = cv.VideoCapture(input_stream2)
+    caps = [cap0, cap1]
+
+    #set camera resolution if using webcam to 1280x720. Any bigger will cause some lag for hand detection
+    for cap in caps:
+        cap.set(3, frame_shape[1])
+        cap.set(4, frame_shape[0])
+
+    #create body keypoints detector objects.
+    pose0 = mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5)
+    pose1 = mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5)
+
+    #containers for detected keypoints for each camera. These are filled at each frame.
+    #This will run you into memory issue if you run the program without stop
+    kpts_cam0 = []
+    kpts_cam1 = []
+    kpts_3d = []
+    while True:
+
+        #read frames from stream
+        ret0, frame0 = cap0.read()
+        ret1, frame1 = cap1.read()
+
+        if not ret0 or not ret1: break
+
+        #crop to 720x720.
+        #Note: camera calibration parameters are set to this resolution.If you change this, make sure to also change camera intrinsic parameters
+        if frame0.shape[1] != 720:
+            frame0 = frame0[:,frame_shape[1]//2 - frame_shape[0]//2:frame_shape[1]//2 + frame_shape[0]//2]
+            frame1 = frame1[:,frame_shape[1]//2 - frame_shape[0]//2:frame_shape[1]//2 + frame_shape[0]//2]
+
+        # the BGR image to RGB.
+        frame0 = cv.cvtColor(frame0, cv.COLOR_BGR2RGB)
+        frame1 = cv.cvtColor(frame1, cv.COLOR_BGR2RGB)
+
+        # To improve performance, optionally mark the image as not writeable to
+        # pass by reference.
+        frame0.flags.writeable = False
+        frame1.flags.writeable = False
+        results0 = pose0.process(frame0)
+        results1 = pose1.process(frame1)
+
+        #reverse changes
+        frame0.flags.writeable = True
+        frame1.flags.writeable = True
+        frame0 = cv.cvtColor(frame0, cv.COLOR_RGB2BGR)
+        frame1 = cv.cvtColor(frame1, cv.COLOR_RGB2BGR)
+
+        #check for keypoints detection
+        frame0_keypoints = []
+        if results0.pose_landmarks:
+            for i, landmark in enumerate(results0.pose_landmarks.landmark):
+                if i not in pose_keypoints: continue #only save keypoints that are indicated in pose_keypoints
+                pxl_x = landmark.x * frame0.shape[1]
+                pxl_y = landmark.y * frame0.shape[0]
+                pxl_x = int(round(pxl_x))
+                pxl_y = int(round(pxl_y))
+                cv.circle(frame0,(pxl_x, pxl_y), 3, (0,0,255), -1) #add keypoint detection points into figure
+                kpts = [pxl_x, pxl_y]
+                frame0_keypoints.append(kpts)
+        else:
+            #if no keypoints are found, simply fill the frame data with [-1,-1] for each kpt
+            frame0_keypoints = [[-1, -1]]*len(pose_keypoints)
+
+        #this will keep keypoints of this frame in memory
+        kpts_cam0.append(frame0_keypoints)
+
+        frame1_keypoints = []
+        if results1.pose_landmarks:
+            for i, landmark in enumerate(results1.pose_landmarks.landmark):
+                if i not in pose_keypoints: continue
+                pxl_x = landmark.x * frame0.shape[1]
+                pxl_y = landmark.y * frame0.shape[0]
+                pxl_x = int(round(pxl_x))
+                pxl_y = int(round(pxl_y))
+                cv.circle(frame1,(pxl_x, pxl_y), 3, (0,0,255), -1)
+                kpts = [pxl_x, pxl_y]
+                frame1_keypoints.append(kpts)
+
+        else:
+            #if no keypoints are found, simply fill the frame data with [-1,-1] for each kpt
+            frame1_keypoints = [[-1, -1]]*len(pose_keypoints)
+
+        #update keypoints container
+        kpts_cam1.append(frame1_keypoints)
+
+        #calculate 3d position
+        frame_p3ds = []
+        for uv1, uv2 in zip(frame0_keypoints, frame1_keypoints):
+            if uv1[0] == -1 or uv2[0] == -1:
+                _p3d = [-1, -1, -1]
+            else:
+                _p3d = DLT(P0, P1, uv1, uv2) #calculate 3d position of keypoint
+            frame_p3ds.append(_p3d)
+
+        '''
+        This contains the 3d position of each keypoint in current frame.
+        For real time application, this is what you want.
+        '''
+        frame_p3ds = np.array(frame_p3ds).reshape((12, 3))
+        kpts_3d.append(frame_p3ds)
+
+        # uncomment these if you want to see the full keypoints detections
+        # mp_drawing.draw_landmarks(frame0, results0.pose_landmarks, mp_pose.POSE_CONNECTIONS,
+        #                           landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style())
+        #
+        # mp_drawing.draw_landmarks(frame1, results1.pose_landmarks, mp_pose.POSE_CONNECTIONS,
+        #                           landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style())
+
+        cv.imshow('cam1', frame1)
+        cv.imshow('cam0', frame0)
+
+        k = cv.waitKey(1)
+        if k & 0xFF == 27: break #27 is ESC key.
+
+
+    cv.destroyAllWindows()
+    for cap in caps:
+        cap.release()
+
+
+    return np.array(kpts_cam0), np.array(kpts_cam1), np.array(kpts_3d)
+
+if __name__ == '__main__':
+
+    #this will load the sample videos if no camera ID is given
+    input_stream1 = 'media/cam0_test.mp4'
+    input_stream2 = 'media/cam1_test.mp4'
+
+    #put camera id as command line arguements
+    if len(sys.argv) == 3:
+        input_stream1 = int(sys.argv[1])
+        input_stream2 = int(sys.argv[2])
+
+    #get projection matrices
+    P0 = get_projection_matrix(0)
+    P1 = get_projection_matrix(1)
+
+    kpts_cam0, kpts_cam1, kpts_3d = run_mp(input_stream1, input_stream2, P0, P1)
+
+    #this will create keypoints file in current working folder
+    write_keypoints_to_disk('kpts_cam0.dat', kpts_cam0)
+    write_keypoints_to_disk('kpts_cam1.dat', kpts_cam1)
+    write_keypoints_to_disk('kpts_3d.dat', kpts_3d)
diff --git a/camera_parameters/c0.dat b/camera_parameters/c0.dat
@@ -0,0 +1,6 @@
+intrinsic:
+900.4857632254283 0.0 366.8661146698476 
+0.0 902.6176551471326 363.7146796254499 
+0.0 0.0 1.0 
+distortion:
+-0.04074323275039075 1.3719603634486155 -0.0027819018501480247 0.0011755031235693644 -5.637775169623254 
diff --git a/camera_parameters/c1.dat b/camera_parameters/c1.dat
@@ -0,0 +1,6 @@
+intrinsic:
+926.7112344222677 0.0 366.86387719274677 
+0.0 928.7212583044412 358.17005894003324 
+0.0 0.0 1.0 
+distortion:
+-0.042736966121127366 1.355959158837944 -0.0024952648024986887 0.0033896734823038875 -5.316323475425649 
diff --git a/camera_parameters/rot_trans_c0.dat b/camera_parameters/rot_trans_c0.dat
@@ -0,0 +1,8 @@
+R:
+0.03522596169240533 0.9989186047045775 -0.030343908744683592 
+-0.9954114357284269 0.0323669499742608 -0.09004695536460891 
+-0.08896743923434776 0.03337666436934931 0.9954751594247135 
+T:
+-3.7714497419971353 
+0.5952700116552851 
+14.753837487029038 
diff --git a/camera_parameters/rot_trans_c1.dat b/camera_parameters/rot_trans_c1.dat
@@ -0,0 +1,8 @@
+R:
+0.01862128  0.72598112  0.68746248
+-0.99830361 -0.02443645  0.05284666
+0.05516482 -0.68728034  0.72429454
+T:
+-8.97548292
+1.56690522
+22.88471505
diff --git a/media/cam0_kpts.gif b/media/cam0_kpts.gif
diff --git a/media/cam0_test.mp4 b/media/cam0_test.mp4
diff --git a/media/cam1_kpts.gif b/media/cam1_kpts.gif
diff --git a/media/cam1_test.mp4 b/media/cam1_test.mp4
diff --git a/media/keypoints_ids.png b/media/keypoints_ids.png
diff --git a/media/pose2.gif b/media/pose2.gif
diff --git a/show_3d_pose.py b/show_3d_pose.py
@@ -0,0 +1,73 @@
+import numpy as np
+import matplotlib.pyplot as plt
+from utils import DLT
+plt.style.use('seaborn')
+
+
+pose_keypoints = np.array([16, 14, 12, 11, 13, 15, 24, 23, 25, 26, 27, 28])
+
+def read_keypoints(filename):
+    fin = open(filename, 'r')
+
+    kpts = []
+    while(True):
+        line = fin.readline()
+        if line == '': break
+
+        line = line.split()
+        line = [float(s) for s in line]
+
+        line = np.reshape(line, (len(pose_keypoints), -1))
+        kpts.append(line)
+
+    kpts = np.array(kpts)
+    return kpts
+
+
+def visualize_3d(p3ds):
+
+    """Now visualize in 3D"""
+    torso = [[0, 1] , [1, 7], [7, 6], [6, 0]]
+    armr = [[1, 3], [3, 5]]
+    arml = [[0, 2], [2, 4]]
+    legr = [[6, 8], [8, 10]]
+    legl = [[7, 9], [9, 11]]
+    body = [torso, arml, armr, legr, legl]
+    colors = ['red', 'blue', 'green', 'black', 'orange']
+
+    from mpl_toolkits.mplot3d import Axes3D
+
+    fig = plt.figure()
+    ax = fig.add_subplot(111, projection='3d')
+
+    for framenum, kpts3d in enumerate(p3ds):
+        if framenum%2 == 0: continue #skip every 2nd frame
+        for bodypart, part_color in zip(body, colors):
+            for _c in bodypart:
+                ax.plot(xs = [kpts3d[_c[0],0], kpts3d[_c[1],0]], ys = [kpts3d[_c[0],1], kpts3d[_c[1],1]], zs = [kpts3d[_c[0],2], kpts3d[_c[1],2]], linewidth = 4, c = part_color)
+
+        #uncomment these if you want scatter plot of keypoints and their indices.
+        # for i in range(12):
+        #     #ax.text(kpts3d[i,0], kpts3d[i,1], kpts3d[i,2], str(i))
+        #     #ax.scatter(xs = kpts3d[i:i+1,0], ys = kpts3d[i:i+1,1], zs = kpts3d[i:i+1,2])
+
+
+        #ax.set_axis_off()
+        ax.set_xticks([])
+        ax.set_yticks([])
+        ax.set_zticks([])
+
+        ax.set_xlim3d(-10, 10)
+        ax.set_xlabel('x')
+        ax.set_ylim3d(-10, 10)
+        ax.set_ylabel('y')
+        ax.set_zlim3d(-10, 10)
+        ax.set_zlabel('z')
+        plt.pause(0.1)
+        ax.cla()
+
+
+if __name__ == '__main__':
+
+    p3ds = read_keypoints('kpts_3d.dat')
+    visualize_3d(p3ds)
diff --git a/utils.py b/utils.py
@@ -0,0 +1,106 @@
+import numpy as np
+
+
+def _make_homogeneous_rep_matrix(R, t):
+    P = np.zeros((4,4))
+    P[:3,:3] = R
+    P[:3, 3] = t.reshape(3)
+    P[3,3] = 1
+    return P
+
+#direct linear transform
+def DLT(P1, P2, point1, point2):
+
+    A = [point1[1]*P1[2,:] - P1[1,:],
+         P1[0,:] - point1[0]*P1[2,:],
+         point2[1]*P2[2,:] - P2[1,:],
+         P2[0,:] - point2[0]*P2[2,:]
+        ]
+    A = np.array(A).reshape((4,4))
+    #print('A: ')
+    #print(A)
+
+    B = A.transpose() @ A
+    from scipy import linalg
+    U, s, Vh = linalg.svd(B, full_matrices = False)
+
+    #print('Triangulated point: ')
+    #print(Vh[3,0:3]/Vh[3,3])
+    return Vh[3,0:3]/Vh[3,3]
+
+def read_camera_parameters(camera_id):
+
+    inf = open('camera_parameters/c' + str(camera_id) + '.dat', 'r')
+
+    cmtx = []
+    dist = []
+
+    line = inf.readline()
+    for _ in range(3):
+        line = inf.readline().split()
+        line = [float(en) for en in line]
+        cmtx.append(line)
+
+    line = inf.readline()
+    line = inf.readline().split()
+    line = [float(en) for en in line]
+    dist.append(line)
+
+    return np.array(cmtx), np.array(dist)
+
+def read_rotation_translation(camera_id, savefolder = 'camera_parameters/'):
+
+    inf = open(savefolder + 'rot_trans_c'+ str(camera_id) + '.dat', 'r')
+
+    inf.readline()
+    rot = []
+    trans = []
+    for _ in range(3):
+        line = inf.readline().split()
+        line = [float(en) for en in line]
+        rot.append(line)
+
+    inf.readline()
+    for _ in range(3):
+        line = inf.readline().split()
+        line = [float(en) for en in line]
+        trans.append(line)
+
+    inf.close()
+    return np.array(rot), np.array(trans)
+
+def _convert_to_homogeneous(pts):
+    pts = np.array(pts)
+    if len(pts.shape) > 1:
+        w = np.ones((pts.shape[0], 1))
+        return np.concatenate([pts, w], axis = 1)
+    else:
+        return np.concatenate([pts, [1]], axis = 0)
+
+def get_projection_matrix(camera_id):
+
+    #read camera parameters
+    cmtx, dist = read_camera_parameters(camera_id)
+    rvec, tvec = read_rotation_translation(camera_id)
+
+    #calculate projection matrix
+    P = cmtx @ _make_homogeneous_rep_matrix(rvec, tvec)[:3,:]
+    return P
+
+def write_keypoints_to_disk(filename, kpts):
+    fout = open(filename, 'w')
+
+    for frame_kpts in kpts:
+        for kpt in frame_kpts:
+            if len(kpt) == 2:
+                fout.write(str(kpt[0]) + ' ' + str(kpt[1]) + ' ')
+            else:
+                fout.write(str(kpt[0]) + ' ' + str(kpt[1]) + ' ' + str(kpt[2]) + ' ')
+
+        fout.write('\n')
+    fout.close()
+
+if __name__ == '__main__':
+
+    P2 = get_projection_matrix(0)
+    P1 = get_projection_matrix(1)