diff --git a/bodypose3d.py b/bodypose3d.py new file mode 100644 index 0000000..c9f7159 --- /dev/null +++ b/bodypose3d.py @@ -0,0 +1,162 @@ +import cv2 as cv +import mediapipe as mp +import numpy as np +import sys +from utils import DLT, get_projection_matrix, write_keypoints_to_disk + +mp_drawing = mp.solutions.drawing_utils +mp_drawing_styles = mp.solutions.drawing_styles +mp_pose = mp.solutions.pose + +frame_shape = [720, 1280] + +#add here if you need more keypoints +pose_keypoints = [16, 14, 12, 11, 13, 15, 24, 23, 25, 26, 27, 28] + +def run_mp(input_stream1, input_stream2, P0, P1): + #input video stream + cap0 = cv.VideoCapture(input_stream1) + cap1 = cv.VideoCapture(input_stream2) + caps = [cap0, cap1] + + #set camera resolution if using webcam to 1280x720. Any bigger will cause some lag for hand detection + for cap in caps: + cap.set(3, frame_shape[1]) + cap.set(4, frame_shape[0]) + + #create body keypoints detector objects. + pose0 = mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) + pose1 = mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) + + #containers for detected keypoints for each camera. These are filled at each frame. + #This will run you into memory issue if you run the program without stop + kpts_cam0 = [] + kpts_cam1 = [] + kpts_3d = [] + while True: + + #read frames from stream + ret0, frame0 = cap0.read() + ret1, frame1 = cap1.read() + + if not ret0 or not ret1: break + + #crop to 720x720. + #Note: camera calibration parameters are set to this resolution.If you change this, make sure to also change camera intrinsic parameters + if frame0.shape[1] != 720: + frame0 = frame0[:,frame_shape[1]//2 - frame_shape[0]//2:frame_shape[1]//2 + frame_shape[0]//2] + frame1 = frame1[:,frame_shape[1]//2 - frame_shape[0]//2:frame_shape[1]//2 + frame_shape[0]//2] + + # the BGR image to RGB. + frame0 = cv.cvtColor(frame0, cv.COLOR_BGR2RGB) + frame1 = cv.cvtColor(frame1, cv.COLOR_BGR2RGB) + + # To improve performance, optionally mark the image as not writeable to + # pass by reference. + frame0.flags.writeable = False + frame1.flags.writeable = False + results0 = pose0.process(frame0) + results1 = pose1.process(frame1) + + #reverse changes + frame0.flags.writeable = True + frame1.flags.writeable = True + frame0 = cv.cvtColor(frame0, cv.COLOR_RGB2BGR) + frame1 = cv.cvtColor(frame1, cv.COLOR_RGB2BGR) + + #check for keypoints detection + frame0_keypoints = [] + if results0.pose_landmarks: + for i, landmark in enumerate(results0.pose_landmarks.landmark): + if i not in pose_keypoints: continue #only save keypoints that are indicated in pose_keypoints + pxl_x = landmark.x * frame0.shape[1] + pxl_y = landmark.y * frame0.shape[0] + pxl_x = int(round(pxl_x)) + pxl_y = int(round(pxl_y)) + cv.circle(frame0,(pxl_x, pxl_y), 3, (0,0,255), -1) #add keypoint detection points into figure + kpts = [pxl_x, pxl_y] + frame0_keypoints.append(kpts) + else: + #if no keypoints are found, simply fill the frame data with [-1,-1] for each kpt + frame0_keypoints = [[-1, -1]]*len(pose_keypoints) + + #this will keep keypoints of this frame in memory + kpts_cam0.append(frame0_keypoints) + + frame1_keypoints = [] + if results1.pose_landmarks: + for i, landmark in enumerate(results1.pose_landmarks.landmark): + if i not in pose_keypoints: continue + pxl_x = landmark.x * frame0.shape[1] + pxl_y = landmark.y * frame0.shape[0] + pxl_x = int(round(pxl_x)) + pxl_y = int(round(pxl_y)) + cv.circle(frame1,(pxl_x, pxl_y), 3, (0,0,255), -1) + kpts = [pxl_x, pxl_y] + frame1_keypoints.append(kpts) + + else: + #if no keypoints are found, simply fill the frame data with [-1,-1] for each kpt + frame1_keypoints = [[-1, -1]]*len(pose_keypoints) + + #update keypoints container + kpts_cam1.append(frame1_keypoints) + + #calculate 3d position + frame_p3ds = [] + for uv1, uv2 in zip(frame0_keypoints, frame1_keypoints): + if uv1[0] == -1 or uv2[0] == -1: + _p3d = [-1, -1, -1] + else: + _p3d = DLT(P0, P1, uv1, uv2) #calculate 3d position of keypoint + frame_p3ds.append(_p3d) + + ''' + This contains the 3d position of each keypoint in current frame. + For real time application, this is what you want. + ''' + frame_p3ds = np.array(frame_p3ds).reshape((12, 3)) + kpts_3d.append(frame_p3ds) + + # uncomment these if you want to see the full keypoints detections + # mp_drawing.draw_landmarks(frame0, results0.pose_landmarks, mp_pose.POSE_CONNECTIONS, + # landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style()) + # + # mp_drawing.draw_landmarks(frame1, results1.pose_landmarks, mp_pose.POSE_CONNECTIONS, + # landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style()) + + cv.imshow('cam1', frame1) + cv.imshow('cam0', frame0) + + k = cv.waitKey(1) + if k & 0xFF == 27: break #27 is ESC key. + + + cv.destroyAllWindows() + for cap in caps: + cap.release() + + + return np.array(kpts_cam0), np.array(kpts_cam1), np.array(kpts_3d) + +if __name__ == '__main__': + + #this will load the sample videos if no camera ID is given + input_stream1 = 'media/cam0_test.mp4' + input_stream2 = 'media/cam1_test.mp4' + + #put camera id as command line arguements + if len(sys.argv) == 3: + input_stream1 = int(sys.argv[1]) + input_stream2 = int(sys.argv[2]) + + #get projection matrices + P0 = get_projection_matrix(0) + P1 = get_projection_matrix(1) + + kpts_cam0, kpts_cam1, kpts_3d = run_mp(input_stream1, input_stream2, P0, P1) + + #this will create keypoints file in current working folder + write_keypoints_to_disk('kpts_cam0.dat', kpts_cam0) + write_keypoints_to_disk('kpts_cam1.dat', kpts_cam1) + write_keypoints_to_disk('kpts_3d.dat', kpts_3d) diff --git a/camera_parameters/c0.dat b/camera_parameters/c0.dat new file mode 100644 index 0000000..147aa52 --- /dev/null +++ b/camera_parameters/c0.dat @@ -0,0 +1,6 @@ +intrinsic: +900.4857632254283 0.0 366.8661146698476 +0.0 902.6176551471326 363.7146796254499 +0.0 0.0 1.0 +distortion: +-0.04074323275039075 1.3719603634486155 -0.0027819018501480247 0.0011755031235693644 -5.637775169623254 diff --git a/camera_parameters/c1.dat b/camera_parameters/c1.dat new file mode 100644 index 0000000..0d6ff65 --- /dev/null +++ b/camera_parameters/c1.dat @@ -0,0 +1,6 @@ +intrinsic: +926.7112344222677 0.0 366.86387719274677 +0.0 928.7212583044412 358.17005894003324 +0.0 0.0 1.0 +distortion: +-0.042736966121127366 1.355959158837944 -0.0024952648024986887 0.0033896734823038875 -5.316323475425649 diff --git a/camera_parameters/rot_trans_c0.dat b/camera_parameters/rot_trans_c0.dat new file mode 100644 index 0000000..5e30379 --- /dev/null +++ b/camera_parameters/rot_trans_c0.dat @@ -0,0 +1,8 @@ +R: +0.03522596169240533 0.9989186047045775 -0.030343908744683592 +-0.9954114357284269 0.0323669499742608 -0.09004695536460891 +-0.08896743923434776 0.03337666436934931 0.9954751594247135 +T: +-3.7714497419971353 +0.5952700116552851 +14.753837487029038 diff --git a/camera_parameters/rot_trans_c1.dat b/camera_parameters/rot_trans_c1.dat new file mode 100644 index 0000000..10c0d31 --- /dev/null +++ b/camera_parameters/rot_trans_c1.dat @@ -0,0 +1,8 @@ +R: +0.01862128 0.72598112 0.68746248 +-0.99830361 -0.02443645 0.05284666 +0.05516482 -0.68728034 0.72429454 +T: +-8.97548292 +1.56690522 +22.88471505 diff --git a/media/cam0_kpts.gif b/media/cam0_kpts.gif new file mode 100644 index 0000000..c94d0a1 Binary files /dev/null and b/media/cam0_kpts.gif differ diff --git a/media/cam0_test.mp4 b/media/cam0_test.mp4 new file mode 100644 index 0000000..7eaecdf Binary files /dev/null and b/media/cam0_test.mp4 differ diff --git a/media/cam1_kpts.gif b/media/cam1_kpts.gif new file mode 100644 index 0000000..b06d1c4 Binary files /dev/null and b/media/cam1_kpts.gif differ diff --git a/media/cam1_test.mp4 b/media/cam1_test.mp4 new file mode 100644 index 0000000..2978854 Binary files /dev/null and b/media/cam1_test.mp4 differ diff --git a/media/keypoints_ids.png b/media/keypoints_ids.png new file mode 100644 index 0000000..c62b37e Binary files /dev/null and b/media/keypoints_ids.png differ diff --git a/media/pose2.gif b/media/pose2.gif new file mode 100644 index 0000000..0c84c7c Binary files /dev/null and b/media/pose2.gif differ diff --git a/show_3d_pose.py b/show_3d_pose.py new file mode 100644 index 0000000..08e114f --- /dev/null +++ b/show_3d_pose.py @@ -0,0 +1,73 @@ +import numpy as np +import matplotlib.pyplot as plt +from utils import DLT +plt.style.use('seaborn') + + +pose_keypoints = np.array([16, 14, 12, 11, 13, 15, 24, 23, 25, 26, 27, 28]) + +def read_keypoints(filename): + fin = open(filename, 'r') + + kpts = [] + while(True): + line = fin.readline() + if line == '': break + + line = line.split() + line = [float(s) for s in line] + + line = np.reshape(line, (len(pose_keypoints), -1)) + kpts.append(line) + + kpts = np.array(kpts) + return kpts + + +def visualize_3d(p3ds): + + """Now visualize in 3D""" + torso = [[0, 1] , [1, 7], [7, 6], [6, 0]] + armr = [[1, 3], [3, 5]] + arml = [[0, 2], [2, 4]] + legr = [[6, 8], [8, 10]] + legl = [[7, 9], [9, 11]] + body = [torso, arml, armr, legr, legl] + colors = ['red', 'blue', 'green', 'black', 'orange'] + + from mpl_toolkits.mplot3d import Axes3D + + fig = plt.figure() + ax = fig.add_subplot(111, projection='3d') + + for framenum, kpts3d in enumerate(p3ds): + if framenum%2 == 0: continue #skip every 2nd frame + for bodypart, part_color in zip(body, colors): + for _c in bodypart: + ax.plot(xs = [kpts3d[_c[0],0], kpts3d[_c[1],0]], ys = [kpts3d[_c[0],1], kpts3d[_c[1],1]], zs = [kpts3d[_c[0],2], kpts3d[_c[1],2]], linewidth = 4, c = part_color) + + #uncomment these if you want scatter plot of keypoints and their indices. + # for i in range(12): + # #ax.text(kpts3d[i,0], kpts3d[i,1], kpts3d[i,2], str(i)) + # #ax.scatter(xs = kpts3d[i:i+1,0], ys = kpts3d[i:i+1,1], zs = kpts3d[i:i+1,2]) + + + #ax.set_axis_off() + ax.set_xticks([]) + ax.set_yticks([]) + ax.set_zticks([]) + + ax.set_xlim3d(-10, 10) + ax.set_xlabel('x') + ax.set_ylim3d(-10, 10) + ax.set_ylabel('y') + ax.set_zlim3d(-10, 10) + ax.set_zlabel('z') + plt.pause(0.1) + ax.cla() + + +if __name__ == '__main__': + + p3ds = read_keypoints('kpts_3d.dat') + visualize_3d(p3ds) diff --git a/utils.py b/utils.py new file mode 100644 index 0000000..c070f97 --- /dev/null +++ b/utils.py @@ -0,0 +1,106 @@ +import numpy as np + + +def _make_homogeneous_rep_matrix(R, t): + P = np.zeros((4,4)) + P[:3,:3] = R + P[:3, 3] = t.reshape(3) + P[3,3] = 1 + return P + +#direct linear transform +def DLT(P1, P2, point1, point2): + + A = [point1[1]*P1[2,:] - P1[1,:], + P1[0,:] - point1[0]*P1[2,:], + point2[1]*P2[2,:] - P2[1,:], + P2[0,:] - point2[0]*P2[2,:] + ] + A = np.array(A).reshape((4,4)) + #print('A: ') + #print(A) + + B = A.transpose() @ A + from scipy import linalg + U, s, Vh = linalg.svd(B, full_matrices = False) + + #print('Triangulated point: ') + #print(Vh[3,0:3]/Vh[3,3]) + return Vh[3,0:3]/Vh[3,3] + +def read_camera_parameters(camera_id): + + inf = open('camera_parameters/c' + str(camera_id) + '.dat', 'r') + + cmtx = [] + dist = [] + + line = inf.readline() + for _ in range(3): + line = inf.readline().split() + line = [float(en) for en in line] + cmtx.append(line) + + line = inf.readline() + line = inf.readline().split() + line = [float(en) for en in line] + dist.append(line) + + return np.array(cmtx), np.array(dist) + +def read_rotation_translation(camera_id, savefolder = 'camera_parameters/'): + + inf = open(savefolder + 'rot_trans_c'+ str(camera_id) + '.dat', 'r') + + inf.readline() + rot = [] + trans = [] + for _ in range(3): + line = inf.readline().split() + line = [float(en) for en in line] + rot.append(line) + + inf.readline() + for _ in range(3): + line = inf.readline().split() + line = [float(en) for en in line] + trans.append(line) + + inf.close() + return np.array(rot), np.array(trans) + +def _convert_to_homogeneous(pts): + pts = np.array(pts) + if len(pts.shape) > 1: + w = np.ones((pts.shape[0], 1)) + return np.concatenate([pts, w], axis = 1) + else: + return np.concatenate([pts, [1]], axis = 0) + +def get_projection_matrix(camera_id): + + #read camera parameters + cmtx, dist = read_camera_parameters(camera_id) + rvec, tvec = read_rotation_translation(camera_id) + + #calculate projection matrix + P = cmtx @ _make_homogeneous_rep_matrix(rvec, tvec)[:3,:] + return P + +def write_keypoints_to_disk(filename, kpts): + fout = open(filename, 'w') + + for frame_kpts in kpts: + for kpt in frame_kpts: + if len(kpt) == 2: + fout.write(str(kpt[0]) + ' ' + str(kpt[1]) + ' ') + else: + fout.write(str(kpt[0]) + ' ' + str(kpt[1]) + ' ' + str(kpt[2]) + ' ') + + fout.write('\n') + fout.close() + +if __name__ == '__main__': + + P2 = get_projection_matrix(0) + P1 = get_projection_matrix(1)