-
Notifications
You must be signed in to change notification settings - Fork 34
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
13 changed files
with
369 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,162 @@ | ||
import cv2 as cv | ||
import mediapipe as mp | ||
import numpy as np | ||
import sys | ||
from utils import DLT, get_projection_matrix, write_keypoints_to_disk | ||
|
||
mp_drawing = mp.solutions.drawing_utils | ||
mp_drawing_styles = mp.solutions.drawing_styles | ||
mp_pose = mp.solutions.pose | ||
|
||
frame_shape = [720, 1280] | ||
|
||
#add here if you need more keypoints | ||
pose_keypoints = [16, 14, 12, 11, 13, 15, 24, 23, 25, 26, 27, 28] | ||
|
||
def run_mp(input_stream1, input_stream2, P0, P1): | ||
#input video stream | ||
cap0 = cv.VideoCapture(input_stream1) | ||
cap1 = cv.VideoCapture(input_stream2) | ||
caps = [cap0, cap1] | ||
|
||
#set camera resolution if using webcam to 1280x720. Any bigger will cause some lag for hand detection | ||
for cap in caps: | ||
cap.set(3, frame_shape[1]) | ||
cap.set(4, frame_shape[0]) | ||
|
||
#create body keypoints detector objects. | ||
pose0 = mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) | ||
pose1 = mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) | ||
|
||
#containers for detected keypoints for each camera. These are filled at each frame. | ||
#This will run you into memory issue if you run the program without stop | ||
kpts_cam0 = [] | ||
kpts_cam1 = [] | ||
kpts_3d = [] | ||
while True: | ||
|
||
#read frames from stream | ||
ret0, frame0 = cap0.read() | ||
ret1, frame1 = cap1.read() | ||
|
||
if not ret0 or not ret1: break | ||
|
||
#crop to 720x720. | ||
#Note: camera calibration parameters are set to this resolution.If you change this, make sure to also change camera intrinsic parameters | ||
if frame0.shape[1] != 720: | ||
frame0 = frame0[:,frame_shape[1]//2 - frame_shape[0]//2:frame_shape[1]//2 + frame_shape[0]//2] | ||
frame1 = frame1[:,frame_shape[1]//2 - frame_shape[0]//2:frame_shape[1]//2 + frame_shape[0]//2] | ||
|
||
# the BGR image to RGB. | ||
frame0 = cv.cvtColor(frame0, cv.COLOR_BGR2RGB) | ||
frame1 = cv.cvtColor(frame1, cv.COLOR_BGR2RGB) | ||
|
||
# To improve performance, optionally mark the image as not writeable to | ||
# pass by reference. | ||
frame0.flags.writeable = False | ||
frame1.flags.writeable = False | ||
results0 = pose0.process(frame0) | ||
results1 = pose1.process(frame1) | ||
|
||
#reverse changes | ||
frame0.flags.writeable = True | ||
frame1.flags.writeable = True | ||
frame0 = cv.cvtColor(frame0, cv.COLOR_RGB2BGR) | ||
frame1 = cv.cvtColor(frame1, cv.COLOR_RGB2BGR) | ||
|
||
#check for keypoints detection | ||
frame0_keypoints = [] | ||
if results0.pose_landmarks: | ||
for i, landmark in enumerate(results0.pose_landmarks.landmark): | ||
if i not in pose_keypoints: continue #only save keypoints that are indicated in pose_keypoints | ||
pxl_x = landmark.x * frame0.shape[1] | ||
pxl_y = landmark.y * frame0.shape[0] | ||
pxl_x = int(round(pxl_x)) | ||
pxl_y = int(round(pxl_y)) | ||
cv.circle(frame0,(pxl_x, pxl_y), 3, (0,0,255), -1) #add keypoint detection points into figure | ||
kpts = [pxl_x, pxl_y] | ||
frame0_keypoints.append(kpts) | ||
else: | ||
#if no keypoints are found, simply fill the frame data with [-1,-1] for each kpt | ||
frame0_keypoints = [[-1, -1]]*len(pose_keypoints) | ||
|
||
#this will keep keypoints of this frame in memory | ||
kpts_cam0.append(frame0_keypoints) | ||
|
||
frame1_keypoints = [] | ||
if results1.pose_landmarks: | ||
for i, landmark in enumerate(results1.pose_landmarks.landmark): | ||
if i not in pose_keypoints: continue | ||
pxl_x = landmark.x * frame0.shape[1] | ||
pxl_y = landmark.y * frame0.shape[0] | ||
pxl_x = int(round(pxl_x)) | ||
pxl_y = int(round(pxl_y)) | ||
cv.circle(frame1,(pxl_x, pxl_y), 3, (0,0,255), -1) | ||
kpts = [pxl_x, pxl_y] | ||
frame1_keypoints.append(kpts) | ||
|
||
else: | ||
#if no keypoints are found, simply fill the frame data with [-1,-1] for each kpt | ||
frame1_keypoints = [[-1, -1]]*len(pose_keypoints) | ||
|
||
#update keypoints container | ||
kpts_cam1.append(frame1_keypoints) | ||
|
||
#calculate 3d position | ||
frame_p3ds = [] | ||
for uv1, uv2 in zip(frame0_keypoints, frame1_keypoints): | ||
if uv1[0] == -1 or uv2[0] == -1: | ||
_p3d = [-1, -1, -1] | ||
else: | ||
_p3d = DLT(P0, P1, uv1, uv2) #calculate 3d position of keypoint | ||
frame_p3ds.append(_p3d) | ||
|
||
''' | ||
This contains the 3d position of each keypoint in current frame. | ||
For real time application, this is what you want. | ||
''' | ||
frame_p3ds = np.array(frame_p3ds).reshape((12, 3)) | ||
kpts_3d.append(frame_p3ds) | ||
|
||
# uncomment these if you want to see the full keypoints detections | ||
# mp_drawing.draw_landmarks(frame0, results0.pose_landmarks, mp_pose.POSE_CONNECTIONS, | ||
# landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style()) | ||
# | ||
# mp_drawing.draw_landmarks(frame1, results1.pose_landmarks, mp_pose.POSE_CONNECTIONS, | ||
# landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style()) | ||
|
||
cv.imshow('cam1', frame1) | ||
cv.imshow('cam0', frame0) | ||
|
||
k = cv.waitKey(1) | ||
if k & 0xFF == 27: break #27 is ESC key. | ||
|
||
|
||
cv.destroyAllWindows() | ||
for cap in caps: | ||
cap.release() | ||
|
||
|
||
return np.array(kpts_cam0), np.array(kpts_cam1), np.array(kpts_3d) | ||
|
||
if __name__ == '__main__': | ||
|
||
#this will load the sample videos if no camera ID is given | ||
input_stream1 = 'media/cam0_test.mp4' | ||
input_stream2 = 'media/cam1_test.mp4' | ||
|
||
#put camera id as command line arguements | ||
if len(sys.argv) == 3: | ||
input_stream1 = int(sys.argv[1]) | ||
input_stream2 = int(sys.argv[2]) | ||
|
||
#get projection matrices | ||
P0 = get_projection_matrix(0) | ||
P1 = get_projection_matrix(1) | ||
|
||
kpts_cam0, kpts_cam1, kpts_3d = run_mp(input_stream1, input_stream2, P0, P1) | ||
|
||
#this will create keypoints file in current working folder | ||
write_keypoints_to_disk('kpts_cam0.dat', kpts_cam0) | ||
write_keypoints_to_disk('kpts_cam1.dat', kpts_cam1) | ||
write_keypoints_to_disk('kpts_3d.dat', kpts_3d) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
intrinsic: | ||
900.4857632254283 0.0 366.8661146698476 | ||
0.0 902.6176551471326 363.7146796254499 | ||
0.0 0.0 1.0 | ||
distortion: | ||
-0.04074323275039075 1.3719603634486155 -0.0027819018501480247 0.0011755031235693644 -5.637775169623254 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
intrinsic: | ||
926.7112344222677 0.0 366.86387719274677 | ||
0.0 928.7212583044412 358.17005894003324 | ||
0.0 0.0 1.0 | ||
distortion: | ||
-0.042736966121127366 1.355959158837944 -0.0024952648024986887 0.0033896734823038875 -5.316323475425649 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
R: | ||
0.03522596169240533 0.9989186047045775 -0.030343908744683592 | ||
-0.9954114357284269 0.0323669499742608 -0.09004695536460891 | ||
-0.08896743923434776 0.03337666436934931 0.9954751594247135 | ||
T: | ||
-3.7714497419971353 | ||
0.5952700116552851 | ||
14.753837487029038 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
R: | ||
0.01862128 0.72598112 0.68746248 | ||
-0.99830361 -0.02443645 0.05284666 | ||
0.05516482 -0.68728034 0.72429454 | ||
T: | ||
-8.97548292 | ||
1.56690522 | ||
22.88471505 |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file not shown.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file not shown.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
import numpy as np | ||
import matplotlib.pyplot as plt | ||
from utils import DLT | ||
plt.style.use('seaborn') | ||
|
||
|
||
pose_keypoints = np.array([16, 14, 12, 11, 13, 15, 24, 23, 25, 26, 27, 28]) | ||
|
||
def read_keypoints(filename): | ||
fin = open(filename, 'r') | ||
|
||
kpts = [] | ||
while(True): | ||
line = fin.readline() | ||
if line == '': break | ||
|
||
line = line.split() | ||
line = [float(s) for s in line] | ||
|
||
line = np.reshape(line, (len(pose_keypoints), -1)) | ||
kpts.append(line) | ||
|
||
kpts = np.array(kpts) | ||
return kpts | ||
|
||
|
||
def visualize_3d(p3ds): | ||
|
||
"""Now visualize in 3D""" | ||
torso = [[0, 1] , [1, 7], [7, 6], [6, 0]] | ||
armr = [[1, 3], [3, 5]] | ||
arml = [[0, 2], [2, 4]] | ||
legr = [[6, 8], [8, 10]] | ||
legl = [[7, 9], [9, 11]] | ||
body = [torso, arml, armr, legr, legl] | ||
colors = ['red', 'blue', 'green', 'black', 'orange'] | ||
|
||
from mpl_toolkits.mplot3d import Axes3D | ||
|
||
fig = plt.figure() | ||
ax = fig.add_subplot(111, projection='3d') | ||
|
||
for framenum, kpts3d in enumerate(p3ds): | ||
if framenum%2 == 0: continue #skip every 2nd frame | ||
for bodypart, part_color in zip(body, colors): | ||
for _c in bodypart: | ||
ax.plot(xs = [kpts3d[_c[0],0], kpts3d[_c[1],0]], ys = [kpts3d[_c[0],1], kpts3d[_c[1],1]], zs = [kpts3d[_c[0],2], kpts3d[_c[1],2]], linewidth = 4, c = part_color) | ||
|
||
#uncomment these if you want scatter plot of keypoints and their indices. | ||
# for i in range(12): | ||
# #ax.text(kpts3d[i,0], kpts3d[i,1], kpts3d[i,2], str(i)) | ||
# #ax.scatter(xs = kpts3d[i:i+1,0], ys = kpts3d[i:i+1,1], zs = kpts3d[i:i+1,2]) | ||
|
||
|
||
#ax.set_axis_off() | ||
ax.set_xticks([]) | ||
ax.set_yticks([]) | ||
ax.set_zticks([]) | ||
|
||
ax.set_xlim3d(-10, 10) | ||
ax.set_xlabel('x') | ||
ax.set_ylim3d(-10, 10) | ||
ax.set_ylabel('y') | ||
ax.set_zlim3d(-10, 10) | ||
ax.set_zlabel('z') | ||
plt.pause(0.1) | ||
ax.cla() | ||
|
||
|
||
if __name__ == '__main__': | ||
|
||
p3ds = read_keypoints('kpts_3d.dat') | ||
visualize_3d(p3ds) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
import numpy as np | ||
|
||
|
||
def _make_homogeneous_rep_matrix(R, t): | ||
P = np.zeros((4,4)) | ||
P[:3,:3] = R | ||
P[:3, 3] = t.reshape(3) | ||
P[3,3] = 1 | ||
return P | ||
|
||
#direct linear transform | ||
def DLT(P1, P2, point1, point2): | ||
|
||
A = [point1[1]*P1[2,:] - P1[1,:], | ||
P1[0,:] - point1[0]*P1[2,:], | ||
point2[1]*P2[2,:] - P2[1,:], | ||
P2[0,:] - point2[0]*P2[2,:] | ||
] | ||
A = np.array(A).reshape((4,4)) | ||
#print('A: ') | ||
#print(A) | ||
|
||
B = A.transpose() @ A | ||
from scipy import linalg | ||
U, s, Vh = linalg.svd(B, full_matrices = False) | ||
|
||
#print('Triangulated point: ') | ||
#print(Vh[3,0:3]/Vh[3,3]) | ||
return Vh[3,0:3]/Vh[3,3] | ||
|
||
def read_camera_parameters(camera_id): | ||
|
||
inf = open('camera_parameters/c' + str(camera_id) + '.dat', 'r') | ||
|
||
cmtx = [] | ||
dist = [] | ||
|
||
line = inf.readline() | ||
for _ in range(3): | ||
line = inf.readline().split() | ||
line = [float(en) for en in line] | ||
cmtx.append(line) | ||
|
||
line = inf.readline() | ||
line = inf.readline().split() | ||
line = [float(en) for en in line] | ||
dist.append(line) | ||
|
||
return np.array(cmtx), np.array(dist) | ||
|
||
def read_rotation_translation(camera_id, savefolder = 'camera_parameters/'): | ||
|
||
inf = open(savefolder + 'rot_trans_c'+ str(camera_id) + '.dat', 'r') | ||
|
||
inf.readline() | ||
rot = [] | ||
trans = [] | ||
for _ in range(3): | ||
line = inf.readline().split() | ||
line = [float(en) for en in line] | ||
rot.append(line) | ||
|
||
inf.readline() | ||
for _ in range(3): | ||
line = inf.readline().split() | ||
line = [float(en) for en in line] | ||
trans.append(line) | ||
|
||
inf.close() | ||
return np.array(rot), np.array(trans) | ||
|
||
def _convert_to_homogeneous(pts): | ||
pts = np.array(pts) | ||
if len(pts.shape) > 1: | ||
w = np.ones((pts.shape[0], 1)) | ||
return np.concatenate([pts, w], axis = 1) | ||
else: | ||
return np.concatenate([pts, [1]], axis = 0) | ||
|
||
def get_projection_matrix(camera_id): | ||
|
||
#read camera parameters | ||
cmtx, dist = read_camera_parameters(camera_id) | ||
rvec, tvec = read_rotation_translation(camera_id) | ||
|
||
#calculate projection matrix | ||
P = cmtx @ _make_homogeneous_rep_matrix(rvec, tvec)[:3,:] | ||
return P | ||
|
||
def write_keypoints_to_disk(filename, kpts): | ||
fout = open(filename, 'w') | ||
|
||
for frame_kpts in kpts: | ||
for kpt in frame_kpts: | ||
if len(kpt) == 2: | ||
fout.write(str(kpt[0]) + ' ' + str(kpt[1]) + ' ') | ||
else: | ||
fout.write(str(kpt[0]) + ' ' + str(kpt[1]) + ' ' + str(kpt[2]) + ' ') | ||
|
||
fout.write('\n') | ||
fout.close() | ||
|
||
if __name__ == '__main__': | ||
|
||
P2 = get_projection_matrix(0) | ||
P1 = get_projection_matrix(1) |