import argparse
import os
import sys
import time
import numpy as np
from PIL import Image, ImageOps
from torchvision import transforms
from tqdm import tqdm
import cv2
sys.path.append("./")
from img2pose import img2poseModel
from model_loader import load_model
def init_model(pretrained_path, threed_68_points, pose_stddev, pose_mean, depth):
    """Build an img2pose model, load pretrained weights, and put it in eval mode.

    Args:
        pretrained_path: path to the pretrained checkpoint file.
        threed_68_points: path to a .npy file with the 3D reference landmarks.
        pose_stddev: path to a .npy file with the pose standard deviation.
        pose_mean: path to a .npy file with the pose mean.
        depth: ResNet backbone depth (e.g. 18, 50 or 101).

    Returns:
        A ready-to-use img2poseModel instance.
    """
    # 200 / 1400 are passed positionally to img2poseModel — presumably the
    # min/max input image sizes; TODO confirm against img2pose.py.
    model = img2poseModel(
        depth,
        200,
        1400,
        pose_mean=np.load(pose_mean),
        pose_stddev=np.load(pose_stddev),
        threed_68_points=np.load(threed_68_points),
    )
    # Map the checkpoint onto the CPU when the model itself lives on the CPU.
    on_cpu = str(model.device) == "cpu"
    load_model(model.fpn_model, pretrained_path, cpu_mode=on_cpu, model_only=True)
    model.evaluate()
    return model
def bbox_voting(bboxes, iou_thresh=0.6):
    """Merge overlapping detections by weighted box and score voting.

    Args:
        bboxes: array-like of shape (N, 5); each row is [x1, y1, x2, y2, score].
        iou_thresh: boxes whose IoU with the current top-scoring box is >= this
            threshold are merged into a single voted box.

    Returns:
        An (M, 5) float64 array of voted boxes, M <= N. Coordinates are the
        weight-averaged member coordinates; the score is the weighted score sum.
    """
    bboxes = np.asarray(bboxes, dtype=np.float64)
    if bboxes.size == 0:
        # Original crashed on empty input; return an empty result instead.
        return np.zeros((0, 5))
    # Process boxes in descending score order.
    order = bboxes[:, 4].argsort()[::-1]
    bboxes = bboxes[order]
    # The +1 keeps the original pixel-inclusive area convention.
    areas = (bboxes[:, 2] - bboxes[:, 0] + 1) * (bboxes[:, 3] - bboxes[:, 1] + 1)
    voted = []
    while bboxes.shape[0] > 0:
        # IoU of the current highest-scoring box against every remaining box.
        xx1 = np.maximum(bboxes[0, 0], bboxes[:, 0])
        yy1 = np.maximum(bboxes[0, 1], bboxes[:, 1])
        xx2 = np.minimum(bboxes[0, 2], bboxes[:, 2])
        yy2 = np.minimum(bboxes[0, 3], bboxes[:, 3])
        inter = np.maximum(0.0, xx2 - xx1 + 1) * np.maximum(0.0, yy2 - yy1 + 1)
        overlaps = inter / (areas[0] + areas - inter)
        merge_idx = np.where(overlaps >= iou_thresh)[0]
        if merge_idx.shape[0] == 0:
            # Nothing overlaps enough (only possible when iou_thresh > 1, since
            # box 0 always has IoU 1 with itself); drop it without a vote.
            bboxes = np.delete(bboxes, 0, 0)
            areas = np.delete(areas, 0, 0)
            continue
        group = bboxes[merge_idx]
        bboxes = np.delete(bboxes, merge_idx, 0)
        areas = np.delete(areas, merge_idx, 0)
        # Weight each member by (score / best score in group) * IoU with top box.
        weights = (group[:, -1] / group[:, -1].max()) * overlaps[merge_idx]
        bbox = np.zeros((1, 5))
        bbox[0, :4] = (group[:, :4] * weights[:, None]).sum(axis=0) / weights.sum()
        bbox[0, 4] = (group[:, 4] * weights).sum()
        voted.append(bbox)
    # Single vstack at the end; np.row_stack is deprecated (removed in NumPy 2.x)
    # and repeated stacking was quadratic.
    return np.vstack(voted) if voted else np.zeros((0, 5))
def get_M_homo(R, t):
    """Invert the rigid transform given by rotation matrix R (3x3) and
    translation t (3 values), returning the inverse pose as a Rodrigues
    rotation vector and a translation vector.
    """
    # Assemble the 4x4 homogeneous transform [R | t; 0 0 0 1] and invert it.
    M = np.eye(4)
    M[:3, :3] = np.asarray(R, dtype=np.float64)
    M[:3, 3] = [t[0], t[1], t[2]]
    M_inv = np.linalg.inv(M)
    # Convert the inverted rotation back to a Rodrigues vector; cv2.Rodrigues
    # writes the result into rvec in place.
    rvec = np.zeros((1, 3))
    cv2.Rodrigues(M_inv[:3, :3], rvec)
    t_inv = np.array([M_inv[0, 3], M_inv[1, 3], M_inv[2, 3]])
    return rvec, t_inv
def show_xyz(img, point_3d, rotation_vector, translation_vector, ipm, dpv):
    """Draw the X/Y/Z axes of the frame located at point_3d onto img.

    rotation_vector / translation_vector is the pose (Rodrigues rotation +
    translation); it is inverted via get_M_homo before projecting. ipm is the
    camera intrinsic matrix and dpv the distortion coefficients, both passed
    straight through to cv2.projectPoints.
    """
    # Expand the Rodrigues vector to a matrix, then invert the transform.
    R = np.zeros((3, 3))
    cv2.Rodrigues(rotation_vector, R)
    rotation_vector, translation_vector = get_M_homo(R, translation_vector)

    gap = 1  # axis length in model units
    origin_3d = np.array(point_3d, dtype=np.float64)

    def project(p):
        # Project one 3D point and round to integer pixel coordinates.
        pts, _ = cv2.projectPoints(p, rotation_vector, translation_vector, ipm, dpv)
        return int(pts[0][0][0]), int(pts[0][0][1])

    origin_px = project(origin_3d)
    axes = (
        ("X", np.array([gap, 0.0, 0.0]), (0, 0, 255)),  # red (BGR)
        ("Y", np.array([0.0, gap, 0.0]), (0, 255, 0)),  # green
        ("Z", np.array([0.0, 0.0, gap]), (255, 0, 0)),  # blue
    )
    for label, offset, color in axes:
        end_px = project(origin_3d + offset)
        cv2.line(img, origin_px, end_px, color, 2)
        cv2.putText(img, label, end_px, cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
def main():
    """Run img2pose on a video stream and visualize boxes, scores and pose axes.

    Reads frames from the capture source, runs the detector on each frame,
    draws detections above a 0.6 score threshold plus the head-pose axes,
    and shows an FPS overlay. Press 'q' to quit.
    """
    cap = cv2.VideoCapture("./datasets/123.avi")  # "./datasets/123.avi"
    model = init_model(pretrained_path, threed_68_points, pose_stddev, pose_mean, depth)
    # Build the tensor transform once, not on every frame.
    transform = transforms.Compose([transforms.ToTensor()])
    try:
        while True:
            success, img_bgr = cap.read()
            # img_bgr = cv2.imread("./datasets/112.png")
            if not success:
                break
            t_start = cv2.getTickCount()
            img = img_bgr[:, :, ::-1]  # BGR -> RGB for the model
            res = model.predict([transform(img.copy())])

            bboxes = []
            poses = []
            for i in range(len(res[0]["scores"])):
                bbox = res[0]["boxes"].cpu().numpy()[i].astype("int")
                score = res[0]["scores"].cpu().numpy()[i]
                pose = res[0]["dofs"].cpu().numpy()[i]
                poses.append(pose)
                bboxes.append(np.append(bbox, score))
            # `bboxes == []` was numpy-unsafe and the old np.newaxis branch was
            # dead/broken (a Python list does not support that indexing).
            if not bboxes:
                continue
            # bboxes = bbox_voting(bboxes, 0.6)
            for i, (x1, y1, x2, y2, s) in enumerate(bboxes):
                if s > 0.6:
                    cv2.rectangle(img_bgr, (int(x1), int(y1)), (int(x2), int(y2)), (255, 0, 0))
                    cv2.putText(img_bgr, str(int(s * 1000)), (int(x1), int(y1)),
                                cv2.FONT_HERSHEY_PLAIN, 2.0, (0, 0, 255))
                    # First three DoFs are the rotation vector, last three the
                    # translation — TODO confirm against the img2pose docs.
                    show_xyz(img_bgr, (0, 0, 0),
                             np.array([poses[i][0], poses[i][1], poses[i][2]], dtype=np.float64),
                             np.array([poses[i][3], poses[i][4], poses[i][5]], dtype=np.float64),
                             ipm, dpv)
                    print(poses[i])
            t_end = cv2.getTickCount()
            fps = cv2.getTickFrequency() / (t_end - t_start)
            cv2.putText(img_bgr, str(fps), (20, 20), cv2.FONT_HERSHEY_PLAIN, 2.0, (0, 0, 255))
            cv2.imshow('video', img_bgr)
            # cv2.waitKey(0)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the capture device and close the preview window on any exit.
        cap.release()
        cv2.destroyAllWindows()
# ipm: camera intrinsic matrix; dpv: camera lens distortion coefficients
# (both are fed to cv2.projectPoints in show_xyz; values are for one specific
# camera — recalibrate for a different one)
ipm = np.array([[738.77113867, 0., 309.55582235], [0., 738.46009689, 220.83418211], [0, 0, 1]])
dpv = np.array([0.0573193604, -0.37914004, -0.00216807948, 0.000132759817, 0.923696829])
depth = 18  # ResNet backbone depth for init_model; supported: 18, 50, 101
# Paths to the pretrained pose statistics, 3D reference landmarks and weights.
pose_mean = "./models/WIDER_train_pose_mean_v1.npy"
pose_stddev = "./models/WIDER_train_pose_stddev_v1.npy"
threed_68_points = "./pose_references/reference_3d_68_points_trans.npy"
pretrained_path = "models/img2pose_v1.pth"
if __name__ == "__main__":
    main()
# Using the main.py above as the entry point, the main call flow of the img2pose
# code is laid out below (read it alongside this file for easier understanding).
# This content was contributed voluntarily by a user; copyright belongs to the
# original author and this site assumes no legal responsibility. If you find
# content suspected of plagiarism or infringement, please contact:
# hwhale#tublm.com (replace # with @ before use).