Spaces:
Running
Running
File size: 6,076 Bytes
b9be4e6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 |
import cv2, os
import sys
sys.path.insert(0, 'FaceBoxesV2')
sys.path.insert(0, '..')
import numpy as np
import pickle
import importlib
from math import floor
from faceboxes_detector import *
import time
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
import torch.nn.functional as F
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
from networks import *
import data_utils
from functions import *
if not len(sys.argv) == 3:
print('Format:')
print('python lib/demo_video.py config_file video_file')
exit(0)
experiment_name = sys.argv[1].split('/')[-1][:-3]
data_name = sys.argv[1].split('/')[-2]
config_path = '.experiments.{}.{}'.format(data_name, experiment_name)
video_file = sys.argv[2]
my_config = importlib.import_module(config_path, package='PIPNet')
Config = getattr(my_config, 'Config')
cfg = Config()
cfg.experiment_name = experiment_name
cfg.data_name = data_name
save_dir = os.path.join('./snapshots', cfg.data_name, cfg.experiment_name)
meanface_indices, reverse_index1, reverse_index2, max_len = get_meanface(os.path.join('data', cfg.data_name, 'meanface.txt'), cfg.num_nb)
if cfg.backbone == 'resnet18':
resnet18 = models.resnet18(pretrained=cfg.pretrained)
net = Pip_resnet18(resnet18, cfg.num_nb, num_lms=cfg.num_lms, input_size=cfg.input_size, net_stride=cfg.net_stride)
elif cfg.backbone == 'resnet50':
resnet50 = models.resnet50(pretrained=cfg.pretrained)
net = Pip_resnet50(resnet50, cfg.num_nb, num_lms=cfg.num_lms, input_size=cfg.input_size, net_stride=cfg.net_stride)
elif cfg.backbone == 'resnet101':
resnet101 = models.resnet101(pretrained=cfg.pretrained)
net = Pip_resnet101(resnet101, cfg.num_nb, num_lms=cfg.num_lms, input_size=cfg.input_size, net_stride=cfg.net_stride)
else:
print('No such backbone!')
exit(0)
if cfg.use_gpu:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
else:
device = torch.device("cpu")
net = net.to(device)
weight_file = os.path.join(save_dir, 'epoch%d.pth' % (cfg.num_epochs-1))
state_dict = torch.load(weight_file, map_location=device)
net.load_state_dict(state_dict)
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225])
preprocess = transforms.Compose([transforms.Resize((cfg.input_size, cfg.input_size)), transforms.ToTensor(), normalize])
def demo_video(video_file, net, preprocess, input_size, net_stride, num_nb, use_gpu, device):
detector = FaceBoxesDetector('FaceBoxes', 'FaceBoxesV2/weights/FaceBoxesV2.pth', use_gpu, device)
my_thresh = 0.9
det_box_scale = 1.2
net.eval()
if video_file == 'camera':
cap = cv2.VideoCapture(0)
else:
cap = cv2.VideoCapture(video_file)
if (cap.isOpened()== False):
print("Error opening video stream or file")
frame_width = int(cap.get(3))
frame_height = int(cap.get(4))
count = 0
while(cap.isOpened()):
ret, frame = cap.read()
if ret == True:
detections, _ = detector.detect(frame, my_thresh, 1)
for i in range(len(detections)):
det_xmin = detections[i][2]
det_ymin = detections[i][3]
det_width = detections[i][4]
det_height = detections[i][5]
det_xmax = det_xmin + det_width - 1
det_ymax = det_ymin + det_height - 1
det_xmin -= int(det_width * (det_box_scale-1)/2)
# remove a part of top area for alignment, see paper for details
det_ymin += int(det_height * (det_box_scale-1)/2)
det_xmax += int(det_width * (det_box_scale-1)/2)
det_ymax += int(det_height * (det_box_scale-1)/2)
det_xmin = max(det_xmin, 0)
det_ymin = max(det_ymin, 0)
det_xmax = min(det_xmax, frame_width-1)
det_ymax = min(det_ymax, frame_height-1)
det_width = det_xmax - det_xmin + 1
det_height = det_ymax - det_ymin + 1
cv2.rectangle(frame, (det_xmin, det_ymin), (det_xmax, det_ymax), (0, 0, 255), 2)
det_crop = frame[det_ymin:det_ymax, det_xmin:det_xmax, :]
det_crop = cv2.resize(det_crop, (input_size, input_size))
inputs = Image.fromarray(det_crop[:,:,::-1].astype('uint8'), 'RGB')
inputs = preprocess(inputs).unsqueeze(0)
inputs = inputs.to(device)
lms_pred_x, lms_pred_y, lms_pred_nb_x, lms_pred_nb_y, outputs_cls, max_cls = forward_pip(net, inputs, preprocess, input_size, net_stride, num_nb)
lms_pred = torch.cat((lms_pred_x, lms_pred_y), dim=1).flatten()
tmp_nb_x = lms_pred_nb_x[reverse_index1, reverse_index2].view(cfg.num_lms, max_len)
tmp_nb_y = lms_pred_nb_y[reverse_index1, reverse_index2].view(cfg.num_lms, max_len)
tmp_x = torch.mean(torch.cat((lms_pred_x, tmp_nb_x), dim=1), dim=1).view(-1,1)
tmp_y = torch.mean(torch.cat((lms_pred_y, tmp_nb_y), dim=1), dim=1).view(-1,1)
lms_pred_merge = torch.cat((tmp_x, tmp_y), dim=1).flatten()
lms_pred = lms_pred.cpu().numpy()
lms_pred_merge = lms_pred_merge.cpu().numpy()
for i in range(cfg.num_lms):
x_pred = lms_pred_merge[i*2] * det_width
y_pred = lms_pred_merge[i*2+1] * det_height
cv2.circle(frame, (int(x_pred)+det_xmin, int(y_pred)+det_ymin), 1, (0, 0, 255), 2)
count += 1
#cv2.imwrite('video_out2/'+str(count)+'.jpg', frame)
cv2.imshow('1', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
else:
break
cap.release()
cv2.destroyAllWindows()
demo_video(video_file, net, preprocess, cfg.input_size, cfg.net_stride, cfg.num_nb, cfg.use_gpu, device)
|