Spaces:

Sapphire-356
/

Video2MC

Running

App Files Files Community

Video2MC / joints_detectors /Alphapose /yolo /cam_demo.py

Sapphire-356

Change to the CPU version

aa34300 almost 2 years ago

raw

history blame contribute delete

4.65 kB

	from __future__ import division
	import time
	import torch
	import torch.nn as nn
	from torch.autograd import Variable
	import numpy as np
	import cv2
	from .util import *
	from .darknet import Darknet
	from .preprocess import prep_image, inp_to_image
	import pandas as pd
	import random
	import argparse
	import pickle as pkl

	def get_test_input(input_dim, CUDA):
	    """Load the sample image as a network-ready input batch.

	    Reads ``imgs/messi.jpg`` (relative to the current working directory),
	    resizes it to ``input_dim`` x ``input_dim``, reverses the channel axis,
	    moves the channel axis first, scales the values by 1/255 and prepends
	    a batch axis.

	    Args:
	        input_dim: Side length, in pixels, of the square network input.
	        CUDA: Accepted for interface compatibility only; this function
	            never moves the tensor to a GPU, so the flag has no effect.

	    Returns:
	        A float ``Variable`` with a leading batch axis of size 1 and the
	        channel axis second.

	    Raises:
	        FileNotFoundError: If the sample image cannot be read.
	    """
	    image_path = "imgs/messi.jpg"
	    img = cv2.imread(image_path)
	    # cv2.imread reports failure by returning None rather than raising;
	    # fail here with a clear message instead of crashing inside resize.
	    if img is None:
	        raise FileNotFoundError(
	            "Cannot read test image: {}".format(image_path))

	    img = cv2.resize(img, (input_dim, input_dim))
	    # Reverse the channel order and go from HWC to CHW layout.
	    img_ = img[:, :, ::-1].transpose((2, 0, 1))
	    # Add the batch axis and scale to [0, 1]; the division also produces
	    # a fresh contiguous array that torch.from_numpy can wrap.
	    img_ = img_[np.newaxis, :, :, :] / 255.0
	    img_ = torch.from_numpy(img_).float()
	    return Variable(img_)

	def prep_image(img, inp_dim):
	    """
	    Convert an image array into a batched float tensor for the network.

	    The image is resized to ``inp_dim`` x ``inp_dim`` (plain resize, no
	    padding), its channel axis is reversed and moved first, the values are
	    scaled by 1/255 and a leading batch axis is added.

	    Returns a tuple ``(tensor, original image, (width, height))`` where the
	    width and height are read from the original image's shape.
	    """
	    height, width = img.shape[0], img.shape[1]
	    resized = cv2.resize(img, (inp_dim, inp_dim))
	    # .copy() materialises the reversed/transposed view so that
	    # torch.from_numpy receives a contiguous array.
	    chw = resized[:, :, ::-1].transpose((2, 0, 1)).copy()
	    batch = torch.from_numpy(chw).float().div(255.0).unsqueeze(0)
	    return batch, img, (width, height)

	def write(x, img):
	    """Draw one detection's bounding box and class label onto ``img``.

	    Args:
	        x: One detection row. Elements 1-2 are used as the first box
	            corner, elements 3-4 as the opposite corner, and the last
	            element as an index into the module-level ``classes`` list.
	        img: Image array to draw on; it is modified in place.

	    Returns:
	        The same ``img`` object, after drawing.

	    Note:
	        Reads the module-level ``classes`` and ``colors`` globals, which
	        must be populated before this function is called.
	    """
	    # Convert every coordinate to a plain Python int: tuples of 0-d
	    # tensors are not accepted as points by current OpenCV bindings.
	    left, top = (int(v) for v in x[1:3])
	    right, bottom = (int(v) for v in x[3:5])
	    cls = int(x[-1])

	    label = "{0}".format(classes[cls])
	    color = random.choice(colors)

	    # Box outline.
	    cv2.rectangle(img, (left, top), (right, bottom), color, 1)

	    # Filled background sized to the label text, anchored at the first
	    # corner, then the label itself on top of it.
	    t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
	    label_corner = (left + t_size[0] + 3, top + t_size[1] + 4)
	    cv2.rectangle(img, (left, top), label_corner, color, -1)
	    cv2.putText(img, label, (left, top + t_size[1] + 4),
	                cv2.FONT_HERSHEY_PLAIN, 1, [225, 255, 255], 1)
	    return img

	def arg_parse():
	    """Parse the command-line arguments of the camera demo.

	    Returns:
	        An ``argparse.Namespace`` with:

	        * ``confidence`` (float, default 0.25): object confidence used to
	          filter predictions.
	        * ``nms_thresh`` (float, default 0.4): NMS threshold.
	        * ``reso`` (str, default "160"): input resolution of the network.
	    """
	    parser = argparse.ArgumentParser(description='YOLO v3 Cam Demo')
	    # type=float keeps CLI-supplied values the same type as the defaults
	    # and lets argparse reject non-numeric input with a usage error.
	    parser.add_argument("--confidence", dest="confidence", type=float,
	                        default=0.25,
	                        help="Object Confidence to filter predictions")
	    parser.add_argument("--nms_thresh", dest="nms_thresh", type=float,
	                        default=0.4, help="NMS Threshhold")
	    # Kept as a string: the caller stores it as-is and converts it itself.
	    parser.add_argument("--reso", dest='reso', type=str, default="160",
	                        help="Input resolution of the network. Increase to increase accuracy. Decrease to increase speed")
	    return parser.parse_args()



	if __name__ == '__main__':
	    # Webcam demo: grab frames from capture device 0, run the detector on
	    # each one, draw the detections and show the result until 'q' is
	    # pressed or the camera stops delivering frames.
	    #
	    # NOTE: this code stays at module level (not inside a main() function)
	    # because write() reads `classes` and `colors` as module globals.
	    cfgfile = "cfg/yolov3-spp.cfg"
	    weightsfile = "yolov3-spp.weights"
	    num_classes = 80

	    args = arg_parse()
	    confidence = float(args.confidence)
	    nms_thesh = float(args.nms_thresh)
	    # Only forwarded to the model call; no tensor or module is moved to a
	    # GPU in this script.
	    CUDA = torch.cuda.is_available()

	    model = Darknet(cfgfile)
	    model.load_weights(weightsfile)

	    model.net_info["height"] = args.reso
	    inp_dim = int(model.net_info["height"])

	    # Explicit checks instead of assert so they survive `python -O`.
	    if inp_dim % 32 != 0 or inp_dim <= 32:
	        raise ValueError(
	            "--reso must be a multiple of 32 greater than 32, got {}".format(inp_dim))

	    model.eval()

	    # Load the label names and colour palette once, not on every frame.
	    classes = load_classes('data/coco.names')
	    # NOTE(review): pickle can execute arbitrary code while loading; only
	    # use a "pallete" file that comes from a trusted source.
	    with open("pallete", "rb") as palette_file:
	        colors = pkl.load(palette_file)

	    cap = cv2.VideoCapture(0)
	    if not cap.isOpened():
	        raise RuntimeError('Cannot capture source')

	    frames = 0
	    start = time.time()
	    try:
	        while cap.isOpened():
	            ret, frame = cap.read()
	            if not ret:
	                break

	            img, orig_im, _ = prep_image(frame, inp_dim)

	            output = model(Variable(img), CUDA)
	            output = write_results(output, confidence, num_classes, nms = True, nms_conf = nms_thesh)

	            # write_results appears to return a plain int when nothing
	            # was detected; in that case the frame is shown undecorated.
	            if not isinstance(output, int):
	                # Clamp the box coordinates to the network input and
	                # normalise them to [0, 1] ...
	                output[:,1:5] = torch.clamp(output[:,1:5], 0.0, float(inp_dim))/inp_dim
	                # ... then scale to the frame's width (x) and height (y).
	                output[:,[1,3]] *= frame.shape[1]
	                output[:,[2,4]] *= frame.shape[0]

	                for detection in output:
	                    write(detection, orig_im)

	            cv2.imshow("frame", orig_im)
	            key = cv2.waitKey(1)
	            if key & 0xFF == ord('q'):
	                break
	            frames += 1
	            print("FPS of the video is {:5.2f}".format( frames / (time.time() - start)))
	    finally:
	        # Always free the camera and close the preview window, even when
	        # the loop exits through an exception.
	        cap.release()
	        cv2.destroyAllWindows()