Spaces:

reab5555
/

Owlv2-Video-Object-Detection

Paused

App Files Files Community

Owlv2-Video-Object-Detection / app.py

reab5555

Update app.py

7d47fdc verified 12 months ago

raw

history blame

6.99 kB

	import spaces
	import gradio as gr
	import cv2
	from PIL import Image, ImageDraw, ImageFont
	import torch
	from transformers import Owlv2Processor, Owlv2ForObjectDetection
	import numpy as np
	import os
	import matplotlib.pyplot as plt
	from io import BytesIO
	import tempfile

	# Check if CUDA is available, otherwise use CPU
	device = 'cuda' if torch.cuda.is_available() else 'cpu'

	processor = Owlv2Processor.from_pretrained("google/owlv2-base-patch16")
	model = Owlv2ForObjectDetection.from_pretrained("google/owlv2-base-patch16").to(device)

	@spaces.GPU(duration=120)
	def process_video(video_path, target, progress=gr.Progress()):
	if video_path is None:
	return None, None, "Error: No video uploaded"

	if not os.path.exists(video_path):
	return None, None, f"Error: Video file not found at {video_path}"

	cap = cv2.VideoCapture(video_path)
	if not cap.isOpened():
	return None, None, f"Error: Unable to open video file at {video_path}"

	frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
	original_fps = int(cap.get(cv2.CAP_PROP_FPS))
	output_fps = 1
	frame_duration = 1 / output_fps
	video_duration = frame_count / original_fps

	processed_frames = []
	frame_scores = []

	for time in progress.tqdm(np.arange(0, video_duration, frame_duration)):
	frame_number = int(time * original_fps)
	cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
	ret, img = cap.read()
	if not ret:
	break

	pil_img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

	# Process single image
	inputs = processor(text=[target], images=pil_img, return_tensors="pt", padding=True).to(device)
	outputs = model(**inputs)

	target_sizes = torch.Tensor([pil_img.size[::-1]])
	results = processor.post_process_object_detection(outputs, target_sizes=target_sizes)

	draw = ImageDraw.Draw(pil_img)
	max_score = 0

	try:
	font = ImageFont.truetype("arial.ttf", 40)
	except IOError:
	font = ImageFont.load_default()

	boxes, scores, labels = results[0]["boxes"], results[0]["scores"], results[0]["labels"]

	for box, score, label in zip(boxes, scores, labels):
	if score.item() >= 0.25:
	box = [round(i, 2) for i in box.tolist()]
	object_label = target
	confidence = round(score.item(), 3)
	annotation = f"{object_label}: {confidence}"

	draw.rectangle(box, outline="red", width=2)
	text_position = (box[0], box[1] - 30)
	draw.text(text_position, annotation, fill="white", font=font)

	max_score = max(max_score, confidence)

	processed_frames.append(np.array(pil_img))
	frame_scores.append(max_score)

	cap.release()
	return processed_frames, frame_scores, None

	def create_heatmap(frame_scores, current_frame):
	plt.figure(figsize=(12, 3))
	plt.imshow([frame_scores], cmap='hot_r', aspect='auto') # 'hot_r' for reversed hot colormap
	cbar = plt.colorbar(label='Confidence')
	cbar.ax.yaxis.set_ticks_position('left')
	cbar.ax.yaxis.set_label_position('left')
	plt.title('Object Detection Heatmap')
	plt.xlabel('Frame')
	plt.yticks([])

	# Add more frame numbers on x-axis
	num_frames = len(frame_scores)
	step = max(1, num_frames // 10) # Show at most 10 frame numbers
	frame_numbers = range(0, num_frames, step)
	plt.xticks(frame_numbers, [str(i) for i in frame_numbers])

	# Add vertical line for current frame
	plt.axvline(x=current_frame, color='blue', linestyle='--', linewidth=2)

	plt.tight_layout()

	with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmp_file:
	plt.savefig(tmp_file.name, format='png', dpi=400, bbox_inches='tight')
	plt.close()

	return tmp_file.name

	def load_sample_frame(video_path):
	cap = cv2.VideoCapture(video_path)
	if not cap.isOpened():
	return None
	ret, frame = cap.read()
	cap.release()
	if not ret:
	return None
	frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
	return frame_rgb

	def gradio_app():
	with gr.Blocks() as app:
	gr.Markdown("# Video Object Detection with Owlv2")

	video_input = gr.Video(label="Upload Video")
	target_input = gr.Textbox(label="Target Object", value="Elephant")
	frame_slider = gr.Slider(minimum=0, maximum=100, step=1, label="Frame", value=0)
	heatmap_output = gr.Image(label="Detection Heatmap")
	output_image = gr.Image(label="Processed Frame")
	error_output = gr.Textbox(label="Error Messages", visible=False)
	sample_video_frame = gr.Image(value=load_sample_frame("Drone Video of African Wildlife Wild Botswan.mp4"), label="Drone Video of African Wildlife Wild Botswan by wildimagesonline.com - Sample Video Frame")
	use_sample_button = gr.Button("Use Sample Video")
	progress_bar = gr.Progress()

	processed_frames = gr.State([])
	frame_scores = gr.State([])

	def process_and_update(video, target):
	frames, scores, error = process_video(video, target, progress_bar)
	if frames is not None:
	heatmap_path = create_heatmap(scores, 0) # Initial heatmap with current frame at 0
	return frames, scores, frames[0], heatmap_path, error, gr.Slider(maximum=len(frames) - 1, value=0)
	return None, None, None, None, error, gr.Slider(maximum=100, value=0)

	def update_frame_and_heatmap(frame_index, frames, scores):
	if frames and 0 <= frame_index < len(frames):
	heatmap_path = create_heatmap(scores, frame_index)
	return frames[frame_index], heatmap_path
	return None, None

	video_input.upload(process_and_update,
	inputs=[video_input, target_input],
	outputs=[processed_frames, frame_scores, output_image, heatmap_output, error_output, frame_slider])

	frame_slider.change(update_frame_and_heatmap,
	inputs=[frame_slider, processed_frames, frame_scores],
	outputs=[output_image, heatmap_output])

	def use_sample_video():
	sample_video_path = "Drone Video of African Wildlife Wild Botswan.mp4"
	return process_and_update(sample_video_path, "Elephant")

	use_sample_button.click(use_sample_video,
	inputs=None,
	outputs=[processed_frames, frame_scores, output_image, heatmap_output, error_output, frame_slider])

	# Layout
	with gr.Row():
	with gr.Column(scale=2):
	output_image
	with gr.Column(scale=1):
	sample_video_frame
	use_sample_button

	return app

	if __name__ == "__main__":
	app = gradio_app()
	app.launch(share=True)