Spaces:

ManishThota
/

GSoC-Super-Rapid-Annotator

Runtime error

App Files Files Community

GSoC-Super-Rapid-Annotator / multi_video_app.py

ManishThota

Update multi_video_app.py

2a2b9f5 verified 10 months ago

raw

history blame

4.74 kB

	import warnings
	warnings.filterwarnings("ignore")
	import gradio as gr
	import re
	from typing import Dict, List
	import csv
	import os
	import torch
	from src.video_model import describe_video
	from src.utils import parse_string, parse_annotations



	# Function to save data to a CSV file
	def save_to_csv(observations: List[Dict], output_dir: str = "outputs") -> str:
	    """Write the per-video observations to ``video_observations.csv``.

	    Args:
	        observations: One dictionary per video. The columns written are
	            ``video_name``, ``standing``, ``hands.free``, ``indoors`` and
	            ``screen.interaction_yes``; a missing key yields an empty cell
	            and any other key is ignored.
	        output_dir: Directory that receives the CSV file. It is created
	            (including parents) when it does not exist yet.

	    Returns:
	        The path of the CSV file that was written.
	    """
	    columns = ["video_name", "standing", "hands.free", "indoors", "screen.interaction_yes"]

	    # exist_ok avoids the check-then-create race of exists() + makedirs().
	    os.makedirs(output_dir, exist_ok=True)

	    csv_file = os.path.join(output_dir, "video_observations.csv")

	    # Explicit UTF-8 so video names with non-ASCII characters are written
	    # the same way regardless of the platform's default encoding.
	    with open(csv_file, mode="w", newline="", encoding="utf-8") as file:
	        # restval/extrasaction keep one incomplete observation from
	        # aborting the export of the whole batch.
	        writer = csv.DictWriter(file, fieldnames=columns, restval="", extrasaction="ignore")
	        writer.writeheader()
	        writer.writerows(observations)

	    return csv_file

	# Function to process a single video and return the observation data
	def process_single_video(video_path: str, sitting, hands, location, screen) -> Dict:
	    """Annotate one video and return its observation as a dictionary.

	    A prompt is assembled from the selected questions, sent to
	    ``describe_video`` together with the video path, and the
	    ``<annotation>key: value</annotation>`` tags in the reply are parsed.

	    Args:
	        video_path: Path of the video file to annotate.
	        sitting: Truthy to ask whether the subject is standing or sitting.
	        hands: Truthy to ask whether the subject's hands are free.
	        location: Truthy to ask whether the subject is indoors or outdoors.
	        screen: Truthy to ask whether the subject interacts with a screen.

	    Returns:
	        On success, a dictionary with the keys ``video_name``, ``standing``,
	        ``hands.free``, ``indoors`` and ``screen.interaction_yes``. Values
	        are the strings found in the reply; a question that was not asked,
	        or whose annotation is absent from the reply, is reported as
	        ``"None"``. If ``describe_video`` raises, a dictionary with the
	        single key ``error`` is returned instead.
	    """
	    video_name = os.path.basename(video_path)  # Extract video name from the path
	    query = "Describe this video in detail and answer the questions"

	    # One entry per optional question: (selected?, annotation key, prompt text).
	    questions = [
	        (sitting, "standing", "Is the subject in the video standing or sitting?"),
	        (hands, "hands.free", "Is the subject holding any object in their hands, if so the hands are not free else they are free?"),
	        (location, "indoors", "Is the subject present indoors or outdoors?"),
	        (screen, "screen.interaction_yes", "Is the subject interacting with a screen in the background by facing the screen?"),
	    ]
	    additional_info = [text for asked, _, text in questions if asked]

	    end_query = """Provide the results in <annotation> tags, where 0 indicates False, 1 indicates True, and None indicates that no information is present. Follow the below examples:
	<annotation>indoors: 0</annotation>
	<annotation>standing: 1</annotation>
	<annotation>hands.free: 0</annotation>
	<annotation>screen.interaction_yes: 0</annotation>
	"""

	    final_query = query + " " + " ".join(additional_info)
	    final_prompt = final_query + " " + end_query

	    # The batch caller looks for an "error" key, so a failure on one video
	    # is reported through the return value instead of aborting the batch.
	    try:
	        final_response = describe_video(video_path, final_prompt)
	    except Exception as exc:
	        return {"error": f"Failed to process {video_name}: {exc}"}

	    # NOTE(review): assumes describe_video returns the reply as a str.
	    # The file also imports parse helpers from src.utils, but their contracts
	    # are not visible here, so the tags are parsed locally.
	    annotation_pattern = r"<annotation>\s*([^:<]+?)\s*:\s*([^<]*?)\s*</annotation>"
	    parsed = dict(re.findall(annotation_pattern, final_response))

	    observation = {"video_name": video_name}
	    for asked, key, _ in questions:
	        # A question that was never asked cannot have an answer, whatever
	        # the model echoed back from the examples in the prompt.
	        observation[key] = parsed.get(key, "None") if asked else "None"

	    return observation



	# Function to process a list of uploaded video files
	def process_multiple_videos(video_files: List[str], sitting, hands, location, screen):
	    """Annotate every video in ``video_files`` and export the results as CSV.

	    Args:
	        video_files: Paths of the videos to process. ``None`` or an empty
	            list produces a CSV that contains only the header row.
	        sitting: Forwarded to ``process_single_video``.
	        hands: Forwarded to ``process_single_video``.
	        location: Forwarded to ``process_single_video``.
	        screen: Forwarded to ``process_single_video``.

	    Returns:
	        A ``(status message, csv path)`` tuple.
	    """
	    all_observations = []

	    # video_files may be None when nothing was uploaded; treat it as an
	    # empty batch instead of failing on iteration.
	    for video_path in video_files or []:
	        observation = process_single_video(video_path, sitting, hands, location, screen)
	        if "error" in observation:
	            print(observation["error"])  # Log any errors
	        else:
	            all_observations.append(observation)

	        # Clear GPU cache after each video
	        torch.cuda.empty_cache()

	    # Save all observations to a CSV file and return the file path
	    csv_file = save_to_csv(all_observations)
	    return "Processing completed. Download the CSV file.", csv_file

	# Gradio interface
	def gradio_interface(video_files, sitting, hands, location, screen):
	    """Gradio callback: hand the form values to ``process_multiple_videos``.

	    Returns whatever ``process_multiple_videos`` returns, unchanged.
	    """
	    batch_result = process_multiple_videos(video_files, sitting, hands, location, screen)
	    return batch_result

	# Input components, in the order gradio_interface receives their values
	video_files = gr.File(
	    label="Upload multiple videos",
	    file_count="multiple",
	    file_types=["video"],
	)
	sitting = gr.Checkbox(label="Sitting/Standing")
	hands = gr.Checkbox(label="Hands Free/Not Free")
	location = gr.Checkbox(label="Indoors/Outdoors")
	screen = gr.Checkbox(label="Screen Interaction")

	# Output components: a status line and the generated CSV file
	response = gr.Textbox(label="Status")
	download_link = gr.File(label="Download CSV")

	# Gradio interface setup
	_input_components = [video_files, sitting, hands, location, screen]
	_output_components = [response, download_link]
	interface = gr.Interface(
	    fn=gradio_interface,
	    inputs=_input_components,
	    outputs=_output_components,
	    title="Batch Video Annotation",
	    description="Upload multiple videos and process them sequentially, saving the results to a downloadable CSV file.",
	    theme=gr.themes.Soft(primary_hue="red", secondary_hue="red"),
	    allow_flagging="never",
	)

	# Launch interface
	interface.launch(debug=False)