Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,40 +1,10 @@
|
|
1 |
import warnings
|
2 |
warnings.filterwarnings("ignore")
|
3 |
import gradio as gr
|
4 |
-
import json
|
5 |
-
from pydantic import BaseModel
|
6 |
-
from typing import Dict
|
7 |
-
import csv
|
8 |
-
import os
|
9 |
from src.video_model import describe_video # Assuming this function processes the video and query
|
10 |
|
11 |
-
#
|
12 |
-
class VideoObservation(BaseModel):
|
13 |
-
is_subject_standing: int
|
14 |
-
is_subject_holding_object_in_hands: int
|
15 |
-
is_subject_present_indoors: int
|
16 |
-
is_subject_interacting_with_screen: int
|
17 |
-
|
18 |
-
# Function to append one observation row to a CSV file and return the CSV file's path
|
19 |
-
def save_to_csv(video_name: str, observation: VideoObservation, output_dir: str = "outputs") -> str:
|
20 |
-
if not os.path.exists(output_dir):
|
21 |
-
os.makedirs(output_dir)
|
22 |
-
|
23 |
-
csv_file = os.path.join(output_dir, "video_observations.csv")
|
24 |
-
|
25 |
-
with open(csv_file, mode='a', newline='') as file:
|
26 |
-
writer = csv.writer(file)
|
27 |
-
# Write the header if the file is empty
|
28 |
-
if file.tell() == 0:
|
29 |
-
writer.writerow(["video_name", "is_subject_standing", "is_subject_holding_object_in_hands", "is_subject_present_indoors", "is_subject_interacting_with_screen"])
|
30 |
-
# Write the data
|
31 |
-
writer.writerow([video_name, observation.is_subject_standing, observation.is_subject_holding_object_in_hands, observation.is_subject_present_indoors, observation.is_subject_interacting_with_screen])
|
32 |
-
|
33 |
-
return csv_file
|
34 |
-
|
35 |
-
# --- Function to process the video and questions, then save results to CSV ---
|
36 |
def process_video_and_questions(video, sitting, hands, location, screen):
|
37 |
-
video_name = video.split("/")[-1] # Extract video name from the path
|
38 |
query = "Describe this video in detail and answer the questions"
|
39 |
additional_info = []
|
40 |
if sitting:
|
@@ -48,26 +18,9 @@ def process_video_and_questions(video, sitting, hands, location, screen):
|
|
48 |
end_query = "Provide the results in JSON format with 0 being False and 1 being True"
|
49 |
final_query = query + " " + " ".join(additional_info)
|
50 |
final_prompt = final_query + " " + end_query
|
51 |
-
|
52 |
# Assuming your describe_video function handles the video processing
|
53 |
response = describe_video(video, final_prompt)
|
54 |
-
|
55 |
-
# Extract the JSON part from the response
|
56 |
-
start_index = response.find('{')
|
57 |
-
end_index = response.rfind('}') + 1
|
58 |
-
json_str = response[start_index:end_index]
|
59 |
-
|
60 |
-
try:
|
61 |
-
# Parse JSON string into a Pydantic model
|
62 |
-
observation = VideoObservation.parse_raw(json_str)
|
63 |
-
# Save to CSV and return the file path
|
64 |
-
csv_file = save_to_csv(video_name, observation)
|
65 |
-
except json.JSONDecodeError as e:
|
66 |
-
return f"Error parsing JSON: {e}", None
|
67 |
-
except Exception as e:
|
68 |
-
return f"An error occurred: {e}", None
|
69 |
-
|
70 |
-
return response, csv_file
|
71 |
|
72 |
# Video and text inputs for the interface
|
73 |
video = gr.Video(label="Video")
|
@@ -80,7 +33,6 @@ screen = gr.Checkbox(label="Screen Interaction")
|
|
80 |
|
81 |
# Output for the interface
|
82 |
response = gr.Textbox(label="Predicted answer", show_label=True, show_copy_button=True)
|
83 |
-
download_link = gr.File(label="Download CSV")
|
84 |
|
85 |
# Examples for the interface
|
86 |
examples = [
|
@@ -102,16 +54,17 @@ title = "GSoC Super Raid Annotator"
|
|
102 |
description = "Annotate Videos"
|
103 |
article = "<p style='text-align: center'><a href='https://github.com/OpenBMB/MiniCPM-V' target='_blank'>Model GitHub Repo</a> | <a href='https://huggingface.co/openbmb/MiniCPM-V-2_6' target='_blank'>Model Page</a></p>"
|
104 |
|
|
|
105 |
custom_theme = gr.themes.Soft(
|
|
|
106 |
primary_hue="red",
|
107 |
-
secondary_hue="red"
|
108 |
-
)
|
109 |
|
110 |
# Launch the interface
|
111 |
interface = gr.Interface(
|
112 |
-
fn=process_video_and_questions,
|
113 |
inputs=[video, sitting, hands, location, screen],
|
114 |
-
outputs=
|
115 |
examples=examples,
|
116 |
title=title,
|
117 |
description=description,
|
@@ -119,4 +72,4 @@ interface = gr.Interface(
|
|
119 |
theme=custom_theme,
|
120 |
allow_flagging="never",
|
121 |
)
|
122 |
-
interface.launch(debug=False)
|
|
|
1 |
import warnings
|
2 |
warnings.filterwarnings("ignore")
|
3 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
4 |
from src.video_model import describe_video # Assuming this function processes the video and query
|
5 |
|
6 |
+
# --- Function to construct the final query, pass it to describe_video, and return the response ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
def process_video_and_questions(video, sitting, hands, location, screen):
|
|
|
8 |
query = "Describe this video in detail and answer the questions"
|
9 |
additional_info = []
|
10 |
if sitting:
|
|
|
18 |
end_query = "Provide the results in JSON format with 0 being False and 1 being True"
|
19 |
final_query = query + " " + " ".join(additional_info)
|
20 |
final_prompt = final_query + " " + end_query
|
|
|
21 |
# Assuming your describe_video function handles the video processing
|
22 |
response = describe_video(video, final_prompt)
|
23 |
+
return response
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
|
25 |
# Video and text inputs for the interface
|
26 |
video = gr.Video(label="Video")
|
|
|
33 |
|
34 |
# Output for the interface
|
35 |
response = gr.Textbox(label="Predicted answer", show_label=True, show_copy_button=True)
|
|
|
36 |
|
37 |
# Examples for the interface
|
38 |
examples = [
|
|
|
54 |
description = "Annotate Videos"
|
55 |
article = "<p style='text-align: center'><a href='https://github.com/OpenBMB/MiniCPM-V' target='_blank'>Model GitHub Repo</a> | <a href='https://huggingface.co/openbmb/MiniCPM-V-2_6' target='_blank'>Model Page</a></p>"
|
56 |
|
57 |
+
|
58 |
custom_theme = gr.themes.Soft(
|
59 |
+
# Set the primary hue of the Soft theme to your red color
|
60 |
primary_hue="red",
|
61 |
+
secondary_hue="red")
|
|
|
62 |
|
63 |
# Launch the interface
|
64 |
interface = gr.Interface(
|
65 |
+
fn=process_video_and_questions, # Updated function to handle the query construction
|
66 |
inputs=[video, sitting, hands, location, screen],
|
67 |
+
outputs=response,
|
68 |
examples=examples,
|
69 |
title=title,
|
70 |
description=description,
|
|
|
72 |
theme=custom_theme,
|
73 |
allow_flagging="never",
|
74 |
)
|
75 |
+
interface.launch(debug=False)
|