File size: 4,574 Bytes
9871891
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c78e00d
5fb8331
 
 
9871891
5fb8331
c78e00d
 
86b1263
9871891
 
 
 
c78e00d
9871891
 
 
 
 
 
 
 
 
5fb8331
 
 
9871891
 
 
 
 
 
5fb8331
 
 
 
 
 
 
78da2be
24e62e4
e6f84e4
 
 
24e62e4
 
5fb8331
 
 
 
e80df4f
5fb8331
 
ce45613
 
 
 
 
 
ab3a45b
 
ce45613
5fb8331
 
9871891
c78e00d
5fb8331
 
 
 
 
ce45613
5fb8331
 
9871891
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# # Importing the requirements
# import warnings
# warnings.filterwarnings("ignore")

# import gradio as gr
# from src.video_model import describe_video


# # Video and text inputs for the interface
# video = gr.Video(label="Video")
# query = gr.Textbox(label="Question", placeholder="Enter your question here")

# # Output for the interface
# response = gr.Textbox(label="Predicted answer", show_label=True, show_copy_button=True)

# # Examples for the interface
# examples = [
#     [
#         "videos/2016-01-01_0100_US_KNBC_Channel_4_News_1867.16-1871.38_now.mp4",
#         "Here are some frames of a video. Describe this video in detail."
#     ],
#     [
#         "videos/2016-01-01_0200_US_KNBC_Channel_4_News_1329.12-1333.29_tonight.mp4",
#         "Here are some frames of a video. Describe this video in detail."
#     ],
#     [   "videos/2016-01-01_0830_US_KNBC_Tonight_Show_with_Jimmy_Fallon_725.45-729.76_tonight.mp4", 
#         "Here are some frames of a video. Describe this video in detail."
#     ]
# ]

# # Title, description, and article for the interface
# title = "GSoC Super Raid Annotator"
# description = "Annotate Videos"
# article = "<p style='text-align: center'><a href='https://github.com/OpenBMB/MiniCPM-V' target='_blank'>Model GitHub Repo</a> | <a href='https://huggingface.co/openbmb/MiniCPM-V-2_6' target='_blank'>Model Page</a></p>"


# # Launch the interface
# interface = gr.Interface(
#     fn=describe_video,
#     inputs=[video, query],
#     outputs=response,
#     examples=examples,
#     title=title,
#     description=description,
#     article=article,
#     theme="Soft",
#     allow_flagging="never",
# )
# interface.launch(debug=False)


import warnings
warnings.filterwarnings("ignore")
import gradio as gr
from src.video_model import describe_video  # Assuming this function processes the video and query

# --- Function to construct the final query --- 
def process_video_and_questions(video, sitting, hands, location, screen):
    """Assemble the prompt from the selected options and query the video model.

    Args:
        video: Video input, forwarded unchanged to ``describe_video``.
        sitting: If truthy, ask whether the subject is standing or sitting.
        hands: If truthy, ask whether the subject's hands are free.
        location: If truthy, ask whether the subject is indoors or outdoors.
        screen: If truthy, ask whether the subject is interacting with a
            screen in the background.

    Returns:
        The value returned by ``describe_video`` for the video and the
        assembled prompt.
    """
    base_prompt = "Describe this video in detail and answer the questions"

    # (flag, question) pairs, in the order the questions appear in the prompt.
    optional_questions = (
        (sitting, "Is the subject in the video standing or sitting?"),
        (hands, "Is the subject holding any object in their hands, if so the hands are not free else they are free?"),
        (location, "Is the subject present indoors or outdoors?"),
        (screen, "Is the subject interacting with a screen in the background by facing the screen?"),
    )
    selected_questions = [text for enabled, text in optional_questions if enabled]

    # The base prompt is always followed by one space, even when no question
    # is selected.
    final_query = " ".join((base_prompt, " ".join(selected_questions)))
    return describe_video(video, final_query)

# --- Input components ---
# The video clip to annotate.
video = gr.Video(label="Video")

# One checkbox per optional follow-up question; created in the same order
# they are passed to the interface.
sitting, hands, location, screen = (
    gr.Checkbox(label=caption)
    for caption in (
        "Sitting/Standing",
        "Hands Free/Not Free",
        "Indoors/Outdoors",
        "Screen Interaction",
    )
)

# --- Output component ---
response = gr.Textbox(
    label="Predicted answer",
    show_label=True,
    show_copy_button=True,
)

# --- Example clips ---
# Each example row supplies only the video path; no checkbox values are given.
examples = [
    [f"videos/{clip_name}"]
    for clip_name in (
        "2016-01-01_0100_US_KNBC_Channel_4_News_1867.16-1871.38_now.mp4",
        "2016-01-01_0200_US_KNBC_Channel_4_News_1329.12-1333.29_tonight.mp4",
        "2016-01-01_0830_US_KNBC_Tonight_Show_with_Jimmy_Fallon_725.45-729.76_tonight.mp4",
    )
]

# --- Page metadata: title, description, and footer article (HTML) ---
title = "GSoC Super Raid Annotator"
description = "Annotate Videos"
article = (
    "<p style='text-align: center'>"
    "<a href='https://github.com/OpenBMB/MiniCPM-V' target='_blank'>Model GitHub Repo</a>"
    " | "
    "<a href='https://huggingface.co/openbmb/MiniCPM-V-2_6' target='_blank'>Model Page</a>"
    "</p>"
)

# Theme hues must name one of Gradio's built-in palettes (a
# `gr.themes.colors` Color object or its string name such as "red").
# A bare identifier like `red` is undefined here and raised NameError, and
# raw RGB tuples are not valid hue values.
my_red_color = "red"

# Soft theme with the red palette used for both the primary and secondary hue.
custom_theme = gr.themes.Soft(
    primary_hue=my_red_color,
    secondary_hue=my_red_color,
)

# Build the Gradio interface: the checkbox values and the video are passed to
# process_video_and_questions, whose return value is shown in `response`.
interface = gr.Interface(
    # Callback and its wiring
    fn=process_video_and_questions,
    inputs=[video, sitting, hands, location, screen],
    outputs=response,
    examples=examples,
    # Page text
    title=title,
    description=description,
    article=article,
    # Appearance and flagging
    theme=custom_theme,
    allow_flagging="never",
)

# Start serving the app with debug mode off.
interface.launch(debug=False)