ManishThota committed (verified)
Commit 1e38ac4 · Parent(s): 79e8a3d

Update app.py

Files changed (1): app.py (+25 -40)
app.py CHANGED
@@ -7,38 +7,28 @@ from src.utils import parse_string, parse_annotations
 import os
 
 
+
+def parse_response()
 # --- Function to construct the final query ---
-def process_video_and_questions(video, sitting, hands, location, screen):
+def process_video_and_questions(video, standing, hands, location, screen):
     # Extract the video name (filename)
     video_name = os.path.basename(video)
 
     # Construct the query with the video name included
     query = f"Answer the questions from the video\n"
     additional_info = []
-
-    # Initialize placeholders for annotations with None by default
-    annotations = {
-        "indoors": None,
-        "standing": None,
-        "hands.free": None,
-        "screen.interaction_yes": None
-    }
-
-    if sitting:
-        additional_info.append("Is the subject in the video standing or sitting?")
-        annotations["standing"] = 0  # Default value if selected
-
+    if standing:
+        additional_info.append("Is the subject in the video standing or sitting?\n")
+        standing_flag = True
     if hands:
-        additional_info.append("Is the subject holding any object in their hands, if so the hands are not free else they are free?")
-        annotations["hands.free"] = 0  # Default value if selected
-
+        additional_info.append("Is the subject holding any object in their hands?\n")
+        hands_flag = True
     if location:
-        additional_info.append("Is the subject present indoors or outdoors?")
-        annotations["indoors"] = 0  # Default value if selected
-
+        additional_info.append("Is the subject present indoors?\n")
+        location_flag = True
     if screen:
-        additional_info.append("Is the subject interacting with a screen in the background by facing the screen?")
-        annotations["screen.interaction_yes"] = 0  # Default value if selected
+        additional_info.append("Is the subject interacting with a screen in the background by facing the screen?\n")
+        screen_flag = True
 
     end_query = """Provide the results in <annotation> tags, where 0 indicates False, 1 indicates True, and None indicates that no information is present. Follow the below examples\n:
     <annotation>indoors: 0</annotation>
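Review note: in the rewritten branches above, `standing_flag`, `hands_flag`, `location_flag`, and `screen_flag` are bound only when their checkbox is ticked, so the later `if standing_flag == False:` check raises `NameError` whenever Standing is left unchecked. A minimal fix, not part of this commit, is to default the flags before the branches. (The `def parse_response()` stub added above is also missing its colon and body, a SyntaxError that stops app.py from importing at all; a sketch of a possible body follows the next hunk.)

    # Not in the commit: default the flags so later checks never hit an unbound name
    standing_flag = hands_flag = location_flag = screen_flag = False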
@@ -52,21 +42,14 @@ def process_video_and_questions(video, sitting, hands, location, screen):
 
     # Assuming your describe_video function handles the video processing
     response = describe_video(video, final_prompt)
-
-    # Parse the response and update the corresponding annotations
-    for line in response.split('\n'):
-        if '<annotation>' in line:
-            key_value = line.replace('<annotation>', '').replace('</annotation>', '').strip().split(': ')
-            if len(key_value) == 2:
-                key, value = key_value
-                annotations[key] = value
-
-    # Construct the final response with all annotations
-    final_response = f"<video_name>{video_name}</video_name>\n"
-    for key, value in annotations.items():
-        final_response += f"<annotation>{key}: {value}</annotation>\n"
-
-    return final_response
+    final_response = f"<video_name>{video_name}</video_name>" + " \n" + response
+
+    if standing_flag == False:
+        final_response.replace('standing: 1', 'standing: None')
+
+    return final_response
 
 
 def output_to_csv(final_response):
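Two problems in the hunk above. First, `str.replace` returns a new string rather than mutating in place, so the masking line discards its result (and as written it rewrites only `standing: 1`, so a `standing: 0` answer still slips through when the question was never asked). It needs reassignment:

    final_response = final_response.replace('standing: 1', 'standing: None')

Second, the commit deletes the inline `<annotation>` parsing loop while leaving `parse_response()` as an empty stub. A sketch of what that helper might look like, reusing the deleted logic; the signature and return type are assumptions, not the author's code:

def parse_response(response: str) -> dict:
    # Collect key/value pairs from lines shaped like
    # <annotation>key: value</annotation> (logic lifted from the removed loop)
    annotations = {}
    for line in response.split('\n'):
        if '<annotation>' in line:
            key_value = line.replace('<annotation>', '').replace('</annotation>', '').strip().split(': ')
            if len(key_value) == 2:
                key, value = key_value
                annotations[key] = value
    return annotations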
@@ -82,6 +65,8 @@ def output_to_csv(final_response):
     # Combine the video name and annotation dictionary into a single row
     df = pd.DataFrame([{'video_name': video_name, **annotations_dict}])
 
+
+
     return df
 
 
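`output_to_csv` leans on `parse_string` and `parse_annotations` from `src.utils`, which this diff never shows. Purely to illustrate the one-row DataFrame built here, a self-contained stand-in; the regexes are hypothetical, not the real helpers, and assume both tags are present in `final_response`:

import re
import pandas as pd

def output_to_csv(final_response: str) -> pd.DataFrame:
    # Hypothetical parsing; the app's real helpers live in src.utils
    video_name = re.search(r'<video_name>(.*?)</video_name>', final_response).group(1)
    annotations_dict = dict(re.findall(r'<annotation>(\S+): (\S+)</annotation>', final_response))
    # Combine the video name and annotation dictionary into a single row
    return pd.DataFrame([{'video_name': video_name, **annotations_dict}])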
@@ -117,9 +102,9 @@ with gr.Blocks(theme=custom_theme) as demo:
     with gr.Row():
         with gr.Column():
             video = gr.Video(label="Video")
-            sitting = gr.Checkbox(label="Sitting/Standing")
-            hands = gr.Checkbox(label="Hands Free/Not Free")
-            location = gr.Checkbox(label="Indoors/Outdoors")
+            standing = gr.Checkbox(label="Standing")
+            hands = gr.Checkbox(label="Hands Free")
+            location = gr.Checkbox(label="Indoors")
             screen = gr.Checkbox(label="Screen Interaction")
             submit_btn = gr.Button("Generate Annotations")
             generate_csv_btn = gr.Button("Generate CSV")
@@ -131,7 +116,7 @@ with gr.Blocks(theme=custom_theme) as demo:
     # Event handling for the Submit button
     submit_btn.click(
         fn=process_video_and_questions,
-        inputs=[video, sitting, hands, location, screen],
+        inputs=[video, standing, hands, location, screen],
         outputs=response
     )
 
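The CSV button's handler is only partially visible; its `outputs=csv_output` tail appears as context in the next hunk. A plausible reconstruction of that wiring, inferred rather than shown by the diff:

# Inferred, not shown in full by this diff: feed the annotation text
# produced by the submit handler into output_to_csv
generate_csv_btn.click(
    fn=output_to_csv,
    inputs=response,
    outputs=csv_output
)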
@@ -142,6 +127,6 @@ with gr.Blocks(theme=custom_theme) as demo:
         outputs=csv_output
     )
 
-    gr.Examples(examples=examples, inputs=[video, sitting, hands, location, screen])
+    gr.Examples(examples=examples, inputs=[video, standing, hands, location, screen])
 
-    demo.launch(debug=False)
+demo.launch(debug=False)
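With `gr.Examples` now bound to `[video, standing, hands, location, screen]`, every row in `examples` (defined elsewhere in app.py) must supply one value per component, in that order: a video path plus four booleans. A hypothetical row:

# Hypothetical example row; the real list lives elsewhere in app.py
examples = [
    ["videos/sample.mp4", True, False, True, True],
]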
 