Spaces:

ManishThota
/

GSoC-Super-Rapid-Annotator

Runtime error

App Files

xet

Community

ManishThota committed on Aug 21, 2024

Commit

79e8a3d

verified ·

1 Parent(s): f9f1c5f

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -47

app.py CHANGED Viewed

@@ -15,14 +15,30 @@ def process_video_and_questions(video, sitting, hands, location, screen):
     # Construct the query with the video name included
     query = f"Answer the questions from the video\n"
     additional_info = []
     if sitting:
         additional_info.append("Is the subject in the video standing or sitting?")
     if hands:
         additional_info.append("Is the subject holding any object in their hands, if so the hands are not free else they are free?")
     if location:
         additional_info.append("Is the subject present indoors or outdoors?")
     if screen:
         additional_info.append("Is the subject interacting with a screen in the background by facing the screen?")
     end_query = """Provide the results in <annotation> tags, where 0 indicates False, 1 indicates True, and None indicates that no information is present. Follow the below examples\n:
         <annotation>indoors: 0</annotation>
@@ -36,56 +52,21 @@ def process_video_and_questions(video, sitting, hands, location, screen):
     # Assuming your describe_video function handles the video processing
     response = describe_video(video, final_prompt)
-    final_response = f"<video_name>{video_name}</video_name>" + " " + response
-    return final_response
-# def process_video_and_questions(video, sitting, hands, location, screen):
-#     # Extract the video name (filename)
-#     video_name = os.path.basename(video)
-#     # Construct the query with the video name included
-#     query = f"Describe the video in detail and answer the questions"
-#     additional_info = []
-#     # Handle each checkbox option, including those not selected (None)
-    # if sitting is not None:
-    #     additional_info.append("Is the subject in the video standing or sitting?")
-    # else:
-    #     additional_info.append("<annotation>standing: None</annotation>")
-    # if hands is not None:
-    #     additional_info.append("Is the subject holding any object in their hands, if so the hands are not free else they are free?")
-    # else:
-    #     additional_info.append("<annotation>hands.free: None</annotation>")
-    # if location is not None:
-    #     additional_info.append("Is the subject present indoors or outdoors?")
-    # else:
-    #     additional_info.append("<annotation>indoors: None</annotation>")
-    # if screen is not None:
-    #     additional_info.append("Is the subject interacting with a screen in the background by facing the screen?")
-    # else:
-    #     additional_info.append("<annotation>screen.interaction_yes: None</annotation>")
-    # # Updated end_query with structured prompt
-    # end_query = """
-    # You're an AI assistant, and your goal is to provide the results of the video analysis in the correct format as described below:
-    # <annotations>
-    # - Provide the results in <annotation> tags, where 0 indicates False, 1 indicates True, and None indicates that no information is present.
-    # - Use <annotation> tags for each attribute like indoors, standing, hands.free, and screen.interaction_yes.
-    # </annotations>
-    # """
-    # final_query = query + " " + " ".join(additional_info)
-    # final_prompt = final_query + " " + end_query
-    # # Assuming your describe_video function handles the video processing
-    # response = describe_video(video, final_prompt)
-    # final_response = f"<video_name>{video_name}</video_name>" + " " + response
-    # return final_response
 def output_to_csv(final_response):

     # Construct the query with the video name included
     query = f"Answer the questions from the video\n"
     additional_info = []
+    # Initialize placeholders for annotations with None by default
+    annotations = {
+        "indoors": None,
+        "standing": None,
+        "hands.free": None,
+        "screen.interaction_yes": None
+    }
     if sitting:
         additional_info.append("Is the subject in the video standing or sitting?")
+        annotations["standing"] = 0  # Default value if selected
     if hands:
         additional_info.append("Is the subject holding any object in their hands, if so the hands are not free else they are free?")
+        annotations["hands.free"] = 0  # Default value if selected
     if location:
         additional_info.append("Is the subject present indoors or outdoors?")
+        annotations["indoors"] = 0  # Default value if selected
     if screen:
         additional_info.append("Is the subject interacting with a screen in the background by facing the screen?")
+        annotations["screen.interaction_yes"] = 0  # Default value if selected
     end_query = """Provide the results in <annotation> tags, where 0 indicates False, 1 indicates True, and None indicates that no information is present. Follow the below examples\n:
         <annotation>indoors: 0</annotation>
     # Assuming your describe_video function handles the video processing
     response = describe_video(video, final_prompt)
+    # Parse the response and update the corresponding annotations
+    for line in response.split('\n'):
+        if '<annotation>' in line:
+            key_value = line.replace('<annotation>', '').replace('</annotation>', '').strip().split(': ')
+            if len(key_value) == 2:
+                key, value = key_value
+                annotations[key] = value
+    # Construct the final response with all annotations
+    final_response = f"<video_name>{video_name}</video_name>\n"
+    for key, value in annotations.items():
+        final_response += f"<annotation>{key}: {value}</annotation>\n"
+    return final_response
 def output_to_csv(final_response):