Update app.py
app.py CHANGED
@@ -8,8 +8,8 @@ import gradio as gr
 from langchain_core.messages import HumanMessage
 from langchain_google_genai import ChatGoogleGenerativeAI
 
-# ✅ Set up Google API Key
-os.environ["GOOGLE_API_KEY"] = "
+# ✅ Set up Google API Key (Avoid hardcoding in production)
+os.environ["GOOGLE_API_KEY"] = "YOUR_GOOGLE_API_KEY"
 
 # ✅ Initialize the Gemini model
 gemini_model = ChatGoogleGenerativeAI(model="gemini-1.5-flash")
@@ -18,15 +18,13 @@ gemini_model = ChatGoogleGenerativeAI(model="gemini-1.5-flash")
 yolo_model = YOLO("best.pt")
 names = yolo_model.names # Class names from the YOLO model
 
-processed_ids = set() # Store track IDs of processed bottles
-
 def encode_image_to_base64(image):
     _, img_buffer = cv2.imencode('.jpg', image)
     return base64.b64encode(img_buffer).decode('utf-8')
 
 def analyze_image_with_gemini(image):
-    if image is None:
-        return "
+    if image is None or image.shape[0] == 0 or image.shape[1] == 0:
+        return "Error: Invalid image."
 
     image_data = encode_image_to_base64(image)
     message = HumanMessage(content=[
@@ -51,16 +49,22 @@ def process_video(video_path):
     if not cap.isOpened():
         return "Error: Could not open video file."
 
-    …
-    …
-    …
+    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+    fps = int(cap.get(cv2.CAP_PROP_FPS))
+
+    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+    output_video_path = "output.mp4"
+    out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
+
+    vertical_center = width // 2
 
     while True:
         ret, frame = cap.read()
         if not ret:
             break
 
-        frame = cv2.resize(frame, (
+        frame = cv2.resize(frame, (width, height))
         results = yolo_model.track(frame, persist=True)
 
         if results[0].boxes is not None:
@@ -70,26 +74,25 @@ def process_video(video_path):
 
             for box, track_id, class_id in zip(boxes, track_ids, class_ids):
                 x1, y1, x2, y2 = box
-                …
+                center_x = (x1 + x2) // 2
+                center_y = (y1 + y2) // 2
 
                 cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                 cvzone.putTextRect(frame, f'ID: {track_id}', (x2, y2), 1, 1)
                 cvzone.putTextRect(frame, f'{names[class_id]}', (x1, y1), 1, 1)
 
-                #
-                cv2.line(frame, (0, center_line), (frame.shape[1], center_line), (0, 0, 255), 2)
-
-                # Check if the bottle crosses the center line
-                if center_y >= center_line and track_id not in processed_ids:
-                    processed_ids.add(track_id)
+                if abs(center_x - vertical_center) < 10: # If the center of the box is near the vertical center
                     crop = frame[y1:y2, x1:x2]
                     response = analyze_image_with_gemini(crop)
-                    …
+
+                    cvzone.putTextRect(frame, response, (x1, y1 - 10), 1, 1, colorT=(255, 255, 255), colorR=(0, 0, 255))
 
-
+        out.write(frame)
 
     cap.release()
-    …
+    out.release()
+
+    return output_video_path
 
 def gradio_interface(video_path):
     if video_path is None:
@@ -100,7 +103,7 @@ def gradio_interface(video_path):
 iface = gr.Interface(
     fn=gradio_interface,
     inputs=gr.File(type="filepath", label="Upload Video"),
-    outputs=gr.
+    outputs=gr.Video(label="Processed Video"),
     title="YOLO + Gemini AI Video Analysis",
     description="Upload a video to detect objects and analyze them using Gemini AI.",
 )
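
The commit replaces the hardcoded key with a `"YOUR_GOOGLE_API_KEY"` placeholder, but a placeholder in source still invites accidental key commits. Below is a minimal sketch, not part of the commit above, of reading the key from the environment instead; it assumes the app runs as a Hugging Face Space where a repository secret named `GOOGLE_API_KEY` is exposed to the app as an environment variable:

```python
import os

# Sketch: read the Gemini key from the environment rather than hardcoding it.
# On Hugging Face Spaces, add GOOGLE_API_KEY as a Space secret; it is then
# available to the running app as an environment variable.
api_key = os.environ.get("GOOGLE_API_KEY")  # None if unset
if not api_key:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; add it as a Space secret "
        "or export it before launching the app."
    )
```

`ChatGoogleGenerativeAI` looks the key up from the environment by default, so once the variable is present no further wiring is needed. One related caveat in the new code: `cap.get(cv2.CAP_PROP_FPS)` can report 0 for some files, which would leave the `VideoWriter` with an invalid frame rate; a fallback such as `fps = int(cap.get(cv2.CAP_PROP_FPS)) or 25` guards against that.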