Spaces:

Prathamesh1420
/

Bottole_lable_checking_gemini_yolo11

Running

App Files Files Community

Prathamesh1420 committed on Feb 13

Commit

6975a6c

verified ·

1 Parent(s): 7b6396d

Update app.py

Browse files

Files changed (1) hide show

app.py +90 -48

app.py CHANGED Viewed

@@ -1,63 +1,105 @@
 import streamlit as st
 import cv2
-import tempfile
-import os
 import numpy as np
 from ultralytics import YOLO
-from PIL import Image
-def process_video(video_path, model):
     cap = cv2.VideoCapture(video_path)
-    temp_output = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
-    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-    out = cv2.VideoWriter(temp_output.name, fourcc, cap.get(cv2.CAP_PROP_FPS),
-                          (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))))
     while cap.isOpened():
         ret, frame = cap.read()
         if not ret:
             break
-        results = model(frame)
-        for result in results:
-            for box in result.boxes:
-                x1, y1, x2, y2 = map(int, box.xyxy[0])
-                label = result.names[int(box.cls[0])]
-                conf = float(box.conf[0])
-                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
-                cv2.putText(frame, f'{label}: {conf:.2f}', (x1, y1 - 10),
-                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
         out.write(frame)
     cap.release()
     out.release()
-    return temp_output.name
-def main():
-    st.set_page_config(page_title="Bottle Label Checker", page_icon="🍾")
-    st.title("Bottle Label Checking System using YOLO & Gemini")
-    uploaded_video = st.file_uploader("Upload a video", type=["mp4", "avi", "mov", "mkv"])
-    if uploaded_video is not None:
-        temp_video_path = os.path.join(tempfile.gettempdir(), uploaded_video.name)
-        with open(temp_video_path, "wb") as f:
-            f.write(uploaded_video.read())
-        st.video(temp_video_path)
-        model = YOLO("yolov8n.pt")  # Load YOLO model
-        if st.button("Process Video"):
-            st.write("Processing video... This may take some time.")
-            output_path = process_video(temp_video_path, model)
-            st.video(output_path)
-            st.success("Processing complete!")
-            with open(output_path, "rb") as file:
-                st.download_button("Download Processed Video", file, file_name="processed_video.mp4", mime="video/mp4")
-if __name__ == "__main__":
-    main()

 import streamlit as st
 import cv2
 import numpy as np
+import os
+import time
+import threading
+import base64
 from ultralytics import YOLO
+from langchain_core.messages import HumanMessage
+from langchain_google_genai import ChatGoogleGenerativeAI
+# Set up Google API Key
+# setdefault keeps a key that is already present in the environment instead of
+# overwriting it with the empty placeholder below.
+os.environ.setdefault("GOOGLE_API_KEY", "")  # Replace with your API Key
+gemini_model = ChatGoogleGenerativeAI(model="gemini-1.5-flash")
+# Load YOLO model
+yolo_model = YOLO("best.pt")
+names = yolo_model.names
+# Constants for ROI detection
+# NOTE(review): cx1 and offset are not referenced by any code visible in this diff.
+cx1 = 491
+offset = 8
+# Crops are written to a per-day folder named crop_YYYY-MM-DD.
+current_date = time.strftime("%Y-%m-%d")
+crop_folder = f"crop_{current_date}"
+# exist_ok avoids the check-then-create race of os.path.exists + os.makedirs.
+os.makedirs(crop_folder, exist_ok=True)
+# Track ids that have already been cropped and sent for analysis.
+processed_track_ids = set()
+def encode_image_to_base64(image):
+    """Return ``image`` encoded as a JPEG and wrapped in base64 (UTF-8 text)."""
+    # cv2.imencode returns a (status, buffer) pair; only the buffer is used.
+    encoded = cv2.imencode('.jpg', image)
+    jpeg_buffer = encoded[1]
+    b64_bytes = base64.b64encode(jpeg_buffer)
+    return b64_bytes.decode('utf-8')
+def analyze_image_with_gemini(current_image):
+    """Ask Gemini whether the bottle shown in ``current_image`` has a label.
+
+    Args:
+        current_image: Image array to analyze, or ``None``.
+
+    Returns:
+        The model's response content, a fixed message when there is no image
+        to analyze, or an ``"Error processing image: ..."`` string when
+        encoding or the model call fails.
+    """
+    # A zero-area crop is an empty array rather than None; the JPEG encoder
+    # rejects it, so treat it the same as a missing image.
+    if current_image is None or current_image.size == 0:
+        return "No image available for analysis."
+    try:
+        # Encoding happens inside the try so that an encoder failure is
+        # reported as an error string like any other failure.
+        current_image_data = encode_image_to_base64(current_image)
+        message = HumanMessage(
+            content=[
+                {"type": "text", "text": "Analyze this image and check if the label is present on the bottle. Return results in a structured format."},
+                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{current_image_data}"}, "description": "Detected product"}
+            ]
+        )
+        response = gemini_model.invoke([message])
+        return response.content
+    except Exception as e:
+        return f"Error processing image: {e}"
+def save_crop_image(crop, track_id):
+    """Write ``crop`` to ``<crop_folder>/<track_id>.jpg`` and return that path."""
+    target_path = f"{crop_folder}/{track_id}.jpg"
+    cv2.imwrite(target_path, crop)
+    return target_path
+def process_crop_image(crop, track_id):
+    """Analyze one crop with Gemini and record the answer under its track id.
+
+    Appends ``(track_id, response)`` to ``st.session_state["responses"]``.
+    NOTE(review): if this runs on a worker thread, confirm that
+    ``st.session_state`` is reachable from that thread.
+    """
+    entry = (track_id, analyze_image_with_gemini(crop))
+    st.session_state["responses"].append(entry)
+def process_video(uploaded_file):
+    """Run YOLO tracking over an uploaded video and analyze each new track.
+
+    The upload is written to ``uploaded_video.mp4``, every frame is resized to
+    1020x500 and tracked, and the first non-empty crop of each track id is
+    saved to disk and sent to Gemini. Analysis results are appended to
+    ``st.session_state["responses"]`` as ``(track_id, response)`` tuples once
+    all frames have been read.
+
+    Args:
+        uploaded_file: The uploaded video object, or a falsy value.
+
+    Returns:
+        Path of the written output video, or ``None`` when no file was given.
+    """
+    from concurrent.futures import ThreadPoolExecutor
+    if not uploaded_file:
+        return None
+    # getvalue() returns the full upload regardless of the current read cursor.
+    video_bytes = uploaded_file.getvalue()
+    video_path = "uploaded_video.mp4"
+    with open(video_path, "wb") as f:
+        f.write(video_bytes)
     cap = cv2.VideoCapture(video_path)
+    output_path = "output_video.mp4"
+    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+    # Keep the source frame rate; fall back to 20 fps when it is not reported.
+    fps = cap.get(cv2.CAP_PROP_FPS) or 20.0
+    out = cv2.VideoWriter(output_path, fourcc, fps, (1020, 500))
+    # Gemini calls run in worker threads, but their results are collected and
+    # stored from this thread so they are complete before the caller renders.
+    executor = ThreadPoolExecutor()
+    pending = []  # (track_id, future) in submission order
     while cap.isOpened():
         ret, frame = cap.read()
         if not ret:
             break
+        frame = cv2.resize(frame, (1020, 500))
+        results = yolo_model.track(frame, persist=True)
+        detections = results[0].boxes
+        if detections is not None:
+            boxes = detections.xyxy.int().cpu().tolist()
+            # Detections without tracker ids all share the placeholder id -1.
+            track_ids = detections.id.int().cpu().tolist() if detections.id is not None else [-1] * len(boxes)
+            for box, track_id in zip(boxes, track_ids):
+                if track_id in processed_track_ids:
+                    continue
+                x1, y1, x2, y2 = box
+                crop = frame[y1:y2, x1:x2]
+                # A zero-area box cannot be written or encoded; leave the id
+                # unprocessed so a later frame can supply a usable crop.
+                if crop.size == 0:
+                    continue
+                processed_track_ids.add(track_id)
+                save_crop_image(crop, track_id)
+                pending.append((track_id, executor.submit(analyze_image_with_gemini, crop)))
         out.write(frame)
     cap.release()
     out.release()
+    for track_id, future in pending:
+        try:
+            response = future.result()
+        except Exception as e:
+            # Same error-string convention as analyze_image_with_gemini.
+            response = f"Error processing image: {e}"
+        st.session_state["responses"].append((track_id, response))
+    executor.shutdown()
+    return output_path
+# Page layout: title in the main area, uploader in the sidebar.
+st.title("Bottle Label Checking using YOLO & Gemini AI")
+st.sidebar.header("Upload a video")
+uploaded_file = st.sidebar.file_uploader("Choose a video file", type=["mp4", "avi", "mov"])
+if "responses" not in st.session_state:
+    st.session_state["responses"] = []
+if uploaded_file:
+    # The video is reprocessed on every script run, so start from an empty
+    # list; otherwise results from earlier runs would be shown again.
+    st.session_state["responses"] = []
+    st.sidebar.write("Processing...")
+    output_video_path = process_video(uploaded_file)
+    st.sidebar.success("Processing completed!")
+    st.video(output_video_path)
+    st.subheader("AI Analysis Results")
+    for track_id, response in st.session_state["responses"]:
+        st.write(f"**Track ID {track_id}:** {response}")