Prathamesh1420 committed on
Commit
bc4bdda
·
verified ·
1 Parent(s): 3cca7f7

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +136 -0
app.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import cv2
3
+ import numpy as np
4
+ import os
5
+ import time
6
+ import threading
7
+ import base64
8
+ from ultralytics import YOLO
9
+ from langchain_core.messages import HumanMessage
10
+ from langchain_google_genai import ChatGoogleGenerativeAI
11
+
12
# Set up Google API Key.
# setdefault keeps a real key that is already present in the environment
# (e.g. injected as a deployment secret) instead of overwriting it with the
# placeholder below. Avoid committing a real key into this file.
os.environ.setdefault("GOOGLE_API_KEY", "YOUR_GOOGLE_API_KEY")  # Replace with your API Key
gemini_model = ChatGoogleGenerativeAI(model="gemini-1.5-flash")

# Load YOLO model from the local weights file and keep its class-name table.
yolo_model = YOLO("best.pt")
names = yolo_model.names

# Constants for ROI detection.
# NOTE(review): cx1 and offset are not referenced anywhere else in the
# visible code — confirm whether ROI filtering was meant to be applied.
cx1 = 491
offset = 8

# Crops are written to a per-day folder named after the date at import time.
current_date = time.strftime("%Y-%m-%d")
crop_folder = f"crop_{current_date}"
# exist_ok avoids the check-then-create race of exists() + makedirs().
os.makedirs(crop_folder, exist_ok=True)

# Track processed IDs to avoid duplicate processing.
# This set lives at module level and is never cleared in the visible code,
# so IDs seen while processing one video are also skipped in later calls.
processed_track_ids = set()
lock = threading.Lock()  # Guards processed_track_ids
31
+
32
def encode_image_to_base64(image):
    """Return *image* JPEG-encoded and base64-encoded as a UTF-8 string.

    Args:
        image: Image accepted by ``cv2.imencode`` (presumably a NumPy array
            in OpenCV's BGR layout — confirm against callers).

    Returns:
        The base64 text of the JPEG bytes. The success flag returned by
        ``cv2.imencode`` is not inspected.
    """
    jpeg_buffer = cv2.imencode('.jpg', image)[1]
    b64_bytes = base64.b64encode(jpeg_buffer)
    return b64_bytes.decode('utf-8')
35
+
36
def analyze_image_with_gemini(current_image):
    """Ask the Gemini model whether a label is present on the bottle image.

    Args:
        current_image: Image to analyze, or ``None``. It is JPEG/base64
            encoded via ``encode_image_to_base64`` before being sent.

    Returns:
        The model's response content on success,
        ``"No image available for analysis."`` when ``current_image`` is
        ``None``, or an ``"Error processing image: ..."`` string when
        encoding, message construction, or the model call fails.
    """
    if current_image is None:
        return "No image available for analysis."

    # Encoding and message construction are inside the try block so that a
    # failure there (for example an image that cannot be JPEG-encoded) is
    # reported as an error string, like a failed model call, instead of
    # raising in the caller's worker thread and dropping the result.
    try:
        current_image_data = encode_image_to_base64(current_image)
        message = HumanMessage(
            content=[
                {"type": "text", "text": "Analyze this image and check if the label is present on the bottle. Return results in a structured format."},
                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{current_image_data}"}, "description": "Detected product"},
            ]
        )
        response = gemini_model.invoke([message])
        return response.content
    except Exception as e:
        # Top-level boundary for the worker: surface the failure as text.
        return f"Error processing image: {e}"
52
+
53
def save_crop_image(crop, track_id):
    """Write *crop* to ``<crop_folder>/<track_id>.jpg`` and return that path.

    Args:
        crop: Image passed to ``cv2.imwrite``.
        track_id: Identifier used as the file stem; a later call with the
            same ID writes to the same path.

    Returns:
        The path string the image was written to. The return value of
        ``cv2.imwrite`` is not checked.
    """
    destination = f"{crop_folder}/{track_id}.jpg"
    cv2.imwrite(destination, crop)
    return destination
57
+
58
def process_crop_image(crop, track_id, responses):
    """Analyze *crop* with Gemini and record the outcome.

    Appends a ``(track_id, analysis)`` tuple to *responses*, which is
    mutated in place; nothing is returned. Used as a thread target by the
    video-processing loop.
    """
    analysis = analyze_image_with_gemini(crop)
    result_entry = (track_id, analysis)
    responses.append(result_entry)
61
+
62
def process_video(video_path):
    """Track objects in a video and analyze each newly seen track with Gemini.

    Every frame is resized to 1020x500, passed to ``yolo_model.track`` and
    written to ``output_video.mp4`` (``mp4v`` codec, 20 fps). The first time
    a track ID is seen, its bounding-box crop is saved to disk and analyzed
    on a background thread.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.

    Returns:
        ``(output_path, responses)`` where ``responses`` is a list of
        ``(track_id, analysis_text)`` tuples in completion order.

    Notes:
        * ``processed_track_ids`` is module-level state, so IDs handled in
          earlier calls are skipped here as well.
        * Boxes without a tracker ID all receive ``-1``; only the first such
          box is ever analyzed.
    """
    cap = cv2.VideoCapture(video_path)
    output_path = "output_video.mp4"
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    out = cv2.VideoWriter(output_path, fourcc, 20.0, (1020, 500))

    responses = []
    workers = []  # analysis threads, joined before returning

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            frame = cv2.resize(frame, (1020, 500))

            # persist=True is presumably what keeps track IDs stable across
            # frames — see the Ultralytics tracking docs.
            results = yolo_model.track(frame, persist=True)
            if results[0].boxes is not None:
                boxes = results[0].boxes.xyxy.int().cpu().tolist()
                if results[0].boxes.id is not None:
                    track_ids = results[0].boxes.id.int().cpu().tolist()
                else:
                    track_ids = [-1] * len(boxes)

                for box, track_id in zip(boxes, track_ids):
                    with lock:  # Guards processed_track_ids
                        if track_id in processed_track_ids:
                            continue
                        x1, y1, x2, y2 = box
                        crop = frame[y1:y2, x1:x2]
                        if crop.size == 0:
                            # A zero-area box cannot be written or encoded;
                            # leave the ID unprocessed so a later frame with
                            # a usable crop can still be analyzed.
                            continue
                        save_crop_image(crop, track_id)
                        worker = threading.Thread(
                            target=process_crop_image,
                            args=(crop, track_id, responses),
                        )
                        worker.start()
                        workers.append(worker)
                        processed_track_ids.add(track_id)

            out.write(frame)
    finally:
        # Release the capture and writer even if tracking or I/O fails.
        cap.release()
        out.release()

    # Wait for every analysis to finish; otherwise `responses` is returned
    # while Gemini calls are still in flight and results go missing.
    for worker in workers:
        worker.join()

    return output_path, responses
95
+
96
def process_and_return(video_file):
    """Gradio callback: persist the uploaded bytes, process them, format results.

    Args:
        video_file: Raw bytes of the uploaded video (the input component is
            declared with ``type="binary"``), or a falsy value when nothing
            was uploaded.

    Returns:
        ``(output_video_path, results_markdown)``; ``(None, "No video
        uploaded.")`` when *video_file* is falsy.
    """
    if not video_file:
        return None, "No video uploaded."

    # The upload is always written to the same fixed file name.
    video_path = "uploaded_video.mp4"
    with open(video_path, "wb") as upload_handle:
        upload_handle.write(video_file)

    output_video_path, analysis_results = process_video(video_path)

    formatted_lines = []
    for track_id, response in analysis_results:
        formatted_lines.append(f"**Track ID {track_id}:** {response}")
    results_text = "\n".join(formatted_lines)

    return output_video_path, results_text
109
+
110
# Gradio Interface: upload a video, run detection + Gemini analysis, then
# show the processed video, a downloadable copy, and the analysis text.
with gr.Blocks() as demo:
    gr.Markdown("# Bottle Label Checking using YOLO & Gemini AI")

    with gr.Row():
        # type="binary" delivers the upload to the callback as raw bytes.
        video_input = gr.File(label="Upload a video", type="binary")
        process_button = gr.Button("Process Video")

    with gr.Row():
        video_output = gr.Video(label="Processed Video")
        download_button = gr.File(label="Download Processed Video")

    analysis_results = gr.Markdown(label="AI Analysis Results")

    process_button.click(
        fn=process_and_return,
        inputs=video_input,
        outputs=[video_output, analysis_results]
    )

    # Mirror the processed video into the download component whenever the
    # video output changes. The listener is attached to video_output (the
    # source of the value); attaching it to download_button, which is also
    # the output, would never fire after processing.
    video_output.change(
        fn=lambda x: x if x else None,
        inputs=video_output,
        outputs=download_button
    )

demo.launch()