Prathamesh1420 committed on
Commit
22ddfde
·
verified ·
1 Parent(s): e0d171b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -9
app.py CHANGED
@@ -10,7 +10,7 @@ from langchain_core.messages import HumanMessage
10
  from langchain_google_genai import ChatGoogleGenerativeAI
11
 
12
  # Set up Google API Key
13
- os.environ["GOOGLE_API_KEY"] = "AIzaSyDOBd0_yNLckwsZJrpb9-CqTHFUx0Ah3R8" # Replace with your API Key
14
  gemini_model = ChatGoogleGenerativeAI(model="gemini-1.5-flash")
15
 
16
  # Load YOLO model
@@ -31,6 +31,7 @@ def encode_image_to_base64(image):
31
  return base64.b64encode(img_buffer).decode('utf-8')
32
 
33
  def analyze_image_with_gemini(current_image):
 
34
  if current_image is None:
35
  return "No image available for analysis."
36
  current_image_data = encode_image_to_base64(current_image)
@@ -47,15 +48,18 @@ def analyze_image_with_gemini(current_image):
47
  return f"Error processing image: {e}"
48
 
49
  def save_crop_image(crop, track_id):
 
50
  filename = f"{crop_folder}/{track_id}.jpg"
51
  cv2.imwrite(filename, crop)
52
  return filename
53
 
54
  def process_crop_image(crop, track_id):
 
55
  response = analyze_image_with_gemini(crop)
56
  st.session_state["responses"].append((track_id, response))
57
 
58
  def process_video(uploaded_file):
 
59
  if not uploaded_file:
60
  return None
61
 
@@ -65,15 +69,22 @@ def process_video(uploaded_file):
65
  f.write(video_bytes)
66
 
67
  cap = cv2.VideoCapture(video_path)
 
 
 
 
 
 
 
68
  output_path = "output_video.mp4"
69
  fourcc = cv2.VideoWriter_fourcc(*"mp4v")
70
- out = cv2.VideoWriter(output_path, fourcc, 20.0, (1020, 500))
71
-
72
  while cap.isOpened():
73
  ret, frame = cap.read()
74
  if not ret:
75
  break
76
- frame = cv2.resize(frame, (1020, 500))
77
  results = yolo_model.track(frame, persist=True)
78
  if results[0].boxes is not None:
79
  boxes = results[0].boxes.xyxy.int().cpu().tolist()
@@ -84,22 +95,31 @@ def process_video(uploaded_file):
84
  crop = frame[y1:y2, x1:x2]
85
  save_crop_image(crop, track_id)
86
  threading.Thread(target=process_crop_image, args=(crop, track_id)).start()
87
- processed_track_ids.add(track_id)
 
88
  out.write(frame)
 
89
  cap.release()
90
  out.release()
 
91
  return output_path
92
 
 
93
  st.title("Bottle Label Checking using YOLO & Gemini AI")
94
- st.sidebar.header("Upload a video")
95
  uploaded_file = st.sidebar.file_uploader("Choose a video file", type=["mp4", "avi", "mov"])
 
96
  if "responses" not in st.session_state:
97
  st.session_state["responses"] = []
 
98
  if uploaded_file:
99
- st.sidebar.write("Processing...")
100
  output_video_path = process_video(uploaded_file)
101
- st.sidebar.success("Processing completed!")
102
- st.video(output_video_path)
 
 
 
103
  st.subheader("AI Analysis Results")
104
  for track_id, response in st.session_state["responses"]:
105
  st.write(f"**Track ID {track_id}:** {response}")
 
10
  from langchain_google_genai import ChatGoogleGenerativeAI
11
 
12
  # Set up Google API Key
13
+ os.environ["GOOGLE_API_KEY"] = "AIzaSyDOBd0_yNLckwsZJrpb9-CqTHFUx0Ah3R8" # Replace with your actual API key
14
  gemini_model = ChatGoogleGenerativeAI(model="gemini-1.5-flash")
15
 
16
  # Load YOLO model
 
31
  return base64.b64encode(img_buffer).decode('utf-8')
32
 
33
  def analyze_image_with_gemini(current_image):
34
+ """Send image to Gemini API for analysis."""
35
  if current_image is None:
36
  return "No image available for analysis."
37
  current_image_data = encode_image_to_base64(current_image)
 
48
  return f"Error processing image: {e}"
49
 
50
  def save_crop_image(crop, track_id):
51
+ """Save cropped image of detected bottle."""
52
  filename = f"{crop_folder}/{track_id}.jpg"
53
  cv2.imwrite(filename, crop)
54
  return filename
55
 
56
  def process_crop_image(crop, track_id):
57
+ """Process image asynchronously using Gemini AI."""
58
  response = analyze_image_with_gemini(crop)
59
  st.session_state["responses"].append((track_id, response))
60
 
61
  def process_video(uploaded_file):
62
+ """Process uploaded video, detect objects, and create an output video."""
63
  if not uploaded_file:
64
  return None
65
 
 
69
  f.write(video_bytes)
70
 
71
  cap = cv2.VideoCapture(video_path)
72
+ if not cap.isOpened():
73
+ st.error("Error: Could not open video file.")
74
+ return None
75
+
76
+ fps = int(cap.get(cv2.CAP_PROP_FPS))
77
+ width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
78
+ height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
79
  output_path = "output_video.mp4"
80
  fourcc = cv2.VideoWriter_fourcc(*"mp4v")
81
+ out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
82
+
83
  while cap.isOpened():
84
  ret, frame = cap.read()
85
  if not ret:
86
  break
87
+
88
  results = yolo_model.track(frame, persist=True)
89
  if results[0].boxes is not None:
90
  boxes = results[0].boxes.xyxy.int().cpu().tolist()
 
95
  crop = frame[y1:y2, x1:x2]
96
  save_crop_image(crop, track_id)
97
  threading.Thread(target=process_crop_image, args=(crop, track_id)).start()
98
+ processed_track_ids.add(track_id)
99
+
100
  out.write(frame)
101
+
102
  cap.release()
103
  out.release()
104
+
105
  return output_path
106
 
107
+ # Streamlit UI
108
  st.title("Bottle Label Checking using YOLO & Gemini AI")
109
+ st.sidebar.header("Upload a Video")
110
  uploaded_file = st.sidebar.file_uploader("Choose a video file", type=["mp4", "avi", "mov"])
111
+
112
  if "responses" not in st.session_state:
113
  st.session_state["responses"] = []
114
+
115
  if uploaded_file:
116
+ st.sidebar.write("Processing video, please wait...")
117
  output_video_path = process_video(uploaded_file)
118
+
119
+ if output_video_path:
120
+ st.sidebar.success("Processing completed!")
121
+ st.video(output_video_path)
122
+
123
  st.subheader("AI Analysis Results")
124
  for track_id, response in st.session_state["responses"]:
125
  st.write(f"**Track ID {track_id}:** {response}")