Prathamesh1420 committed on
Commit
915ba3e
·
verified ·
1 Parent(s): 22ddfde

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -33
app.py CHANGED
@@ -10,7 +10,7 @@ from langchain_core.messages import HumanMessage
10
  from langchain_google_genai import ChatGoogleGenerativeAI
11
 
12
  # Set up Google API Key
13
- os.environ["GOOGLE_API_KEY"] = "AIzaSyDOBd0_yNLckwsZJrpb9-CqTHFUx0Ah3R8" # Replace with your actual API key
14
  gemini_model = ChatGoogleGenerativeAI(model="gemini-1.5-flash")
15
 
16
  # Load YOLO model
@@ -24,16 +24,23 @@ current_date = time.strftime("%Y-%m-%d")
24
  crop_folder = f"crop_{current_date}"
25
  if not os.path.exists(crop_folder):
26
  os.makedirs(crop_folder)
 
 
27
  processed_track_ids = set()
 
 
 
 
 
28
 
29
  def encode_image_to_base64(image):
30
  _, img_buffer = cv2.imencode('.jpg', image)
31
  return base64.b64encode(img_buffer).decode('utf-8')
32
 
33
  def analyze_image_with_gemini(current_image):
34
- """Send image to Gemini API for analysis."""
35
  if current_image is None:
36
  return "No image available for analysis."
 
37
  current_image_data = encode_image_to_base64(current_image)
38
  message = HumanMessage(
39
  content=[
@@ -48,18 +55,19 @@ def analyze_image_with_gemini(current_image):
48
  return f"Error processing image: {e}"
49
 
50
  def save_crop_image(crop, track_id):
51
- """Save cropped image of detected bottle."""
52
  filename = f"{crop_folder}/{track_id}.jpg"
53
  cv2.imwrite(filename, crop)
54
  return filename
55
 
56
  def process_crop_image(crop, track_id):
57
- """Process image asynchronously using Gemini AI."""
58
  response = analyze_image_with_gemini(crop)
59
- st.session_state["responses"].append((track_id, response))
 
 
 
 
60
 
61
  def process_video(uploaded_file):
62
- """Process uploaded video, detect objects, and create an output video."""
63
  if not uploaded_file:
64
  return None
65
 
@@ -69,57 +77,46 @@ def process_video(uploaded_file):
69
  f.write(video_bytes)
70
 
71
  cap = cv2.VideoCapture(video_path)
72
- if not cap.isOpened():
73
- st.error("Error: Could not open video file.")
74
- return None
75
-
76
- fps = int(cap.get(cv2.CAP_PROP_FPS))
77
- width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
78
- height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
79
  output_path = "output_video.mp4"
80
  fourcc = cv2.VideoWriter_fourcc(*"mp4v")
81
- out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
82
-
83
  while cap.isOpened():
84
  ret, frame = cap.read()
85
  if not ret:
86
  break
 
87
 
88
  results = yolo_model.track(frame, persist=True)
89
  if results[0].boxes is not None:
90
  boxes = results[0].boxes.xyxy.int().cpu().tolist()
91
  track_ids = results[0].boxes.id.int().cpu().tolist() if results[0].boxes.id is not None else [-1] * len(boxes)
 
92
  for box, track_id in zip(boxes, track_ids):
93
- if track_id not in processed_track_ids:
94
- x1, y1, x2, y2 = box
95
- crop = frame[y1:y2, x1:x2]
96
- save_crop_image(crop, track_id)
97
- threading.Thread(target=process_crop_image, args=(crop, track_id)).start()
98
- processed_track_ids.add(track_id)
 
99
 
100
  out.write(frame)
101
-
102
  cap.release()
103
  out.release()
104
-
105
  return output_path
106
 
107
- # Streamlit UI
108
  st.title("Bottle Label Checking using YOLO & Gemini AI")
109
- st.sidebar.header("Upload a Video")
110
  uploaded_file = st.sidebar.file_uploader("Choose a video file", type=["mp4", "avi", "mov"])
111
 
112
- if "responses" not in st.session_state:
113
- st.session_state["responses"] = []
114
-
115
  if uploaded_file:
116
- st.sidebar.write("Processing video, please wait...")
117
  output_video_path = process_video(uploaded_file)
 
 
118
 
119
- if output_video_path:
120
- st.sidebar.success("Processing completed!")
121
- st.video(output_video_path)
122
-
123
  st.subheader("AI Analysis Results")
124
  for track_id, response in st.session_state["responses"]:
125
  st.write(f"**Track ID {track_id}:** {response}")
 
10
  from langchain_google_genai import ChatGoogleGenerativeAI
11
 
12
  # Set up Google API Key
13
+ os.environ["GOOGLE_API_KEY"] = "AIzaSyDOBd0_yNLckwsZJrpb9-CqTHFUx0Ah3R8" # Replace with your API Key
14
  gemini_model = ChatGoogleGenerativeAI(model="gemini-1.5-flash")
15
 
16
  # Load YOLO model
 
24
  crop_folder = f"crop_{current_date}"
25
  if not os.path.exists(crop_folder):
26
  os.makedirs(crop_folder)
27
+
28
+ # Track processed IDs to avoid duplicate processing
29
  processed_track_ids = set()
30
+ lock = threading.Lock() # Ensure thread-safe operations
31
+
32
+ # Ensure session state is initialized
33
+ if "responses" not in st.session_state:
34
+ st.session_state["responses"] = []
35
 
36
  def encode_image_to_base64(image):
37
  _, img_buffer = cv2.imencode('.jpg', image)
38
  return base64.b64encode(img_buffer).decode('utf-8')
39
 
40
  def analyze_image_with_gemini(current_image):
 
41
  if current_image is None:
42
  return "No image available for analysis."
43
+
44
  current_image_data = encode_image_to_base64(current_image)
45
  message = HumanMessage(
46
  content=[
 
55
  return f"Error processing image: {e}"
56
 
57
  def save_crop_image(crop, track_id):
 
58
  filename = f"{crop_folder}/{track_id}.jpg"
59
  cv2.imwrite(filename, crop)
60
  return filename
61
 
62
  def process_crop_image(crop, track_id):
 
63
  response = analyze_image_with_gemini(crop)
64
+
65
+ # Thread-safe way to update session state
66
+ if "responses" not in st.session_state:
67
+ st.session_state["responses"] = []
68
+ st.session_state["responses"] = st.session_state["responses"] + [(track_id, response)]
69
 
70
  def process_video(uploaded_file):
 
71
  if not uploaded_file:
72
  return None
73
 
 
77
  f.write(video_bytes)
78
 
79
  cap = cv2.VideoCapture(video_path)
 
 
 
 
 
 
 
80
  output_path = "output_video.mp4"
81
  fourcc = cv2.VideoWriter_fourcc(*"mp4v")
82
+ out = cv2.VideoWriter(output_path, fourcc, 20.0, (1020, 500))
83
+
84
  while cap.isOpened():
85
  ret, frame = cap.read()
86
  if not ret:
87
  break
88
+ frame = cv2.resize(frame, (1020, 500))
89
 
90
  results = yolo_model.track(frame, persist=True)
91
  if results[0].boxes is not None:
92
  boxes = results[0].boxes.xyxy.int().cpu().tolist()
93
  track_ids = results[0].boxes.id.int().cpu().tolist() if results[0].boxes.id is not None else [-1] * len(boxes)
94
+
95
  for box, track_id in zip(boxes, track_ids):
96
+ with lock: # Prevent race condition
97
+ if track_id not in processed_track_ids:
98
+ x1, y1, x2, y2 = box
99
+ crop = frame[y1:y2, x1:x2]
100
+ save_crop_image(crop, track_id)
101
+ threading.Thread(target=process_crop_image, args=(crop, track_id)).start()
102
+ processed_track_ids.add(track_id)
103
 
104
  out.write(frame)
105
+
106
  cap.release()
107
  out.release()
 
108
  return output_path
109
 
 
110
  st.title("Bottle Label Checking using YOLO & Gemini AI")
111
+ st.sidebar.header("Upload a video")
112
  uploaded_file = st.sidebar.file_uploader("Choose a video file", type=["mp4", "avi", "mov"])
113
 
 
 
 
114
  if uploaded_file:
115
+ st.sidebar.write("Processing...")
116
  output_video_path = process_video(uploaded_file)
117
+ st.sidebar.success("Processing completed!")
118
+ st.video(output_video_path)
119
 
 
 
 
 
120
  st.subheader("AI Analysis Results")
121
  for track_id, response in st.session_state["responses"]:
122
  st.write(f"**Track ID {track_id}:** {response}")