Prathamesh1420 committed on
Commit
ba059c9
·
verified ·
1 Parent(s): 046137b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -44
app.py CHANGED
@@ -1,4 +1,4 @@
1
- import streamlit as st
2
  import cv2
3
  import numpy as np
4
  import os
@@ -10,7 +10,7 @@ from langchain_core.messages import HumanMessage
10
  from langchain_google_genai import ChatGoogleGenerativeAI
11
 
12
  # Set up Google API Key
13
- os.environ["GOOGLE_API_KEY"] = "AIzaSyDOBd0_yNLckwsZJrpb9-CqTHFUx0Ah3R8" # Replace with your API Key
14
  gemini_model = ChatGoogleGenerativeAI(model="gemini-1.5-flash")
15
 
16
  # Load YOLO model
@@ -29,10 +29,6 @@ if not os.path.exists(crop_folder):
29
  processed_track_ids = set()
30
  lock = threading.Lock() # Ensure thread-safe operations
31
 
32
- # Ensure session state is initialized
33
- if "responses" not in st.session_state:
34
- st.session_state["responses"] = []
35
-
36
  def encode_image_to_base64(image):
37
  _, img_buffer = cv2.imencode('.jpg', image)
38
  return base64.b64encode(img_buffer).decode('utf-8')
@@ -59,28 +55,18 @@ def save_crop_image(crop, track_id):
59
  cv2.imwrite(filename, crop)
60
  return filename
61
 
62
- def process_crop_image(crop, track_id):
63
  response = analyze_image_with_gemini(crop)
64
-
65
- # Thread-safe way to update session state
66
- if "responses" not in st.session_state:
67
- st.session_state["responses"] = []
68
- st.session_state["responses"] = st.session_state["responses"] + [(track_id, response)]
69
-
70
- def process_video(uploaded_file):
71
- if not uploaded_file:
72
- return None
73
-
74
- video_bytes = uploaded_file.read()
75
- video_path = "uploaded_video.mp4"
76
- with open(video_path, "wb") as f:
77
- f.write(video_bytes)
78
-
79
  cap = cv2.VideoCapture(video_path)
80
  output_path = "output_video.mp4"
81
  fourcc = cv2.VideoWriter_fourcc(*"mp4v")
82
  out = cv2.VideoWriter(output_path, fourcc, 20.0, (1020, 500))
83
 
 
 
84
  while cap.isOpened():
85
  ret, frame = cap.read()
86
  if not ret:
@@ -98,37 +84,53 @@ def process_video(uploaded_file):
98
  x1, y1, x2, y2 = box
99
  crop = frame[y1:y2, x1:x2]
100
  save_crop_image(crop, track_id)
101
- threading.Thread(target=process_crop_image, args=(crop, track_id)).start()
102
  processed_track_ids.add(track_id)
103
 
104
  out.write(frame)
105
 
106
  cap.release()
107
  out.release()
108
- return output_path
 
 
 
 
109
 
110
- def get_video_download_link(file_path):
111
- """Generates a download link for the processed video."""
112
- with open(file_path, "rb") as file:
113
- video_bytes = file.read()
114
- b64 = base64.b64encode(video_bytes).decode()
115
- return f'<a href="data:video/mp4;base64,{b64}" download="output_video.mp4">Download Processed Video</a>'
 
 
 
116
 
117
- st.title("Bottle Label Checking using YOLO & Gemini AI")
118
- st.sidebar.header("Upload a video")
119
- uploaded_file = st.sidebar.file_uploader("Choose a video file", type=["mp4", "avi", "mov"])
120
 
121
- if uploaded_file:
122
- st.sidebar.write("Processing... Please wait!")
123
- output_video_path = process_video(uploaded_file)
124
- st.sidebar.success("Processing completed!")
125
 
126
- # Show processed video
127
- st.video(output_video_path)
 
128
 
129
- # Provide download link for the video
130
- st.markdown(get_video_download_link(output_video_path), unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
131
 
132
- st.subheader("AI Analysis Results")
133
- for track_id, response in st.session_state["responses"]:
134
- st.write(f"**Track ID {track_id}:** {response}")
 
1
+ import gradio as gr
2
  import cv2
3
  import numpy as np
4
  import os
 
10
  from langchain_google_genai import ChatGoogleGenerativeAI
11
 
12
  # Set up Google API Key
13
+ os.environ["GOOGLE_API_KEY"] = "YOUR_GOOGLE_API_KEY" # Replace with your API Key
14
  gemini_model = ChatGoogleGenerativeAI(model="gemini-1.5-flash")
15
 
16
  # Load YOLO model
 
29
  processed_track_ids = set()
30
  lock = threading.Lock() # Ensure thread-safe operations
31
 
 
 
 
 
32
  def encode_image_to_base64(image):
33
  _, img_buffer = cv2.imencode('.jpg', image)
34
  return base64.b64encode(img_buffer).decode('utf-8')
 
55
  cv2.imwrite(filename, crop)
56
  return filename
57
 
58
+ def process_crop_image(crop, track_id, responses):
59
  response = analyze_image_with_gemini(crop)
60
+ responses.append((track_id, response))
61
+
62
+ def process_video(video_path):
 
 
 
 
 
 
 
 
 
 
 
 
63
  cap = cv2.VideoCapture(video_path)
64
  output_path = "output_video.mp4"
65
  fourcc = cv2.VideoWriter_fourcc(*"mp4v")
66
  out = cv2.VideoWriter(output_path, fourcc, 20.0, (1020, 500))
67
 
68
+ responses = []
69
+
70
  while cap.isOpened():
71
  ret, frame = cap.read()
72
  if not ret:
 
84
  x1, y1, x2, y2 = box
85
  crop = frame[y1:y2, x1:x2]
86
  save_crop_image(crop, track_id)
87
+ threading.Thread(target=process_crop_image, args=(crop, track_id, responses)).start()
88
  processed_track_ids.add(track_id)
89
 
90
  out.write(frame)
91
 
92
  cap.release()
93
  out.release()
94
+ return output_path, responses
95
+
96
+ def process_and_return(video_file):
97
+ if not video_file:
98
+ return None, "No video uploaded."
99
 
100
+ video_path = "uploaded_video.mp4"
101
+ with open(video_path, "wb") as f:
102
+ f.write(video_file)
103
+
104
+ output_video_path, analysis_results = process_video(video_path)
105
+
106
+ results_text = "\n".join([f"**Track ID {track_id}:** {response}" for track_id, response in analysis_results])
107
+
108
+ return output_video_path, results_text
109
 
110
+ # Gradio Interface
111
+ with gr.Blocks() as demo:
112
+ gr.Markdown("# Bottle Label Checking using YOLO & Gemini AI")
113
 
114
+ with gr.Row():
115
+ video_input = gr.File(label="Upload a video", type="binary")
116
+ process_button = gr.Button("Process Video")
 
117
 
118
+ with gr.Row():
119
+ video_output = gr.Video(label="Processed Video")
120
+ download_button = gr.File(label="Download Processed Video")
121
 
122
+ analysis_results = gr.Markdown(label="AI Analysis Results")
123
+
124
+ process_button.click(
125
+ fn=process_and_return,
126
+ inputs=video_input,
127
+ outputs=[video_output, analysis_results]
128
+ )
129
+
130
+ download_button.change(
131
+ fn=lambda x: x if x else None,
132
+ inputs=video_output,
133
+ outputs=download_button
134
+ )
135
 
136
+ demo.launch()