Prathamesh1420 committed on
Commit 3819cf9 · verified · 1 Parent(s): 3265a39

Update app.py

Files changed (1):
  1. app.py +486 -2
app.py CHANGED
@@ -1,4 +1,4 @@
-import cv2
+'''import cv2
 import numpy as np
 from ultralytics import YOLO
 import cvzone
@@ -9,7 +9,7 @@ from langchain_core.messages import HumanMessage
 from langchain_google_genai import ChatGoogleGenerativeAI
 
 # ✅ Set up Google API Key (Avoid hardcoding in production)
-os.environ["GOOGLE_API_KEY"] = "YOUR_GOOGLE_API_KEY"
+os.environ["GOOGLE_API_KEY"] = "YOUR_GOOGLE_API_KEY"  # Replace with your actual API Key
 
 # ✅ Initialize the Gemini model
 gemini_model = ChatGoogleGenerativeAI(model="gemini-1.5-flash")
@@ -108,5 +108,489 @@ iface = gr.Interface(
     description="Upload a video to detect objects and analyze them using Gemini AI.",
 )
 
+if __name__ == "__main__":
+    iface.launch(share=True)'''
+
+'''import cv2
+import numpy as np
+from ultralytics import YOLO
+import cvzone
+import base64
+import os
+import gradio as gr
+from langchain_core.messages import HumanMessage
+from langchain_google_genai import ChatGoogleGenerativeAI
+
+# ✅ Set up Google API Key (Avoid hardcoding in production)
+os.environ["GOOGLE_API_KEY"] = "YOUR_GOOGLE_API_KEY"  # Replace with your actual API Key
+
+# ✅ Initialize the Gemini model
+gemini_model = ChatGoogleGenerativeAI(model="gemini-1.5-flash")
+
+# ✅ Load the YOLO model
+yolo_model = YOLO("best.pt")
+names = yolo_model.names  # Class names from the YOLO model
+
+def encode_image_to_base64(image):
+    """Encodes an image to a base64 string."""
+    _, img_buffer = cv2.imencode('.jpg', image)
+    return base64.b64encode(img_buffer).decode('utf-8')
+
+def analyze_image_with_gemini(image):
+    """Sends an image to Gemini AI for analysis."""
+    if image is None:
+        return "No image available for analysis."
+
+    image_data = encode_image_to_base64(image)
+    message = HumanMessage(content=[
+        {"type": "text", "text": """
+        Analyze this image and determine if the label is present on the bottle.
+        Return the result strictly in a structured table format:
+
+        | Label Present | Damage |
+        |--------------|--------|
+        | Yes/No | Yes/No |
+        """},
+        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}, "description": "Detected product"}
+    ])
+    try:
+        response = gemini_model.invoke([message])
+        return response.content
+    except Exception as e:
+        return f"Error processing image: {e}"
+
+def process_video(video_path):
+    """Processes the uploaded video frame by frame using YOLO and Gemini AI."""
+    cap = cv2.VideoCapture(video_path)
+    if not cap.isOpened():
+        return "Error: Could not open video file."
+
+    frame_list = []
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            break
+
+        frame = cv2.resize(frame, (1020, 500))  # Resize for processing
+        results = yolo_model.track(frame, persist=True)
+
+        if results[0].boxes is not None:
+            boxes = results[0].boxes.xyxy.int().cpu().tolist()
+            class_ids = results[0].boxes.cls.int().cpu().tolist()
+            track_ids = results[0].boxes.id.int().cpu().tolist() if results[0].boxes.id is not None else [-1] * len(boxes)
+
+            for box, track_id, class_id in zip(boxes, track_ids, class_ids):
+                x1, y1, x2, y2 = box
+                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
+                cvzone.putTextRect(frame, f'ID: {track_id}', (x2, y2), 1, 1)
+                cvzone.putTextRect(frame, f'{names[class_id]}', (x1, y1), 1, 1)
+
+                # Extract and analyze the detected object
+                crop = frame[y1:y2, x1:x2]
+                response = analyze_image_with_gemini(crop)
+                print(response)  # Log the Gemini AI response
+
+        frame_list.append(frame)
+
+    cap.release()  # Free resources
+    return frame_list[0] if frame_list else "Error: No frames processed."
+
+def gradio_interface(video_path):
+    """Handles Gradio video input and processes it."""
+    if video_path is None:
+        return "Error: No video uploaded."
+    return process_video(video_path)
+
+# ✅ Gradio UI setup
+iface = gr.Interface(
+    fn=gradio_interface,
+    inputs=gr.File(type="filepath", label="Upload Video"),  # Accepts video files
+    outputs=gr.Image(label="Processed Frame"),  # Shows a single processed frame
+    title="YOLO + Gemini AI Video Analysis",
+    description="Upload a video to detect objects and analyze them using Gemini AI.",
+)
+
+if __name__ == "__main__":
+    iface.launch(share=True)  # Enables a public link for testing
+'''
+
+'''
+import cv2
+import numpy as np
+from ultralytics import YOLO
+import cvzone
+import base64
+import os
+import gradio as gr
+from langchain_core.messages import HumanMessage
+from langchain_google_genai import ChatGoogleGenerativeAI
+
+# ✅ Set up Google API Key (Avoid hardcoding in production)
+os.environ["GOOGLE_API_KEY"] = "YOUR_GOOGLE_API_KEY"  # Replace with your actual API Key
+
+# ✅ Initialize the Gemini model
+gemini_model = ChatGoogleGenerativeAI(model="gemini-1.5-flash")
+
+# ✅ Load the YOLO model
+yolo_model = YOLO("best.pt")  # Ensure "best.pt" is in the working directory
+names = yolo_model.names  # Class names from the YOLO model
+
+def encode_image_to_base64(image):
+    """Encodes an image to a base64 string."""
+    _, img_buffer = cv2.imencode('.jpg', image)
+    return base64.b64encode(img_buffer).decode('utf-8')
+
+def analyze_image_with_gemini(image):
+    """Sends an image to Gemini AI for analysis."""
+    if image is None or image.shape[0] == 0 or image.shape[1] == 0:
+        return "Error: Invalid image."
+
+    image_data = encode_image_to_base64(image)
+    message = HumanMessage(content=[
+        {"type": "text", "text": """
+        Analyze this image and determine if the label is present on the bottle.
+        Return the result strictly in a structured table format:
+
+        | Label Present | Damage |
+        |--------------|--------|
+        | Yes/No | Yes/No |
+        """},
+        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}, "description": "Detected product"}
+    ])
+
+    try:
+        response = gemini_model.invoke([message])
+        return response.content
+    except Exception as e:
+        return f"Error processing image: {e}"
+
+def process_video(video_path):
+    """Processes the uploaded video frame by frame using YOLO and Gemini AI."""
+    cap = cv2.VideoCapture(video_path)
+    if not cap.isOpened():
+        return "Error: Could not open video file."
+
+    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+    fps = int(cap.get(cv2.CAP_PROP_FPS))
+
+    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+    output_video_path = "output.mp4"
+    out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
+
+    vertical_center = width // 2
+
+    frame_count = 0
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            break
+
+        frame_count += 1
+        frame = cv2.resize(frame, (width, height))
+        results = yolo_model.track(frame, persist=True)
+
+        if results and results[0].boxes is not None and results[0].boxes.xyxy is not None:
+            boxes = results[0].boxes.xyxy.int().cpu().tolist()
+            class_ids = results[0].boxes.cls.int().cpu().tolist()
+            track_ids = results[0].boxes.id.int().cpu().tolist() if results[0].boxes.id is not None else [-1] * len(boxes)
+
+            for box, track_id, class_id in zip(boxes, track_ids, class_ids):
+                x1, y1, x2, y2 = box
+                center_x = (x1 + x2) // 2
+
+                # Draw detection box and label
+                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
+                cvzone.putTextRect(frame, f'ID: {track_id}', (x2, y2), 1, 1)
+                cvzone.putTextRect(frame, f'{names[class_id]}', (x1, y1), 1, 1)
+
+                # If the object is near the vertical center, analyze it
+                if abs(center_x - vertical_center) < 10:
+                    crop = frame[y1:y2, x1:x2]
+                    response = analyze_image_with_gemini(crop)
+
+                    # Log the response and display it on the frame
+                    print(f"Frame {frame_count}, Object {track_id}: {response}")
+                    cvzone.putTextRect(frame, response, (x1, y1 - 10), 1, 1, colorT=(255, 255, 255), colorR=(0, 0, 255))
+
+        out.write(frame)
+
+    cap.release()
+    out.release()
+
+    return output_video_path
+
+def gradio_interface(video_path):
+    """Handles Gradio video input and processes it."""
+    if video_path is None:
+        return "Error: No video uploaded."
+    return process_video(video_path)
+
+# ✅ Gradio UI setup
+iface = gr.Interface(
+    fn=gradio_interface,
+    inputs=gr.File(type="filepath", label="Upload Video"),  # Accepts video files
+    outputs=gr.Video(label="Processed Video"),  # Outputs the processed video
+    title="YOLO + Gemini AI Video Analysis",
+    description="Upload a video to detect objects and analyze them using Gemini AI.",
+)
+
 if __name__ == "__main__":
     iface.launch(share=True)
+'''
+
+'''
+import cv2
+import numpy as np
+from ultralytics import YOLO
+import cvzone
+import base64
+import os
+import gradio as gr
+from langchain_core.messages import HumanMessage
+from langchain_google_genai import ChatGoogleGenerativeAI
+
+# ✅ Set up Google API Key securely (Avoid hardcoding in production)
+os.environ["GOOGLE_API_KEY"] = "YOUR_GOOGLE_API_KEY"  # Replace with your actual API Key
+
+# ✅ Initialize the Gemini model
+gemini_model = ChatGoogleGenerativeAI(model="gemini-1.5-flash")
+
+# ✅ Load the YOLO model
+yolo_model = YOLO("best.pt")  # Ensure "best.pt" is in the working directory
+names = yolo_model.names  # Class names from the YOLO model
+
+def encode_image_to_base64(image):
+    """Encodes an image to a base64 string."""
+    _, img_buffer = cv2.imencode('.jpg', image)
+    return base64.b64encode(img_buffer).decode('utf-8')
+
+def analyze_image_with_gemini(image):
+    """Sends an image to Gemini AI for analysis."""
+    if image is None or image.shape[0] == 0 or image.shape[1] == 0:
+        return "Error: Invalid image."
+
+    image_data = encode_image_to_base64(image)
+    message = HumanMessage(content=[
+        {"type": "text", "text": """
+        Analyze this image and determine if the label is present on the bottle.
+        Return the result strictly in a structured table format:
+
+        | Label Present | Damage |
+        |--------------|--------|
+        | Yes/No | Yes/No |
+        """},
+        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}, "description": "Detected product"}
+    ])
+
+    try:
+        response = gemini_model.invoke([message])
+        return response.content
+    except Exception as e:
+        return f"Error processing image: {e}"
+
+def process_video(video_path):
+    """Processes the uploaded video frame by frame using YOLO and Gemini AI."""
+    cap = cv2.VideoCapture(video_path)
+    if not cap.isOpened():
+        return "Error: Could not open video file."
+
+    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+    fps = int(cap.get(cv2.CAP_PROP_FPS))
+
+    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+    output_video_path = "/tmp/output.mp4"  # Use /tmp for Hugging Face Spaces
+    out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
+
+    vertical_center = width // 2
+
+    frame_count = 0
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            break
+
+        frame_count += 1
+        frame = cv2.resize(frame, (width, height))
+        results = yolo_model.track(frame, persist=True)
+
+        if results and results[0].boxes is not None and results[0].boxes.xyxy is not None:
+            boxes = results[0].boxes.xyxy.int().cpu().tolist()
+            class_ids = results[0].boxes.cls.int().cpu().tolist()
+            track_ids = results[0].boxes.id.int().cpu().tolist() if results[0].boxes.id is not None else [-1] * len(boxes)
+
+            for box, track_id, class_id in zip(boxes, track_ids, class_ids):
+                x1, y1, x2, y2 = box
+                center_x = (x1 + x2) // 2
+
+                # Draw detection box and label
+                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
+                cvzone.putTextRect(frame, f'ID: {track_id}', (x2, y2), 1, 1)
+                cvzone.putTextRect(frame, f'{names[class_id]}', (x1, y1), 1, 1)
+
+                # If the object is near the vertical center, analyze it
+                if abs(center_x - vertical_center) < 10:
+                    crop = frame[y1:y2, x1:x2]
+                    response = analyze_image_with_gemini(crop)
+
+                    # Log the response and display it on the frame
+                    print(f"Frame {frame_count}, Object {track_id}: {response}")
+                    cvzone.putTextRect(frame, response, (x1, y1 - 10), 1, 1, colorT=(255, 255, 255), colorR=(0, 0, 255))
+
+        out.write(frame)
+
+    cap.release()
+    out.release()
+
+    return output_video_path
+
+def gradio_interface(video_file):
+    """Handles Gradio video input and processes it."""
+    if video_file is None:
+        return "Error: No video uploaded."
+
+    processed_video = process_video(video_file)
+    return processed_video  # Return the processed video file
+
+# ✅ Gradio UI setup
+iface = gr.Interface(
+    fn=gradio_interface,
+    inputs=gr.File(type="filepath", label="Upload Video"),  # Accepts video files
+    outputs=gr.Video(label="Processed Video"),  # Outputs the processed video
+    title="YOLO + Gemini AI Video Analysis",
+    description="Upload a video to detect objects and analyze them using Gemini AI.",
+)
+
+if __name__ == "__main__":
+    iface.launch(share=True)
+
+# working
+'''
+
+
+
+import cv2
+import numpy as np
+from ultralytics import YOLO
+import cvzone
+import base64
+import os
+import gradio as gr
+from langchain_core.messages import HumanMessage
+from langchain_google_genai import ChatGoogleGenerativeAI
+
+# ✅ Set up Google API Key (Avoid hardcoding in production)
+os.environ["GOOGLE_API_KEY"] = "YOUR_GOOGLE_API_KEY"  # Replace with your actual API Key
+
+# ✅ Initialize the Gemini model
+gemini_model = ChatGoogleGenerativeAI(model="gemini-1.5-flash")
+
+# ✅ Load the YOLO model
+yolo_model = YOLO("best.pt")  # Ensure "best.pt" is in the working directory
+names = yolo_model.names  # Class names from the YOLO model
+
+def encode_image_to_base64(image):
+    """Encodes an image to a base64 string."""
+    _, img_buffer = cv2.imencode('.jpg', image)
+    return base64.b64encode(img_buffer).decode('utf-8')
+
+def analyze_image_with_gemini(image):
+    """Sends an image to Gemini AI for analysis."""
+    if image is None or image.shape[0] == 0 or image.shape[1] == 0:
+        return "Error: Invalid image."
+
+    image_data = encode_image_to_base64(image)
+    message = HumanMessage(content=[
+        {"type": "text", "text": """
+        Analyze this image and determine if the label is present on the bottle.
+        Return the result strictly in a structured table format:
+
+        | Label Present | Damage |
+        |--------------|--------|
+        | Yes/No | Yes/No |
+        """},
+        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}, "description": "Detected product"}
+    ])
+
+    try:
+        response = gemini_model.invoke([message])
+        return response.content
+    except Exception as e:
+        return f"Error processing image: {e}"
+
+def process_video(video_path):
+    """Processes the uploaded video frame by frame using YOLO and Gemini AI."""
+    cap = cv2.VideoCapture(video_path)
+    if not cap.isOpened():
+        return "Error: Could not open video file."
+
+    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+    fps = int(cap.get(cv2.CAP_PROP_FPS))
+
+    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+    output_video_path = "output.mp4"
+    out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
+
+    vertical_center = width // 2
+    analyzed_objects = {}  # Dictionary to store analyzed objects
+
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            break
+
+        results = yolo_model.track(frame, persist=True)
+
+        if results and results[0].boxes is not None and results[0].boxes.xyxy is not None:
+            boxes = results[0].boxes.xyxy.int().cpu().tolist()
+            class_ids = results[0].boxes.cls.int().cpu().tolist()
+            track_ids = results[0].boxes.id.int().cpu().tolist() if results[0].boxes.id is not None else [-1] * len(boxes)
+
+            for box, track_id, class_id in zip(boxes, track_ids, class_ids):
+                x1, y1, x2, y2 = box
+                center_x = (x1 + x2) // 2
+
+                # Draw detection box and label
+                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
+                cvzone.putTextRect(frame, f'ID: {track_id}', (x2, y2), 1, 1)
+                cvzone.putTextRect(frame, f'{names[class_id]}', (x1, y1), 1, 1)
+
+                # If the object is near the vertical center and hasn't been analyzed yet
+                if abs(center_x - vertical_center) < 10 and track_id not in analyzed_objects:
+                    crop = frame[y1:y2, x1:x2]
+                    response = analyze_image_with_gemini(crop)
+
+                    # Store the analyzed object to prevent duplicate analysis
+                    analyzed_objects[track_id] = response
+
+                    # Log the response and display it on the frame
+                    print(f"Object {track_id}: {response}")
+                    cvzone.putTextRect(frame, response, (x1, y1 - 10), 1, 1, colorT=(255, 255, 255), colorR=(0, 0, 255))
+
+        out.write(frame)
+
+    cap.release()
+    out.release()
+
+    return output_video_path
+
+def gradio_interface(video_path):
+    """Handles Gradio video input and processes it."""
+    if video_path is None:
+        return "Error: No video uploaded."
+    return process_video(video_path)
+
+# ✅ Gradio UI setup
+iface = gr.Interface(
+    fn=gradio_interface,
+    inputs=gr.File(type="filepath", label="Upload Video"),  # Accepts video files
+    outputs=gr.Video(label="Processed Video"),  # Outputs the processed video
+    title="YOLO + Gemini AI Video Analysis",
+    description="Upload a video to detect objects and analyze them using Gemini AI.",
+)
+
+if __name__ == "__main__":
+    iface.launch(share=True)
+
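A note on the API key: app.py's own comment says to avoid hardcoding the key in production. Below is a minimal sketch of the environment-variable alternative, assuming the key is stored as a Hugging Face Spaces secret (or exported locally) under the name GOOGLE_API_KEY; the fail-fast check is an assumption for illustration, not part of this commit.

import os
from langchain_google_genai import ChatGoogleGenerativeAI

# Read the key from the hosting environment instead of embedding it in app.py.
# (Assumed setup: GOOGLE_API_KEY is provided as a Space secret or exported shell variable.)
if not os.getenv("GOOGLE_API_KEY"):
    raise RuntimeError("GOOGLE_API_KEY is not set; add it as a Space secret or export it before launching.")

# langchain-google-genai reads GOOGLE_API_KEY from the environment by default,
# so no key needs to appear anywhere in the source.
gemini_model = ChatGoogleGenerativeAI(model="gemini-1.5-flash")

With this in place, the os.environ["GOOGLE_API_KEY"] assignment near the top of app.py can be dropped entirely.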