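"""Gradio app for video question answering: upload a video, have VideoRAGTool
index it, then retrieve and display the frames most relevant to a text query."""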
import gradio as gr
from video_rag_tool import VideoRAGTool
import tempfile
import os
from PIL import Image
import cv2
import numpy as np
import torch


class VideoRAGApp:
    def __init__(self):
        self.rag_tool = VideoRAGTool()
        self.current_video_path = None
        self.processed = False

    def process_video(self, video_file):
        """Process uploaded video and return status message"""
        if video_file is None:
            return "Please upload a video first."

        # Save the uploaded video (raw bytes from gr.File(type="binary")) to a temporary file
        temp_dir = tempfile.mkdtemp()
        temp_path = os.path.join(temp_dir, "uploaded_video.mp4")
        with open(temp_path, "wb") as f:
            f.write(video_file)
        self.current_video_path = temp_path

        try:
            self.rag_tool.process_video(temp_path)
            self.processed = True
            return "Video processed successfully! You can now ask questions about the video."
        except Exception as e:
            return f"Error processing video: {str(e)}"
    def query_video(self, query_text):
        """Query the video and return relevant frames with descriptions"""
        if not self.processed:
            # Two outputs (gallery, textbox) are wired to this handler, so always return a pair
            return None, "Please process a video first."

        try:
            results = self.rag_tool.query_video(query_text, k=4)

            # Extract the matching frames for display
            frames = []
            captions = []
            cap = cv2.VideoCapture(self.current_video_path)
            for result in results:
                frame_number = result['frame_number']
                cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
                ret, frame = cap.read()
                if ret:
                    # OpenCV decodes to BGR; convert to RGB before handing to PIL
                    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    frames.append(Image.fromarray(frame_rgb))
                    caption = f"Timestamp: {result['timestamp']:.2f}s\n"
                    caption += f"Relevance: {result['relevance_score']:.2f}"
                    captions.append(caption)
            cap.release()

            # The details textbox expects a single string, so join the per-frame captions
            return frames, "\n\n".join(captions)
        except Exception as e:
            return None, f"Error querying video: {str(e)}"
    def create_interface(self):
        """Create and return Gradio interface"""
        with gr.Blocks(title="Video Chat RAG") as interface:
            gr.Markdown("# Video Chat RAG")
            gr.Markdown("Upload a video and ask questions about its content!")

            with gr.Row():
                video_input = gr.File(
                    label="Upload Video",
                    file_types=["video"],
                    type="binary",  # deliver raw bytes, matching the write in process_video()
                )
                process_button = gr.Button("Process Video")

            status_output = gr.Textbox(
                label="Status",
                interactive=False
            )

            with gr.Row():
                query_input = gr.Textbox(
                    label="Ask about the video",
                    placeholder="What's happening in the video?"
                )
                query_button = gr.Button("Search")

            with gr.Row():
                gallery = gr.Gallery(
                    label="Retrieved Frames",
                    show_label=True,
                    elem_id="gallery",
                    columns=[2],
                    rows=[2],
                    height="auto"
                )
                captions = gr.Textbox(
                    label="Frame Details",
                    interactive=False
                )

            # Set up event handlers
            process_button.click(
                fn=self.process_video,
                inputs=[video_input],
                outputs=[status_output]
            )
            query_button.click(
                fn=self.query_video,
                inputs=[query_input],
                outputs=[gallery, captions]
            )

        return interface
# For Hugging Face Spaces deployment
app = VideoRAGApp()
interface = app.create_interface()

# Launch the app (for local testing)
if __name__ == "__main__":
    interface.launch()