# Source: Hugging Face Space "CodeCompetitionClaudeVsGPT" (author: awacke1)
# Commit 8433575 (verified) -- "Rename to" (commit message truncated in the page capture)
import streamlit as st
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import torch
import json
import os
from pathlib import Path
class VideoRetrieval:
    """Text-to-clip retrieval over pre-computed per-clip feature matrices.

    A free-text query is embedded with a sentence-transformer model and
    compared (cosine similarity) against three feature matrices stored in
    ``self.features``.  The three similarity vectors are blended with
    ``FEATURE_WEIGHTS`` and the best-scoring rows of ``self.clips_df`` are
    returned.
    """

    # Relative contribution of each feature matrix to the combined score.
    FEATURE_WEIGHTS = {
        'visual_features': 0.4,
        'scene_features': 0.3,
        'object_features': 0.3,
    }

    def __init__(self, use_dummy_data=True):
        """Load the text encoder and populate features/metadata.

        Args:
            use_dummy_data: When true, generate random features and
                placeholder metadata; otherwise load them from disk via
                ``load_data``.
        """
        self.text_model = SentenceTransformer('all-MiniLM-L6-v2')
        if use_dummy_data:
            self.create_dummy_data()
        else:
            self.load_data()

    def create_dummy_data(self):
        """Create dummy features and metadata for demonstration.

        Sets ``self.features`` (three random ``(n_clips, feature_dim)``
        arrays) and ``self.clips_df`` (one metadata row per clip).
        """
        # Create dummy features
        n_clips = 20
        feature_dim = 384  # matching the dimension of all-MiniLM-L6-v2
        self.features = {
            'visual_features': np.random.randn(n_clips, feature_dim),
            'scene_features': np.random.randn(n_clips, feature_dim),
            'object_features': np.random.randn(n_clips, feature_dim),
        }

        # Create dummy metadata
        movie_titles = [
            "The Matrix", "Inception", "The Dark Knight", "Pulp Fiction",
            "The Shawshank Redemption", "Forrest Gump", "The Godfather",
            "Fight Club", "Interstellar", "The Silence of the Lambs",
        ]
        descriptions = [
            "A dramatic confrontation in a dark room where the truth is revealed",
            "A high-stakes chase through a crowded city street",
            "An emotional reunion between long-lost friends",
            "A tense negotiation that determines the fate of many",
            "A quiet moment of reflection before a life-changing decision",
        ]
        # Sample YouTube clips (famous movie scenes).
        # NOTE(review): the URLs are empty strings in this copy of the file;
        # fill them in before relying on video playback.
        youtube_clips = [
            "",  # Matrix - Red Pill Blue Pill
            "",  # Inception - Hallway Fight
            "",  # Dark Knight - Interrogation
            "",  # Pulp Fiction - Restaurant
            "",  # Shawshank - Hope Speech
        ]

        # The lists are shorter than n_clips, so cycle through them.
        data = [
            {
                'clip_id': f'clip_{i}',
                'movie_title': movie_titles[i % len(movie_titles)],
                'description': descriptions[i % len(descriptions)],
                'timestamp': f'{(i*5):02d}:00 - {(i*5+3):02d}:00',
                'duration': '3:00',
                'youtube_url': youtube_clips[i % len(youtube_clips)],
            }
            for i in range(n_clips)
        ]
        self.clips_df = pd.DataFrame(data)

    def load_data(self):
        """Load actual pre-computed features and metadata.

        If any of the files is missing, an error is shown in the Streamlit
        UI and the instance falls back to dummy data so it stays usable.
        """
        try:
            self.features = {
                'visual_features': np.load('path_to_visual_features.npy'),
                'scene_features': np.load('path_to_scene_features.npy'),
                'object_features': np.load('path_to_object_features.npy'),
            }
            self.clips_df = pd.read_csv('clips_metadata.csv')
        except FileNotFoundError as e:
            st.error(f"Error loading data: {e}. Falling back to dummy data.")
            # Regenerate both features and metadata so they stay consistent
            # even if only some of the files were found.
            self.create_dummy_data()

    def encode_query(self, query_text):
        """Encode the text query into embeddings."""
        return self.text_model.encode(query_text)

    def compute_similarity(self, query_embedding, feature_type='visual_features'):
        """Compute similarity between query and video features.

        Args:
            query_embedding: 1-D embedding of the query.
            feature_type: Key into ``self.features`` selecting the matrix to
                compare against.

        Returns:
            1-D array with one cosine-similarity score per clip.
        """
        similarities = cosine_similarity(
            query_embedding.reshape(1, -1),  # a single query row
            self.features[feature_type],
        )
        return similarities[0]

    def retrieve_clips(self, query_text, top_k=3):
        """Retrieve top-k most relevant clips based on query.

        Args:
            query_text: Natural-language scene description.
            top_k: Maximum number of clips to return.

        Returns:
            A list of dicts (best match first) with keys ``clip_id``,
            ``movie_title``, ``description``, ``timestamp``, ``youtube_url``
            and ``similarity_score``.  Empty when ``top_k`` is not positive.
        """
        # Guard first: a slice of [-0:] would otherwise return every clip.
        if top_k <= 0:
            return []

        # Encode query
        query_embedding = self.encode_query(query_text)

        # Weighted sum of the per-feature similarity vectors
        combined_similarities = sum(
            self.compute_similarity(query_embedding, feat_type) * weight
            for feat_type, weight in self.FEATURE_WEIGHTS.items()
        )

        # Get top-k indices, highest score first
        top_indices = np.argsort(combined_similarities)[-top_k:][::-1]

        # Return clip information
        results = []
        for idx in top_indices:
            row = self.clips_df.iloc[idx]
            results.append({
                'clip_id': row['clip_id'],
                'movie_title': row['movie_title'],
                'description': row['description'],
                'timestamp': row['timestamp'],
                'youtube_url': row['youtube_url'],
                # Convert to float for JSON serialization
                'similarity_score': float(combined_similarities[idx]),
            })
        return results
def main():
    """Render the Streamlit UI: search box, ranked results, and sidebar."""
    st.set_page_config(
        page_title="Movie Scene Retrieval System",
    )
    st.title("🎬 Movie Scene Retrieval System")
    st.write("""
    Search for movie scenes using natural language descriptions.
    The system will retrieve the most relevant 2-3 minute clips based on your query.

    *Note: This is a demo version using simulated data.*
    """)

    # Initialize retrieval system once per session; the model load is slow,
    # so the instance is cached in session state across reruns.
    try:
        retrieval_system = st.session_state.retrieval_system
    except AttributeError:
        retrieval_system = VideoRetrieval(use_dummy_data=True)
        st.session_state.retrieval_system = retrieval_system

    # Search interface
    col1, col2 = st.columns([3, 1])
    with col1:
        query = st.text_input(
            "Enter your scene description:",
            placeholder="e.g., A dramatic confrontation between two characters in a dark room"
        )
    with col2:
        num_results = st.slider("Number of results:", min_value=1, max_value=5, value=3)

    if st.button("🔍 Search", type="primary"):
        if not query:
            st.warning("Please enter a scene description.")
        else:
            with st.spinner("Searching for relevant clips..."):
                results = retrieval_system.retrieve_clips(query, top_k=num_results)

            for i, result in enumerate(results, 1):
                with st.container():
                    st.subheader(f"{i}. {result['movie_title']}")
                    cols = st.columns([2, 1])
                    with cols[0]:
                        st.markdown("**Scene Description:**")
                        st.write(result['description'])
                        st.text(f"⏱️ Timestamp: {result['timestamp']}")
                        # Add video player (skipped when no URL is available)
                        if result['youtube_url']:
                            st.video(result['youtube_url'])
                    with cols[1]:
                        st.markdown("**Relevance Score:**")
                        # Cosine-based scores can be negative; clamp to [0, 1]
                        # so the percentage display stays meaningful.
                        score = min(1.0, max(0.0, result['similarity_score']))
                        st.text(f"{score:.2%} match")
                        # Add direct YouTube link
                        st.markdown(f"[🔗 Watch on YouTube]({result['youtube_url']})")
                        st.text("Click to open in a new tab")

    # Sidebar with additional information
    with st.sidebar:
        st.header("ℹ️ About")
        st.markdown("""
        This demo system simulates a video retrieval engine that uses:

        - 🎥 Visual scene understanding
        - 👥 Character interaction analysis
        - 🎯 Object detection
        - 🎭 Action recognition

        In a production system, these features would be pre-computed
        from actual movie clips using state-of-the-art AI models.
        """)
        st.header("⚙️ Feature Weights")
        st.write("Current weights used for similarity computation:")
        st.write("- 🎬 Visual Features: 40%")
        st.write("- 🏞️ Scene Features: 30%")
        st.write("- 📦 Object Features: 30%")
if __name__ == "__main__":