awacke1 committed on
Commit
9a9cd51
·
verified ·
1 Parent(s): a86ba98

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +111 -66
app.py CHANGED
@@ -9,22 +9,65 @@ import os
9
  from pathlib import Path
10
 
11
  class VideoRetrieval:
12
- def __init__(self):
13
  self.text_model = SentenceTransformer('all-MiniLM-L6-v2')
14
- self.load_data()
 
 
 
 
 
 
 
 
 
15
 
16
- def load_data(self):
17
- # Load pre-computed features
18
- # In practice, these would be loaded from your actual feature files
19
  self.features = {
20
- 'visual_features': np.load('path_to_visual_features.npy'),
21
- 'scene_features': np.load('path_to_scene_features.npy'),
22
- 'object_features': np.load('path_to_object_features.npy')
23
  }
24
 
25
- # Load clip metadata
26
- self.clips_df = pd.read_csv('clips_metadata.csv')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  def encode_query(self, query_text):
29
  """Encode the text query into embeddings"""
30
  return self.text_model.encode(query_text)
@@ -67,88 +110,90 @@ class VideoRetrieval:
67
  'movie_title': self.clips_df.iloc[idx]['movie_title'],
68
  'description': self.clips_df.iloc[idx]['description'],
69
  'timestamp': self.clips_df.iloc[idx]['timestamp'],
70
- 'similarity_score': combined_similarities[idx]
71
  })
72
 
73
  return results
74
 
75
- # Streamlit UI
76
  def main():
77
- st.title("Movie Scene Retrieval System")
 
 
 
 
 
 
78
  st.write("""
79
  Search for movie scenes using natural language descriptions.
80
  The system will retrieve the most relevant 2-3 minute clips based on your query.
 
 
81
  """)
82
 
83
  # Initialize retrieval system
84
  try:
85
  retrieval_system = st.session_state.retrieval_system
86
  except AttributeError:
87
- retrieval_system = VideoRetrieval()
88
  st.session_state.retrieval_system = retrieval_system
89
 
90
  # Search interface
91
- query = st.text_input("Enter your scene description:",
92
- "A dramatic confrontation between two characters in a dark room")
93
 
94
- num_results = st.slider("Number of results to show:", min_value=1, max_value=5, value=3)
 
 
 
 
95
 
96
- if st.button("Search"):
97
- with st.spinner("Searching for relevant clips..."):
98
- results = retrieval_system.retrieve_clips(query, top_k=num_results)
99
-
100
- for i, result in enumerate(results, 1):
101
- st.subheader(f"Result {i}: {result['movie_title']}")
102
- col1, col2 = st.columns([2, 1])
103
-
104
- with col1:
105
- st.write("**Scene Description:**")
106
- st.write(result['description'])
107
- st.write(f"**Timestamp:** {result['timestamp']}")
108
-
109
- with col2:
110
- st.write("**Similarity Score:**")
111
- st.progress(float(result['similarity_score']))
112
 
113
- # In practice, you would have a way to play the video clip here
114
- st.write("---")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
 
116
- # Additional features
117
  with st.sidebar:
118
- st.header("About")
119
  st.write("""
120
- This system uses pre-computed visual features from several expert models to retrieve
121
- relevant movie clips based on natural language descriptions. Features include:
122
 
123
- - Visual scene understanding
124
- - Character interaction analysis
125
- - Object detection
126
- - Action recognition
 
 
 
127
  """)
128
 
129
- st.header("Feature Weights")
130
  st.write("Current weights used for similarity computation:")
131
- st.write("- Visual Features: 40%")
132
- st.write("- Scene Features: 30%")
133
- st.write("- Object Features: 30%")
134
 
135
  if __name__ == "__main__":
136
- main()
137
-
138
- # Requirements.txt
139
- '''
140
- streamlit==1.22.0
141
- pandas==1.5.3
142
- numpy==1.23.5
143
- sentence-transformers==2.2.2
144
- scikit-learn==1.2.2
145
- torch==2.0.0
146
-
147
- streamlit
148
- pandas
149
- numpy
150
- sentence-transformers
151
- scikit-learn
152
- torch
153
-
154
- '''
 
9
  from pathlib import Path
10
 
11
  class VideoRetrieval:
12
def __init__(self, use_dummy_data=True):
    """Set up the retrieval system.

    Args:
        use_dummy_data: When True (default), populate the index with
            generated demo data; otherwise load pre-computed feature
            files from disk via ``load_data``.
    """
    self.text_model = SentenceTransformer('all-MiniLM-L6-v2')
    # Pick the data source once, then invoke it.
    loader = self.create_dummy_data if use_dummy_data else self.load_data
    loader()
19
def create_dummy_data(self):
    """Create dummy features and metadata for demonstration"""
    # Random vectors stand in for real pre-computed clip embeddings.
    n_clips = 20
    feature_dim = 384  # matching the dimension of all-MiniLM-L6-v2

    self.features = {
        name: np.random.randn(n_clips, feature_dim)
        for name in ('visual_features', 'scene_features', 'object_features')
    }

    movie_titles = [
        "The Matrix", "Inception", "The Dark Knight", "Pulp Fiction",
        "The Shawshank Redemption", "Forrest Gump", "The Godfather",
        "Fight Club", "Interstellar", "The Silence of the Lambs"
    ]

    descriptions = [
        "A dramatic confrontation in a dark room where the truth is revealed",
        "A high-stakes chase through a crowded city street",
        "An emotional reunion between long-lost friends",
        "A tense negotiation that determines the fate of many",
        "A quiet moment of reflection before a life-changing decision"
    ]

    # Cycle through titles/descriptions so every clip row is populated;
    # each clip gets a synthetic 3-minute window starting at i*5 minutes.
    rows = [
        {
            'clip_id': f'clip_{i}',
            'movie_title': movie_titles[i % len(movie_titles)],
            'description': descriptions[i % len(descriptions)],
            'timestamp': f'{(i*5):02d}:00 - {(i*5+3):02d}:00',
            'duration': '3:00',
        }
        for i in range(n_clips)
    ]
    self.clips_df = pd.DataFrame(rows)
57
 
58
def load_data(self):
    """Load actual pre-computed features and metadata"""
    # Map feature names to their on-disk .npy artifacts.
    feature_files = {
        'visual_features': 'path_to_visual_features.npy',
        'scene_features': 'path_to_scene_features.npy',
        'object_features': 'path_to_object_features.npy',
    }
    try:
        self.features = {
            name: np.load(path) for name, path in feature_files.items()
        }
        self.clips_df = pd.read_csv('clips_metadata.csv')
    except FileNotFoundError as e:
        # Keep the app usable even when the real artifacts are absent.
        st.error(f"Error loading data: {e}. Falling back to dummy data.")
        self.create_dummy_data()
70
+
71
def encode_query(self, query_text):
    """Encode the text query into embeddings"""
    embedding = self.text_model.encode(query_text)
    return embedding
 
110
  'movie_title': self.clips_df.iloc[idx]['movie_title'],
111
  'description': self.clips_df.iloc[idx]['description'],
112
  'timestamp': self.clips_df.iloc[idx]['timestamp'],
113
+ 'similarity_score': float(combined_similarities[idx]) # Convert to float for JSON serialization
114
  })
115
 
116
  return results
117
 
 
118
def main():
    """Render the Streamlit UI for the movie scene retrieval demo.

    Fixes vs. the committed version: the emoji in UI strings were
    mojibake (UTF-8 bytes decoded with the wrong codec, e.g. "πŸ”");
    session-state access now uses the idiomatic membership test instead
    of catching AttributeError.
    """
    st.set_page_config(
        page_title="Movie Scene Retrieval System",
        page_icon="🎬",
        layout="wide"
    )

    st.title("🎬 Movie Scene Retrieval System")
    st.write("""
    Search for movie scenes using natural language descriptions.
    The system will retrieve the most relevant 2-3 minute clips based on your query.

    *Note: This is a demo version using simulated data.*
    """)

    # Cache the retrieval system across Streamlit reruns; constructing it
    # loads the sentence-transformer model, which is expensive.
    if "retrieval_system" not in st.session_state:
        st.session_state.retrieval_system = VideoRetrieval(use_dummy_data=True)
    retrieval_system = st.session_state.retrieval_system

    # Search interface
    col1, col2 = st.columns([3, 1])

    with col1:
        query = st.text_input(
            "Enter your scene description:",
            placeholder="e.g., A dramatic confrontation between two characters in a dark room"
        )

    with col2:
        num_results = st.slider("Number of results:", min_value=1, max_value=5, value=3)

    if st.button("πŸ” Search", type="primary"):
        if not query:
            st.warning("Please enter a scene description.")
        else:
            with st.spinner("Searching for relevant clips..."):
                results = retrieval_system.retrieve_clips(query, top_k=num_results)

            for result in results:
                with st.container():
                    st.subheader(result['movie_title'])
                    cols = st.columns([2, 1])

                    with cols[0]:
                        st.markdown("**Scene Description:**")
                        st.write(result['description'])
                        st.text(f"⏱️ Timestamp: {result['timestamp']}")

                    with cols[1]:
                        st.markdown("**Relevance Score:**")
                        # Clamp so st.progress never receives a value
                        # outside its accepted [0.0, 1.0] range.
                        score = min(1.0, max(0.0, result['similarity_score']))
                        st.progress(score)
                        st.text(f"{score:.2%} match")

                    st.divider()

    # Sidebar with additional information
    with st.sidebar:
        st.header("ℹ️ About")
        st.write("""
        This demo system simulates a video retrieval engine that uses:

        - πŸŽ₯ Visual scene understanding
        - πŸ‘₯ Character interaction analysis
        - 🎯 Object detection
        - 🎭 Action recognition

        In a production system, these features would be pre-computed
        from actual movie clips using state-of-the-art AI models.
        """)

        st.header("βš™οΈ Feature Weights")
        st.write("Current weights used for similarity computation:")
        st.write("- 🎬 Visual Features: 40%")
        st.write("- 🏞️ Scene Features: 30%")
        st.write("- πŸ“¦ Object Features: 30%")


if __name__ == "__main__":
    main()