awacke1 committed on
Commit 4976812 Β· verified Β· Parent(s): ceb3a99

Update app.py

Files changed (1)
  1. app.py +327 -257

app.py CHANGED
@@ -13,21 +13,10 @@ import edge_tts
 import asyncio
 import base64
 import requests
-import plotly.graph_objects as go
-from gradio_client import Client
 from collections import defaultdict
-from bs4 import BeautifulSoup
 from audio_recorder_streamlit import audio_recorder
 import streamlit.components.v1 as components
 
-# Page configuration
-st.set_page_config(
-    page_title="Video Search & Research Assistant",
-    page_icon="πŸŽ₯",
-    layout="wide",
-    initial_sidebar_state="auto",
-)
-
 # Initialize session state
 if 'search_history' not in st.session_state:
     st.session_state['search_history'] = []
@@ -37,18 +26,10 @@ if 'transcript_history' not in st.session_state:
     st.session_state['transcript_history'] = []
 if 'should_rerun' not in st.session_state:
     st.session_state['should_rerun'] = False
-
-# Custom styling
-st.markdown("""
-<style>
-.main { background: linear-gradient(to right, #1a1a1a, #2d2d2d); color: #fff; }
-.stMarkdown { font-family: 'Helvetica Neue', sans-serif; }
-.stButton>button { margin-right: 0.5rem; }
-</style>
-""", unsafe_allow_html=True)
-
-# Initialize components
-speech_component = components.declare_component("speech_recognition", path="mycomponent")
 
 class VideoSearch:
     def __init__(self):
@@ -56,229 +37,245 @@ class VideoSearch:
         self.load_dataset()
 
     def fetch_dataset_rows(self):
-        """Fetch dataset from Hugging Face API with debug and caching"""
         try:
-            st.info("Fetching from Hugging Face API...")
             url = "https://datasets-server.huggingface.co/first-rows?dataset=omegalabsinc%2Fomega-multimodal&config=default&split=train"
-
             response = requests.get(url, timeout=30)
-            st.write(f"Response status: {response.status_code}")
 
             if response.status_code == 200:
                 data = response.json()
-
                 if 'rows' in data:
-                    # Extract actual row data from the nested structure
                     processed_rows = []
                     for row_data in data['rows']:
-                        if 'row' in row_data:  # Access the nested 'row' data
-                            processed_rows.append(row_data['row'])
 
                     df = pd.DataFrame(processed_rows)
-
-                    # Debug output
-                    st.write("DataFrame columns after processing:", list(df.columns))
-                    st.write("Number of rows:", len(df))
-
                     return df
-                else:
-                    st.error("No 'rows' found in API response")
-                    st.write("Raw API Response:", data)
-                    return self.load_example_data()
-            else:
-                st.error(f"API request failed with status code: {response.status_code}")
-                return self.load_example_data()
 
         except Exception as e:
-            st.error(f"Error fetching dataset: {str(e)}")
             return self.load_example_data()
 
     def load_example_data(self):
         """Load example data as fallback"""
         example_data = [
             {
                 "video_id": "cd21da96-fcca-4c94-a60f-0b1e4e1e29fc",
                 "youtube_id": "IO-vwtyicn4",
-                "description": "This video shows a close-up of an ancient text carved into a surface, with the text appearing to be in a cursive script.",
                 "views": 45489,
                 "start_time": 1452,
                 "end_time": 1458,
                 "video_embed": [0.014160037972033024, -0.003111184574663639, -0.016604168340563774],
                 "description_embed": [-0.05835828185081482, 0.02589797042310238, 0.11952091753482819]
-            },
-            {
-                "video_id": "a8ebde7d-d717-4c1e-8be4-bdb4bc0c544f",
-                "youtube_id": "mo4rEyF7gTE",
-                "description": "This video shows a close-up view of a classical architectural structure, featuring stone statues with ornate details.",
-                "views": 4468,
-                "start_time": 318,
-                "end_time": 324,
-                "video_embed": [0.015160037972033024, -0.004111184574663639, -0.017604168340563774],
-                "description_embed": [-0.06835828185081482, 0.03589797042310238, 0.12952091753482819]
-            },
-            {
-                "video_id": "d1be64a6-22e2-4fbd-a176-20749e7c3d8a",
-                "youtube_id": "IO-vwtyicn4",
-                "description": "This video shows a weathered ancient painting depicting figures in classical style with vibrant colors preserved.",
-                "views": 45489,
-                "start_time": 1698,
-                "end_time": 1704,
-                "video_embed": [0.016160037972033024, -0.005111184574663639, -0.018604168340563774],
-                "description_embed": [-0.07835828185081482, 0.04589797042310238, 0.13952091753482819]
             }
         ]
         return pd.DataFrame(example_data)
 
-    def prepare_features(self):
-        """Prepare and cache embeddings"""
-        try:
-            if 'video_embed' not in self.dataset.columns:
-                st.warning("Using example data embeddings")
-                self.dataset = self.load_example_data()
-
-            # Debug: Show raw data types and first row
-            st.write("Data Types:", self.dataset.dtypes)
-            st.write("\nFirst row of embeddings:")
-            st.write("video_embed type:", type(self.dataset['video_embed'].iloc[0]))
-            st.write("video_embed content:", self.dataset['video_embed'].iloc[0])
-            st.write("\ndescription_embed type:", type(self.dataset['description_embed'].iloc[0]))
-            st.write("description_embed content:", self.dataset['description_embed'].iloc[0])
-
-            # Convert string representations of embeddings back to numpy arrays
-            def safe_eval_list(s):
-                try:
-                    # Clean the string representation
-                    if isinstance(s, str):
-                        s = s.replace('[', '').replace(']', '').strip()
-                        # Split by whitespace and/or commas
-                        numbers = [float(x.strip()) for x in s.split() if x.strip()]
-                        return numbers
-                    elif isinstance(s, list):
-                        return [float(x) for x in s]
-                    else:
-                        st.error(f"Unexpected type for embedding: {type(s)}")
-                        return None
-                except Exception as e:
-                    st.error(f"Error parsing embedding: {str(e)}")
-                    st.write("Problematic string:", s)
-                    return None
-
-            # Process embeddings with detailed error reporting
-            video_embeds = []
-            text_embeds = []
-
-            for idx in range(len(self.dataset)):
-                try:
-                    video_embed = safe_eval_list(self.dataset['video_embed'].iloc[idx])
-                    desc_embed = safe_eval_list(self.dataset['description_embed'].iloc[idx])
-
-                    if video_embed is not None and desc_embed is not None:
-                        video_embeds.append(video_embed)
-                        text_embeds.append(desc_embed)
-                    else:
-                        st.warning(f"Skipping row {idx} due to parsing failure")
-                except Exception as e:
-                    st.error(f"Error processing row {idx}: {str(e)}")
-                    st.write("Row data:", self.dataset.iloc[idx])
-
-            if video_embeds and text_embeds:
-                try:
-                    self.video_embeds = np.array(video_embeds)
-                    self.text_embeds = np.array(text_embeds)
-                    st.success(f"Successfully processed {len(video_embeds)} embeddings")
-                    st.write("Video embeddings shape:", self.video_embeds.shape)
-                    st.write("Text embeddings shape:", self.text_embeds.shape)
-                except Exception as e:
-                    st.error(f"Error converting to numpy arrays: {str(e)}")
-            else:
-                st.warning("No valid embeddings found, using random embeddings")
-                num_rows = len(self.dataset)
-                self.video_embeds = np.random.randn(num_rows, 384)
-                self.text_embeds = np.random.randn(num_rows, 384)
-
-        except Exception as e:
-            st.error(f"Error preparing features: {str(e)}")
-            import traceback
-            st.write("Traceback:", traceback.format_exc())
-            # Create random embeddings as fallback
-            num_rows = len(self.dataset)
-            self.video_embeds = np.random.randn(num_rows, 384)
-            self.text_embeds = np.random.randn(num_rows, 384)
-
     def load_dataset(self):
-        try:
-            self.dataset = self.fetch_dataset_rows()
-            if self.dataset is not None:
-                self.prepare_features()
-            else:
-                self.create_dummy_data()
-        except Exception as e:
-            st.error(f"Error loading dataset: {e}")
-            self.create_dummy_data()
-
-    def prepare_features(self):
-        try:
-            self.video_embeds = np.array([json.loads(e) if isinstance(e, str) else e
-                                          for e in self.dataset.video_embed])
-            self.text_embeds = np.array([json.loads(e) if isinstance(e, str) else e
-                                         for e in self.dataset.description_embed])
-        except Exception as e:
-            st.error(f"Error preparing features: {e}")
-            num_rows = len(self.dataset)
-            self.video_embeds = np.random.randn(num_rows, 384)
-            self.text_embeds = np.random.randn(num_rows, 384)
-
-    def search(self, query, top_k=5):
         query_embedding = self.text_model.encode([query])[0]
-
         video_sims = cosine_similarity([query_embedding], self.video_embeds)[0]
         text_sims = cosine_similarity([query_embedding], self.text_embeds)[0]
-
         combined_sims = 0.5 * video_sims + 0.5 * text_sims
 
         top_indices = np.argsort(combined_sims)[-top_k:][::-1]
 
         results = []
         for idx in top_indices:
-            results.append({
-                'video_id': self.dataset.iloc[idx]['video_id'],
-                'youtube_id': self.dataset.iloc[idx]['youtube_id'],
-                'description': self.dataset.iloc[idx]['description'],
-                'start_time': self.dataset.iloc[idx]['start_time'],
-                'end_time': self.dataset.iloc[idx]['end_time'],
-                'relevance_score': float(combined_sims[idx]),
-                'views': self.dataset.iloc[idx]['views']
-            })
         return results
 
-def perform_arxiv_search(query, vocal_summary=True, extended_refs=False):
-    """Perform Arxiv search with audio summaries"""
-    try:
-        client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
-        refs = client.predict(query, 20, "Semantic Search",
-                              "mistralai/Mixtral-8x7B-Instruct-v0.1",
-                              api_name="/update_with_rag_md")[0]
-        response = client.predict(query, "mistralai/Mixtral-8x7B-Instruct-v0.1",
-                                  True, api_name="/ask_llm")
-
-        result = f"### πŸ”Ž {query}\n\n{response}\n\n{refs}"
-        st.markdown(result)
-
-        if vocal_summary:
-            audio_file = asyncio.run(generate_speech(response[:500]))
-            if audio_file:
-                st.audio(audio_file)
-                os.remove(audio_file)
-
-        return result
-    except Exception as e:
-        st.error(f"Error in Arxiv search: {e}")
-        return None
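For reference, the removed helper used the standard gradio_client pattern: connect to a Space, then call its named endpoints. A minimal standalone sketch of that pattern, reusing the Space and endpoint names from the deleted code (whether the Space still exposes these endpoints is an assumption):

# Sketch of the gradio_client call pattern removed above. The Space name and
# the /update_with_rag_md and /ask_llm endpoints come from the deleted code;
# their continued availability is assumed.
from gradio_client import Client

def arxiv_lookup(query: str) -> str:
    client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
    refs = client.predict(query, 20, "Semantic Search",
                          "mistralai/Mixtral-8x7B-Instruct-v0.1",
                          api_name="/update_with_rag_md")[0]
    answer = client.predict(query, "mistralai/Mixtral-8x7B-Instruct-v0.1",
                            True, api_name="/ask_llm")
    return f"{answer}\n\n{refs}"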
 
 async def generate_speech(text, voice="en-US-AriaNeural"):
     """Generate speech using Edge TTS"""
     if not text.strip():
         return None
-
     try:
         communicate = edge_tts.Communicate(text, voice)
         audio_file = f"speech_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp3"
@@ -288,79 +285,130 @@ async def generate_speech(text, voice="en-US-AriaNeural"):
         st.error(f"Error generating speech: {e}")
         return None
 
-def process_audio_input(audio_bytes):
-    """Process audio input from recorder"""
-    if audio_bytes:
-        # Save temporary file
-        audio_path = f"temp_audio_{datetime.now().strftime('%Y%m%d_%H%M%S')}.wav"
-        with open(audio_path, "wb") as f:
-            f.write(audio_bytes)
-
-        # Here you would typically use a speech-to-text service
-        # For now, we'll just acknowledge the recording
-        st.success("Audio recorded successfully!")
-
-        # Cleanup
-        if os.path.exists(audio_path):
-            os.remove(audio_path)
-
-        return True
-    return False
 
 def main():
-    st.title("πŸŽ₯ Video Search & Research Assistant")
 
-    # Initialize search
     search = VideoSearch()
 
-    # Create main tabs
-    tab1, tab2, tab3 = st.tabs(["πŸ” Video Search", "πŸŽ™οΈ Voice & Audio", "πŸ“š Arxiv Research"])
 
     with tab1:
-        st.subheader("Search Video Dataset")
-
-        # Text search
-        query = st.text_input("Enter your search query:")
-        col1, col2 = st.columns(2)
 
         with col1:
-            search_button = st.button("πŸ” Search")
         with col2:
-            num_results = st.slider("Number of results:", 1, 10, 5)
 
-        if search_button and query:
-            results = search.search(query, num_results)
             st.session_state['search_history'].append({
                 'query': query,
                 'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-                'results': results
             })
 
             for i, result in enumerate(results, 1):
-                with st.expander(f"Result {i}: {result['description'][:100]}...", expanded=i==1):
                     cols = st.columns([2, 1])
-
                     with cols[0]:
-                        st.markdown(f"**Full Description:**")
                         st.write(result['description'])
                         st.markdown(f"**Time Range:** {result['start_time']}s - {result['end_time']}s")
                         st.markdown(f"**Views:** {result['views']:,}")
 
                     with cols[1]:
                         st.markdown(f"**Relevance Score:** {result['relevance_score']:.2%}")
-                        if result['youtube_id']:
                             st.video(f"https://youtube.com/watch?v={result['youtube_id']}&t={result['start_time']}")
 
-                        # Generate audio summary
-                        if st.button(f"πŸ”Š Generate Audio Summary", key=f"audio_{i}"):
                             summary = f"Video summary: {result['description'][:200]}"
                             audio_file = asyncio.run(generate_speech(summary))
                             if audio_file:
                                 st.audio(audio_file)
-                                os.remove(audio_file)
-
     with tab2:
-        st.subheader("Voice Input & Audio Recording")
 
         col1, col2 = st.columns(2)
         with col1:
@@ -372,49 +420,71 @@ def main():
             st.markdown("**Transcribed Text:**")
             st.write(voice_input)
 
-            if st.button("πŸ” Search Videos"):
-                results = search.search(voice_input, num_results)
                 for i, result in enumerate(results, 1):
                     with st.expander(f"Result {i}", expanded=i==1):
                         st.write(result['description'])
-                        if result['youtube_id']:
-                            st.video(f"https://youtube.com/watch?v={result['youtube_id']}&t={result['start_time']}")
 
         with col2:
-            st.write("🎡 Audio Recorder")
             audio_bytes = audio_recorder()
             if audio_bytes:
-                process_audio_input(audio_bytes)
 
     with tab3:
-        st.subheader("Arxiv Research")
-        arxiv_query = st.text_input("πŸ” Research Query:")
-
-        col1, col2 = st.columns(2)
-        with col1:
-            vocal_summary = st.checkbox("Generate Audio Summary", value=True)
-        with col2:
-            extended_refs = st.checkbox("Include Extended References", value=False)
-
-        if st.button("πŸ” Search Arxiv") and arxiv_query:
-            perform_arxiv_search(arxiv_query, vocal_summary, extended_refs)
 
-    # Sidebar for history and settings
     with st.sidebar:
         st.subheader("βš™οΈ Settings & History")
 
         if st.button("πŸ—‘οΈ Clear History"):
             st.session_state['search_history'] = []
-            st.experimental_rerun()
 
         st.markdown("### Recent Searches")
        for entry in reversed(st.session_state['search_history'][-5:]):
-            st.markdown(f"**{entry['timestamp']}**: {entry['query']}")
 
         st.markdown("### Voice Settings")
         st.selectbox("TTS Voice:",
                      ["en-US-AriaNeural", "en-US-GuyNeural", "en-GB-SoniaNeural"],
                      key="tts_voice")
 
 if __name__ == "__main__":
     main()
 
 import asyncio
 import base64
 import requests
 from collections import defaultdict
 from audio_recorder_streamlit import audio_recorder
 import streamlit.components.v1 as components
 
 # Initialize session state
 if 'search_history' not in st.session_state:
     st.session_state['search_history'] = []
 if 'transcript_history' not in st.session_state:
     st.session_state['transcript_history'] = []
 if 'should_rerun' not in st.session_state:
     st.session_state['should_rerun'] = False
+if 'search_columns' not in st.session_state:
+    st.session_state['search_columns'] = []
+if 'initial_search_done' not in st.session_state:
+    st.session_state['initial_search_done'] = False
 
 class VideoSearch:
     def __init__(self):
         self.load_dataset()
 
     def fetch_dataset_rows(self):
+        """Fetch dataset from Hugging Face API"""
         try:
             url = "https://datasets-server.huggingface.co/first-rows?dataset=omegalabsinc%2Fomega-multimodal&config=default&split=train"
             response = requests.get(url, timeout=30)
 
             if response.status_code == 200:
                 data = response.json()
                 if 'rows' in data:
                     processed_rows = []
                     for row_data in data['rows']:
+                        row = row_data.get('row', row_data)
+                        for key in row:
+                            if any(term in key.lower() for term in ['embed', 'vector', 'encoding']):
+                                if isinstance(row[key], str):
+                                    try:
+                                        row[key] = [float(x.strip()) for x in row[key].strip('[]').split(',') if x.strip()]
+                                    except:
+                                        continue
+                        processed_rows.append(row)
 
                     df = pd.DataFrame(processed_rows)
+                    # Update search columns
+                    st.session_state['search_columns'] = [col for col in df.columns
+                                                          if col not in ['video_embed', 'description_embed', 'audio_embed']]
                     return df
+            return self.load_example_data()
 
         except Exception as e:
             return self.load_example_data()
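The first-rows endpoint of the datasets-server API returns JSON of the form {"rows": [{"row_idx": ..., "row": {...}}, ...]}, which is why the loop above unwraps the nested 'row' key. A minimal standalone sketch of the same fetch (the response shape is inferred from the parsing above):

# Sketch: fetch the first rows of omega-multimodal via the datasets-server
# API and flatten the nested "row" objects into a DataFrame.
import requests
import pandas as pd

URL = ("https://datasets-server.huggingface.co/first-rows"
       "?dataset=omegalabsinc%2Fomega-multimodal&config=default&split=train")

def first_rows_df(url: str = URL) -> pd.DataFrame:
    payload = requests.get(url, timeout=30).json()
    rows = [item.get("row", item) for item in payload.get("rows", [])]
    return pd.DataFrame(rows)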
 
+    def prepare_features(self):
+        """Prepare embeddings with adaptive field detection"""
+        try:
+            embed_cols = [col for col in self.dataset.columns
+                          if any(term in col.lower() for term in ['embed', 'vector', 'encoding'])]
+
+            embeddings = {}
+            for col in embed_cols:
+                try:
+                    data = []
+                    for row in self.dataset[col]:
+                        if isinstance(row, str):
+                            values = [float(x.strip()) for x in row.strip('[]').split(',') if x.strip()]
+                        elif isinstance(row, list):
+                            values = row
+                        else:
+                            continue
+                        data.append(values)
+
+                    if data:
+                        embeddings[col] = np.array(data)
+                except Exception as e:
+                    continue
+
+            # Set main embeddings for search
+            if 'video_embed' in embeddings:
+                self.video_embeds = embeddings['video_embed']
+            else:
+                self.video_embeds = next(iter(embeddings.values()))
+
+            if 'description_embed' in embeddings:
+                self.text_embeds = embeddings['description_embed']
+            else:
+                self.text_embeds = self.video_embeds
+
+        except Exception as e:
+            # Fallback to random embeddings
+            num_rows = len(self.dataset)
+            self.video_embeds = np.random.randn(num_rows, 384)
+            self.text_embeds = np.random.randn(num_rows, 384)
+
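Embedding cells can arrive either as real JSON lists or as their string representation, so the parser above strips brackets and splits on commas. A slightly more defensive variant of the same idea, a sketch assuming JSON-style lists of floats:

# Sketch: normalize an embedding cell that may be a list or a string such as
# "[0.1, -0.2, 0.3]". json.loads covers the well-formed case; the split
# fallback mirrors the bracket-stripping logic above.
import json

def parse_embedding(cell):
    if isinstance(cell, list):
        return [float(x) for x in cell]
    if isinstance(cell, str):
        try:
            return [float(x) for x in json.loads(cell)]
        except (ValueError, TypeError):
            return [float(x) for x in cell.strip('[]').split(',') if x.strip()]
    return None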
     def load_example_data(self):
         """Load example data as fallback"""
         example_data = [
             {
                 "video_id": "cd21da96-fcca-4c94-a60f-0b1e4e1e29fc",
                 "youtube_id": "IO-vwtyicn4",
+                "description": "This video shows a close-up of an ancient text carved into a surface.",
                 "views": 45489,
                 "start_time": 1452,
                 "end_time": 1458,
                 "video_embed": [0.014160037972033024, -0.003111184574663639, -0.016604168340563774],
                 "description_embed": [-0.05835828185081482, 0.02589797042310238, 0.11952091753482819]
             }
         ]
         return pd.DataFrame(example_data)
 
     def load_dataset(self):
+        self.dataset = self.fetch_dataset_rows()
+        self.prepare_features()
+
+    def search(self, query, column=None, top_k=20):
+        """Search videos using query with column filtering"""
+        # Semantic search
         query_embedding = self.text_model.encode([query])[0]
         video_sims = cosine_similarity([query_embedding], self.video_embeds)[0]
         text_sims = cosine_similarity([query_embedding], self.text_embeds)[0]
         combined_sims = 0.5 * video_sims + 0.5 * text_sims
+
+        # Column-specific text search if specified
+        if column and column in self.dataset.columns:
+            mask = self.dataset[column].astype(str).str.contains(query, case=False)
+            combined_sims[~mask] *= 0.5  # Reduce scores for non-matching rows
+
+        # Get top results
+        top_k = min(top_k, 100)
         top_indices = np.argsort(combined_sims)[-top_k:][::-1]
 
         results = []
         for idx in top_indices:
+            result = {
+                'relevance_score': float(combined_sims[idx])
+            }
+            for col in self.dataset.columns:
+                if col not in ['video_embed', 'description_embed', 'audio_embed']:
+                    result[col] = self.dataset.iloc[idx][col]
+            results.append(result)
+
         return results
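The scoring above averages the video and text cosine-similarity channels 50/50, then halves the score of rows whose selected column does not contain the query string. A toy NumPy example of that combination, with made-up similarity values:

# Toy example of the combined scoring used by search().
import numpy as np

video_sims = np.array([0.9, 0.2, 0.6])
text_sims = np.array([0.7, 0.4, 0.8])
combined = 0.5 * video_sims + 0.5 * text_sims   # [0.8, 0.3, 0.7]

mask = np.array([True, True, False])            # per-row column match
combined[~mask] *= 0.5                          # [0.8, 0.3, 0.35]

top2 = np.argsort(combined)[-2:][::-1]          # best two indices: [0, 2]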
 
+def main():
+    st.title("πŸŽ₯ Video Search with Speech Recognition")
+
+    # Initialize search
+    search = VideoSearch()
+
+    # Create tabs
+    tab1, tab2, tab3 = st.tabs(["πŸ” Search", "πŸŽ™οΈ Voice Input", "πŸ“‚ Files"])
+
+    with tab1:
+        st.subheader("Search Videos")
+
+        # Search interface
+        col1, col2 = st.columns([3, 1])
+        with col1:
+            query = st.text_input("Enter your search query:", value="ancient" if not st.session_state['initial_search_done'] else "")
+        with col2:
+            search_column = st.selectbox("Search in field:",
+                                         ["All Fields"] + st.session_state['search_columns'])
+
+        col3, col4 = st.columns(2)
+        with col3:
+            num_results = st.slider("Number of results:", 1, 100, 20)
+        with col4:
+            search_button = st.button("πŸ” Search")
+
+        # Process search
+        if (search_button or not st.session_state['initial_search_done']) and query:
+            st.session_state['initial_search_done'] = True
+            selected_column = None if search_column == "All Fields" else search_column
+            results = search.search(query, selected_column, num_results)
+
+            st.session_state['search_history'].append({
+                'query': query,
+                'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                'results': results[:5]  # Store only top 5 for history
+            })
+
+            for i, result in enumerate(results, 1):
+                with st.expander(f"Result {i}: {result['description'][:100]}...",
+                                 expanded=i==1):
+                    cols = st.columns([2, 1])
+                    with cols[0]:
+                        st.markdown("**Description:**")
+                        st.write(result['description'])
+                        st.markdown(f"**Time Range:** {result['start_time']}s - {result['end_time']}s")
+                        st.markdown(f"**Views:** {result['views']:,}")
+
+                    with cols[1]:
+                        st.markdown(f"**Relevance Score:** {result['relevance_score']:.2%}")
+                        if result.get('youtube_id'):
+                            st.video(f"https://youtube.com/watch?v={result['youtube_id']}&t={result['start_time']}")
+
+                        if st.button(f"πŸ”Š Audio Summary", key=f"audio_{i}"):
+                            summary = f"Video summary: {result['description'][:200]}"
+                            audio_file = asyncio.run(generate_speech(summary))
+                            if audio_file:
+                                st.audio(audio_file)
+                                if os.path.exists(audio_file):
+                                    os.remove(audio_file)
+
+    with tab2:
+        st.subheader("Voice Input")
+
+        col1, col2 = st.columns(2)
+        with col1:
+            st.write("πŸŽ™οΈ Speech Recognition")
+            voice_input = speech_component()
+
+            if voice_input and voice_input != st.session_state['last_voice_input']:
+                st.session_state['last_voice_input'] = voice_input
+                st.markdown("**Transcribed Text:**")
+                st.write(voice_input)
+
+                if st.button("πŸ” Search"):
+                    results = search.search(voice_input, None, num_results)
+                    for i, result in enumerate(results, 1):
+                        with st.expander(f"Result {i}", expanded=i==1):
+                            st.write(result['description'])
+                            if result.get('youtube_id'):
+                                st.video(f"https://youtube.com/watch?v={result['youtube_id']}&t={result.get('start_time', 0)}")
+
+        with col2:
+            st.write("🎡 Audio Recording")
+            audio_bytes = audio_recorder()
+            if audio_bytes:
+                audio_path = f"temp_audio_{datetime.now().strftime('%Y%m%d_%H%M%S')}.wav"
+                with open(audio_path, "wb") as f:
+                    f.write(audio_bytes)
+                st.success("Audio recorded successfully!")
+                if os.path.exists(audio_path):
+                    os.remove(audio_path)
+
+    with tab3:
+        show_file_manager()
+
+    # Sidebar
+    with st.sidebar:
+        st.subheader("βš™οΈ Settings & History")
+
+        if st.button("πŸ—‘οΈ Clear History"):
+            st.session_state['search_history'] = []
+            st.experimental_rerun()
+
+        st.markdown("### Recent Searches")
+        for entry in reversed(st.session_state['search_history'][-5:]):
+            with st.expander(f"{entry['timestamp']}: {entry['query']}"):
+                for i, result in enumerate(entry['results'], 1):
+                    st.write(f"{i}. {result['description'][:100]}...")
+
+        st.markdown("### Voice Settings")
+        st.selectbox("TTS Voice:",
+                     ["en-US-AriaNeural", "en-US-GuyNeural", "en-GB-SoniaNeural"],
+                     key="tts_voice")
 
 async def generate_speech(text, voice="en-US-AriaNeural"):
     """Generate speech using Edge TTS"""
     if not text.strip():
         return None
     try:
         communicate = edge_tts.Communicate(text, voice)
         audio_file = f"speech_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp3"
         await communicate.save(audio_file)
         return audio_file
     except Exception as e:
         st.error(f"Error generating speech: {e}")
         return None
 
+def show_file_manager():
+    """Display file manager interface"""
+    st.subheader("πŸ“‚ File Manager")
+
+    # File operations
+    col1, col2 = st.columns(2)
+    with col1:
+        uploaded_file = st.file_uploader("Upload File", type=['txt', 'md', 'mp3'])
+        if uploaded_file:
+            with open(uploaded_file.name, "wb") as f:
+                f.write(uploaded_file.getvalue())
+            st.success(f"Uploaded: {uploaded_file.name}")
+            st.rerun()
+
+    with col2:
+        if st.button("πŸ—‘ Clear All Files"):
+            for f in glob.glob("*.txt") + glob.glob("*.md") + glob.glob("*.mp3"):
+                os.remove(f)
+            st.success("All files cleared!")
+            st.rerun()
+
+    # Show existing files
+    files = glob.glob("*.txt") + glob.glob("*.md") + glob.glob("*.mp3")
+    if files:
+        st.write("### Existing Files")
+        for f in files:
+            with st.expander(f"πŸ“„ {os.path.basename(f)}"):
+                if f.endswith('.mp3'):
+                    st.audio(f)
+                else:
+                    with open(f, 'r') as file:
+                        st.text_area("Content", file.read(), height=100)
+                if st.button(f"Delete {os.path.basename(f)}", key=f"del_{f}"):
+                    os.remove(f)
+                    st.rerun()
+
+@st.cache_data(ttl=3600)
+def load_file_list():
+    """Cache file listing"""
+    return glob.glob("*.txt") + glob.glob("*.md") + glob.glob("*.mp3")
+
+@st.cache_resource
+def get_speech_model():
+    """Cache speech model initialization"""
+    return edge_tts.Communicate
+
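search() relies on self.text_model, whose initialization falls outside the hunks shown here; the @st.cache_resource pattern above is the usual way to keep one such model instance alive across Streamlit reruns. A sketch, assuming sentence-transformers (the 384-dimensional random fallback in prepare_features matches all-MiniLM-L6-v2's output size):

# Sketch: cache a sentence encoder across reruns. The model name is an
# assumption; only the caching pattern is taken from the code above.
import streamlit as st
from sentence_transformers import SentenceTransformer

@st.cache_resource
def get_text_model():
    return SentenceTransformer("all-MiniLM-L6-v2")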
+async def generate_speech(text, voice="en-US-AriaNeural"):
+    """Generate speech using Edge TTS with cached model"""
+    if not text.strip():
+        return None
+    try:
+        communicate = get_speech_model()(text, voice)
+        audio_file = f"speech_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp3"
+        await communicate.save(audio_file)
+        return audio_file
+    except Exception as e:
+        st.error(f"Error generating speech: {e}")
+        return None
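Outside Streamlit, the same edge_tts call can be exercised directly from an event loop; a minimal standalone sketch using the Communicate(...).save(...) API the app relies on:

# Minimal standalone edge_tts usage; run with: python tts_demo.py
import asyncio
import edge_tts

async def speak(text: str, path: str = "demo.mp3",
                voice: str = "en-US-AriaNeural") -> None:
    await edge_tts.Communicate(text, voice).save(path)

if __name__ == "__main__":
    asyncio.run(speak("Hello from Edge TTS."))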
 
 def main():
+    st.title("πŸŽ₯ Video Search with Speech Recognition")
 
+    # Initialize search with cached model
     search = VideoSearch()
 
+    # Create tabs
+    tab1, tab2, tab3 = st.tabs(["πŸ” Search", "πŸŽ™οΈ Voice Input", "πŸ“‚ Files"])
 
     with tab1:
+        st.subheader("Search Videos")
 
+        # Search interface
+        col1, col2 = st.columns([3, 1])
         with col1:
+            query = st.text_input("Enter your search query:",
+                                  value="ancient" if not st.session_state['initial_search_done'] else "")
         with col2:
+            search_column = st.selectbox("Search in field:",
+                                         ["All Fields"] + st.session_state['search_columns'])
+
+        col3, col4 = st.columns(2)
+        with col3:
+            num_results = st.slider("Number of results:", 1, 100, 20)
+        with col4:
+            search_button = st.button("πŸ” Search")
+
+        # Process search
+        if (search_button or not st.session_state['initial_search_done']) and query:
+            st.session_state['initial_search_done'] = True
+            selected_column = None if search_column == "All Fields" else search_column
+            with st.spinner("Searching..."):
+                results = search.search(query, selected_column, num_results)
 
             st.session_state['search_history'].append({
                 'query': query,
                 'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                'results': results[:5]  # Store only top 5 for history
             })
 
             for i, result in enumerate(results, 1):
+                with st.expander(f"Result {i}: {result['description'][:100]}...",
+                                 expanded=i==1):
                     cols = st.columns([2, 1])
                     with cols[0]:
+                        st.markdown("**Description:**")
                         st.write(result['description'])
                         st.markdown(f"**Time Range:** {result['start_time']}s - {result['end_time']}s")
                         st.markdown(f"**Views:** {result['views']:,}")
 
                     with cols[1]:
                         st.markdown(f"**Relevance Score:** {result['relevance_score']:.2%}")
+                        if result.get('youtube_id'):
                             st.video(f"https://youtube.com/watch?v={result['youtube_id']}&t={result['start_time']}")
 
+                        if st.button(f"πŸ”Š Audio Summary", key=f"audio_{i}"):
                             summary = f"Video summary: {result['description'][:200]}"
                             audio_file = asyncio.run(generate_speech(summary))
                             if audio_file:
                                 st.audio(audio_file)
+                                if os.path.exists(audio_file):
+                                    os.remove(audio_file)
+
     with tab2:
+        st.subheader("Voice Input")
 
         col1, col2 = st.columns(2)
         with col1:
             st.write("πŸŽ™οΈ Speech Recognition")
             voice_input = speech_component()
 
             if voice_input and voice_input != st.session_state['last_voice_input']:
                 st.session_state['last_voice_input'] = voice_input
                 st.markdown("**Transcribed Text:**")
                 st.write(voice_input)
 
+                if st.button("πŸ” Search"):
+                    with st.spinner("Searching..."):
+                        results = search.search(voice_input, None, num_results)
                     for i, result in enumerate(results, 1):
                         with st.expander(f"Result {i}", expanded=i==1):
                             st.write(result['description'])
+                            if result.get('youtube_id'):
+                                st.video(f"https://youtube.com/watch?v={result['youtube_id']}&t={result.get('start_time', 0)}")
 
         with col2:
+            st.write("🎡 Audio Recording")
             audio_bytes = audio_recorder()
             if audio_bytes:
+                audio_path = f"temp_audio_{datetime.now().strftime('%Y%m%d_%H%M%S')}.wav"
+                with open(audio_path, "wb") as f:
+                    f.write(audio_bytes)
+                st.success("Audio recorded successfully!")
+                if os.path.exists(audio_path):
+                    os.remove(audio_path)
 
     with tab3:
+        show_file_manager()
 
+    # Sidebar
     with st.sidebar:
         st.subheader("βš™οΈ Settings & History")
 
         if st.button("πŸ—‘οΈ Clear History"):
             st.session_state['search_history'] = []
+            st.rerun()
 
         st.markdown("### Recent Searches")
         for entry in reversed(st.session_state['search_history'][-5:]):
+            with st.expander(f"{entry['timestamp']}: {entry['query']}"):
+                for i, result in enumerate(entry['results'], 1):
+                    st.write(f"{i}. {result['description'][:100]}...")
 
         st.markdown("### Voice Settings")
         st.selectbox("TTS Voice:",
                      ["en-US-AriaNeural", "en-US-GuyNeural", "en-GB-SoniaNeural"],
                      key="tts_voice")
+        with open(uploaded_file.name, "wb") as f:
+            f.write(uploaded_file.getvalue())
+        st.success(f"Uploaded: {uploaded_file.name}")
+
+        with col2:
+            if st.button("πŸ—‘ Clear All Files"):
+                for f in glob.glob("*.txt") + glob.glob("*.md") + glob.glob("*.mp3"):
+                    os.remove(f)
+                st.success("All files cleared!")
+
+        # Show existing files
+        files = glob.glob("*.txt") + glob.glob("*.md") + glob.glob("*.mp3")
+        if files:
+            st.write("### Existing Files")
+            for f in files:
+                with st.expander(f"πŸ“„ {os.path.basename(f)}"):
+                    if f.endswith('.mp3'):
+                        st.audio(f)
+                    else:
+                        with open(f, 'r') as file:
+                            st.text_area("Content", file.read(), height=100)
+                    if st.button(f"Delete {os.path.basename(f)}", key=f"del_{f}"):
+                        os.remove(f)
+                        st.experimental_rerun()
 
 if __name__ == "__main__":
     main()