awacke1 committed
Commit 7938082 · verified · 1 Parent(s): 30a0d44

Update app.py

Files changed (1):
  1. app.py +128 -215
app.py CHANGED
@@ -16,6 +16,15 @@ import requests
 from collections import defaultdict
 from audio_recorder_streamlit import audio_recorder
 import streamlit.components.v1 as components
+import openai
+from dotenv import load_dotenv
+
+# Load environment
+load_dotenv()
+openai.api_key = os.getenv('OPENAI_API_KEY')
+
+# Ensure edge_tts and other dependencies are installed
+# pip install edge-tts openai streamlit-audiorecorder
 
 # Initialize session state
 if 'search_history' not in st.session_state:
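
A note on this hunk: the new import block reads OPENAI_API_KEY from the environment via python-dotenv. A minimal sketch of the expected setup, assuming a .env file sits next to app.py (the fail-fast check is illustrative, not part of the commit):

    import os
    from dotenv import load_dotenv

    load_dotenv()  # copies key=value pairs from .env into the process environment
    if not os.getenv('OPENAI_API_KEY'):
        raise RuntimeError("OPENAI_API_KEY missing: add it to .env or export it")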
@@ -30,6 +39,10 @@ if 'search_columns' not in st.session_state:
     st.session_state['search_columns'] = []
 if 'initial_search_done' not in st.session_state:
     st.session_state['initial_search_done'] = False
+if 'tts_voice' not in st.session_state:
+    st.session_state['tts_voice'] = "en-US-AriaNeural"
+if 'arxiv_last_query' not in st.session_state:
+    st.session_state['arxiv_last_query'] = ""
 
 class VideoSearch:
     def __init__(self):
@@ -37,11 +50,10 @@ class VideoSearch:
         self.load_dataset()
 
     def fetch_dataset_rows(self):
-        """Fetch dataset from Hugging Face API"""
+        """Fetch dataset from HF API"""
         try:
             url = "https://datasets-server.huggingface.co/first-rows?dataset=omegalabsinc%2Fomega-multimodal&config=default&split=train"
             response = requests.get(url, timeout=30)
-
             if response.status_code == 200:
                 data = response.json()
                 if 'rows' in data:
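
A note on this hunk: fetch_dataset_rows targets the Hugging Face datasets-server first-rows endpoint. A standalone sketch of the call and the response shape the code relies on (the records flattening is illustrative):

    import requests

    url = ("https://datasets-server.huggingface.co/first-rows"
           "?dataset=omegalabsinc%2Fomega-multimodal&config=default&split=train")
    payload = requests.get(url, timeout=30).json()
    # payload['rows'] is a list of {"row_idx": ..., "row": {column: value, ...}}
    records = [item['row'] for item in payload.get('rows', [])]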
@@ -63,8 +75,7 @@ class VideoSearch:
                            if col not in ['video_embed', 'description_embed', 'audio_embed']]
                     return df
             return self.load_example_data()
-
-        except Exception as e:
+        except Exception:
             return self.load_example_data()
 
     def prepare_features(self):
@@ -88,7 +99,7 @@ class VideoSearch:
 
                     if data:
                         embeddings[col] = np.array(data)
-                except Exception as e:
+                except:
                     continue
 
         # Set main embeddings for search
@@ -102,14 +113,13 @@ class VideoSearch:
             else:
                 self.text_embeds = self.video_embeds
 
-        except Exception as e:
+        except Exception:
             # Fallback to random embeddings
             num_rows = len(self.dataset)
             self.video_embeds = np.random.randn(num_rows, 384)
             self.text_embeds = np.random.randn(num_rows, 384)
 
     def load_example_data(self):
-        """Load example data as fallback"""
         example_data = [
             {
                 "video_id": "cd21da96-fcca-4c94-a60f-0b1e4e1e29fc",
@@ -129,27 +139,22 @@ class VideoSearch:
         self.prepare_features()
 
     def search(self, query, column=None, top_k=20):
-        """Search videos using query with column filtering"""
-        # Semantic search
         query_embedding = self.text_model.encode([query])[0]
         video_sims = cosine_similarity([query_embedding], self.video_embeds)[0]
         text_sims = cosine_similarity([query_embedding], self.text_embeds)[0]
         combined_sims = 0.5 * video_sims + 0.5 * text_sims
 
-        # Column-specific text search if specified
-        if column and column in self.dataset.columns:
+        # Column filtering
+        if column and column in self.dataset.columns and column != "All Fields":
             mask = self.dataset[column].astype(str).str.contains(query, case=False)
-            combined_sims[~mask] *= 0.5  # Reduce scores for non-matching rows
+            combined_sims[~mask] *= 0.5
 
-        # Get top results
         top_k = min(top_k, 100)
         top_indices = np.argsort(combined_sims)[-top_k:][::-1]
 
         results = []
         for idx in top_indices:
-            result = {
-                'relevance_score': float(combined_sims[idx])
-            }
+            result = {'relevance_score': float(combined_sims[idx])}
             for col in self.dataset.columns:
                 if col not in ['video_embed', 'description_embed', 'audio_embed']:
                     result[col] = self.dataset.iloc[idx][col]
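
A note on the reworked search(): it blends the two cosine-similarity vectors equally, then halves the score of any row whose column text does not contain the query. A toy numpy walk-through with invented values:

    import numpy as np

    video_sims = np.array([0.9, 0.2, 0.6])   # hypothetical cosine similarities
    text_sims  = np.array([0.7, 0.8, 0.5])
    combined   = 0.5 * video_sims + 0.5 * text_sims    # -> [0.8, 0.5, 0.55]

    mask = np.array([True, False, True])     # rows whose column text matches the query
    combined[~mask] *= 0.5                   # -> [0.8, 0.25, 0.55]

    top_indices = np.argsort(combined)[-2:][::-1]      # best first: [0, 2]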
@@ -157,127 +162,19 @@ class VideoSearch:
 
         return results
 
-def main():
-    st.title("🎥 Video Search with Speech Recognition")
-
-    # Initialize search
-    search = VideoSearch()
-
-    # Create tabs
-    tab1, tab2, tab3 = st.tabs(["🔍 Search", "🎙️ Voice Input", "📂 Files"])
-
-    with tab1:
-        st.subheader("Search Videos")
-
-        # Search interface
-        col1, col2 = st.columns([3, 1])
-        with col1:
-            query = st.text_input("Enter your search query:", value="ancient" if not st.session_state['initial_search_done'] else "")
-        with col2:
-            search_column = st.selectbox("Search in field:",
-                                         ["All Fields"] + st.session_state['search_columns'])
-
-        col3, col4 = st.columns(2)
-        with col3:
-            num_results = st.slider("Number of results:", 1, 100, 20)
-        with col4:
-            search_button = st.button("🔍 Search")
-
-        # Process search
-        if (search_button or not st.session_state['initial_search_done']) and query:
-            st.session_state['initial_search_done'] = True
-            selected_column = None if search_column == "All Fields" else search_column
-            results = search.search(query, selected_column, num_results)
-
-            st.session_state['search_history'].append({
-                'query': query,
-                'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-                'results': results[:5]  # Store only top 5 for history
-            })
-
-            for i, result in enumerate(results, 1):
-                with st.expander(f"Result {i}: {result['description'][:100]}...",
-                                 expanded=i==1):
-                    cols = st.columns([2, 1])
-                    with cols[0]:
-                        st.markdown("**Description:**")
-                        st.write(result['description'])
-                        st.markdown(f"**Time Range:** {result['start_time']}s - {result['end_time']}s")
-                        st.markdown(f"**Views:** {result['views']:,}")
-
-                    with cols[1]:
-                        st.markdown(f"**Relevance Score:** {result['relevance_score']:.2%}")
-                        if result.get('youtube_id'):
-                            st.video(f"https://youtube.com/watch?v={result['youtube_id']}&t={result['start_time']}")
-
-                    if st.button(f"🔊 Audio Summary", key=f"audio_{i}"):
-                        summary = f"Video summary: {result['description'][:200]}"
-                        audio_file = asyncio.run(generate_speech(summary))
-                        if audio_file:
-                            st.audio(audio_file)
-                            if os.path.exists(audio_file):
-                                os.remove(audio_file)
-
-    with tab2:
-        st.subheader("Voice Input")
-
-        col1, col2 = st.columns(2)
-        with col1:
-            st.write("🎙️ Speech Recognition")
-            voice_input = speech_component()
-
-            if voice_input and voice_input != st.session_state['last_voice_input']:
-                st.session_state['last_voice_input'] = voice_input
-                st.markdown("**Transcribed Text:**")
-                st.write(voice_input)
-
-                if st.button("🔍 Search"):
-                    results = search.search(voice_input, None, num_results)
-                    for i, result in enumerate(results, 1):
-                        with st.expander(f"Result {i}", expanded=i==1):
-                            st.write(result['description'])
-                            if result.get('youtube_id'):
-                                st.video(f"https://youtube.com/watch?v={result['youtube_id']}&t={result.get('start_time', 0)}")
-
-        with col2:
-            st.write("🎵 Audio Recording")
-            audio_bytes = audio_recorder()
-            if audio_bytes:
-                audio_path = f"temp_audio_{datetime.now().strftime('%Y%m%d_%H%M%S')}.wav"
-                with open(audio_path, "wb") as f:
-                    f.write(audio_bytes)
-                st.success("Audio recorded successfully!")
-                if os.path.exists(audio_path):
-                    os.remove(audio_path)
-
-    with tab3:
-        show_file_manager()
-
-    # Sidebar
-    with st.sidebar:
-        st.subheader("⚙️ Settings & History")
-
-        if st.button("🗑️ Clear History"):
-            st.session_state['search_history'] = []
-            st.rerun()
-
-        st.markdown("### Recent Searches")
-        for entry in reversed(st.session_state['search_history'][-5:]):
-            with st.expander(f"{entry['timestamp']}: {entry['query']}"):
-                for i, result in enumerate(entry['results'], 1):
-                    st.write(f"{i}. {result['description'][:100]}...")
-
-        st.markdown("### Voice Settings")
-        st.selectbox("TTS Voice:",
-                     ["en-US-AriaNeural", "en-US-GuyNeural", "en-GB-SoniaNeural"],
-                     key="tts_voice")
-
-async def generate_speech(text, voice="en-US-AriaNeural"):
-    """Generate speech using Edge TTS"""
+# Use edge_tts for TTS
+@st.cache_resource
+def get_speech_model():
+    """Cache speech model initialization."""
+    return edge_tts.Communicate
+
+async def generate_speech(text, voice=None):
     if not text.strip():
         return None
+    if not voice:
+        voice = st.session_state['tts_voice']
     try:
-        communicate = edge_tts.Communicate(text, voice)
+        communicate = get_speech_model()(text, voice)
         audio_file = f"speech_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp3"
         await communicate.save(audio_file)
         return audio_file
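
A note on this hunk: @st.cache_resource here caches the edge_tts.Communicate class object itself rather than a loaded model, so the win is mostly cosmetic; the per-call work still happens in save(). The underlying edge-tts flow, as a self-contained sketch runnable outside Streamlit (voice name taken from the diff, file name illustrative):

    import asyncio
    import edge_tts

    async def speak(text: str, voice: str = "en-US-AriaNeural", path: str = "speech.mp3") -> str:
        communicate = edge_tts.Communicate(text, voice)  # streams TTS audio from the Edge service
        await communicate.save(path)                     # writes the MP3 to disk
        return path

    if __name__ == "__main__":
        print(asyncio.run(speak("Hello from edge-tts")))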
@@ -285,11 +182,19 @@ async def generate_speech(text, voice="en-US-AriaNeural"):
         st.error(f"Error generating speech: {e}")
         return None
 
+def transcribe_audio(audio_path):
+    """Transcribe audio using Whisper."""
+    try:
+        with open(audio_path, "rb") as f:
+            transcription = openai.Audio.transcribe("whisper-1", f)
+        return transcription["text"].strip()
+    except Exception as e:
+        st.error(f"Error transcribing audio: {e}")
+        return ""
+
 def show_file_manager():
     """Display file manager interface"""
     st.subheader("📂 File Manager")
-
-    # File operations
     col1, col2 = st.columns(2)
     with col1:
         uploaded_file = st.file_uploader("Upload File", type=['txt', 'md', 'mp3'])
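
A note on this hunk: transcribe_audio() calls openai.Audio.transcribe, the pre-1.0 OpenAI SDK interface; on openai>=1.0 that attribute no longer exists. For reference, the equivalent call on the current SDK (assumes OPENAI_API_KEY is set in the environment):

    from openai import OpenAI

    client = OpenAI()  # picks up OPENAI_API_KEY automatically
    with open("temp_audio.wav", "rb") as f:
        transcription = client.audio.transcriptions.create(model="whisper-1", file=f)
    print(transcription.text)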
@@ -297,16 +202,15 @@ def show_file_manager():
             with open(uploaded_file.name, "wb") as f:
                 f.write(uploaded_file.getvalue())
             st.success(f"Uploaded: {uploaded_file.name}")
-            st.rerun()
+            st.experimental_rerun()
 
     with col2:
         if st.button("🗑 Clear All Files"):
             for f in glob.glob("*.txt") + glob.glob("*.md") + glob.glob("*.mp3"):
                 os.remove(f)
             st.success("All files cleared!")
-            st.rerun()
+            st.experimental_rerun()
 
-    # Show existing files
     files = glob.glob("*.txt") + glob.glob("*.md") + glob.glob("*.mp3")
     if files:
         st.write("### Existing Files")
@@ -315,52 +219,54 @@ def show_file_manager():
                 if f.endswith('.mp3'):
                     st.audio(f)
                 else:
-                    with open(f, 'r') as file:
+                    with open(f, 'r', encoding='utf-8') as file:
                         st.text_area("Content", file.read(), height=100)
                 if st.button(f"Delete {os.path.basename(f)}", key=f"del_{f}"):
                     os.remove(f)
-                    st.rerun()
+                    st.experimental_rerun()
 
-@st.cache_data(ttl=3600)
-def load_file_list():
-    """Cache file listing"""
-    return glob.glob("*.txt") + glob.glob("*.md") + glob.glob("*.mp3")
-
-@st.cache_resource
-def get_speech_model():
-    """Cache speech model initialization"""
-    return edge_tts.Communicate
-
-async def generate_speech(text, voice="en-US-AriaNeural"):
-    """Generate speech using Edge TTS with cached model"""
-    if not text.strip():
-        return None
-    try:
-        communicate = get_speech_model()(text, voice)
-        audio_file = f"speech_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp3"
-        await communicate.save(audio_file)
-        return audio_file
-    except Exception as e:
-        st.error(f"Error generating speech: {e}")
-        return None
+##########################
+#   Arxiv Integration    #
+##########################
+
+# You need to implement or integrate perform_ai_lookup from your second app into this code.
+# This is a placeholder. Replace with your actual perform_ai_lookup function logic.
+# Ensure you have your Arxiv RAG model endpoint available.
+
+# Example placeholder implementation (replace with your actual second app code):
+def perform_ai_lookup(q, vocal_summary=True, extended_refs=False, titles_summary=True, full_audio=False):
+    # Placeholder: In your real code, you'll call your Arxiv RAG endpoint and get results.
+    # Here we just simulate a response.
+    mock_answer = f"This is a mock Arxiv response for query: {q}.\nReferences:\n[Paper 1] Example Title"
+    st.markdown(f"**Arxiv Search Results for '{q}':**\n\n{mock_answer}")
+
+    if vocal_summary:
+        audio_file = asyncio.run(generate_speech("This is a spoken summary of Arxiv results."))
+        if audio_file:
+            st.audio(audio_file)
+
+    # Add any other logic: extended_refs, titles_summary, etc.
+    return mock_answer
 
+############################
+# Main App Layout & Logic  #
+############################
 def main():
-    st.title("🎥 Video Search with Speech Recognition")
+    st.title("🎥 Video & Arxiv Search with Voice")
 
-    # Initialize search with cached model
+    # Initialize search class
    search = VideoSearch()
 
     # Create tabs
-    tab1, tab2, tab3 = st.tabs(["🔍 Search", "🎙️ Voice Input", "📂 Files"])
+    tab1, tab2, tab3, tab4 = st.tabs(["🔍 Search", "🎙️ Voice Input", "📚 Arxiv", "📂 Files"])
 
+    # ---- Tab 1: Video Search ----
     with tab1:
         st.subheader("Search Videos")
-
-        # Search interface
         col1, col2 = st.columns([3, 1])
         with col1:
             query = st.text_input("Enter your search query:",
-                                  value="ancient" if not st.session_state['initial_search_done'] else "")
+                                  value="ancient" if not st.session_state['initial_search_done'] else "")
         with col2:
             search_column = st.selectbox("Search in field:",
                                          ["All Fields"] + st.session_state['search_columns'])
@@ -370,8 +276,7 @@ def main():
             num_results = st.slider("Number of results:", 1, 100, 20)
         with col4:
             search_button = st.button("🔍 Search")
-
-        # Process search
+
         if (search_button or not st.session_state['initial_search_done']) and query:
             st.session_state['initial_search_done'] = True
             selected_column = None if search_column == "All Fields" else search_column
@@ -381,12 +286,11 @@ def main():
             st.session_state['search_history'].append({
                 'query': query,
                 'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-                'results': results[:5]  # Store only top 5 for history
+                'results': results[:5]
             })
 
             for i, result in enumerate(results, 1):
-                with st.expander(f"Result {i}: {result['description'][:100]}...",
-                                 expanded=i==1):
+                with st.expander(f"Result {i}: {result['description'][:100]}...", expanded=(i==1)):
                     cols = st.columns([2, 1])
                     with cols[0]:
                         st.markdown("**Description:**")
@@ -404,68 +308,77 @@ def main():
                         audio_file = asyncio.run(generate_speech(summary))
                         if audio_file:
                             st.audio(audio_file)
-                            if os.path.exists(audio_file):
-                                os.remove(audio_file)
-
+                            # Optionally delete after playing:
+                            # if os.path.exists(audio_file):
+                            #     os.remove(audio_file)
+
+    # ---- Tab 2: Voice Input ----
     with tab2:
         st.subheader("Voice Input")
 
-        col1, col2 = st.columns(2)
-        with col1:
-            st.write("🎙️ Speech Recognition")
-
-        with col2:
-            st.write("🎵 Audio Recording")
-            audio_bytes = audio_recorder()
-            if audio_bytes:
-                audio_path = f"temp_audio_{datetime.now().strftime('%Y%m%d_%H%M%S')}.wav"
-                with open(audio_path, "wb") as f:
-                    f.write(audio_bytes)
-                st.success("Audio recorded successfully!")
-                if os.path.exists(audio_path):
-                    os.remove(audio_path)
+        st.write("🎙️ Record your voice and automatically transcribe to text:")
+        audio_bytes = audio_recorder()
+        if audio_bytes:
+            # Save the recorded audio for transcription
+            audio_path = f"temp_audio_{datetime.now().strftime('%Y%m%d_%H%M%S')}.wav"
+            with open(audio_path, "wb") as f:
+                f.write(audio_bytes)
+            st.success("Audio recorded successfully!")
+
+            # Transcribe using Whisper
+            voice_query = transcribe_audio(audio_path)
+            if voice_query:
+                st.markdown("**Transcribed Text:**")
+                st.write(voice_query)
+                st.session_state['last_voice_input'] = voice_query
+
+                if st.button("🔍 Search from Voice"):
+                    results = search.search(voice_query, None, 20)
+                    for i, result in enumerate(results, 1):
+                        with st.expander(f"Result {i}", expanded=(i==1)):
+                            st.write(result['description'])
+                            if result.get('youtube_id'):
+                                st.video(f"https://youtube.com/watch?v={result['youtube_id']}&t={result.get('start_time', 0)}")
+
+            # Clean up
+            if os.path.exists(audio_path):
+                os.remove(audio_path)
 
+    # ---- Tab 3: Arxiv Search ----
     with tab3:
+        st.subheader("Arxiv Search")
+        q = st.text_input("Enter your Arxiv search query:", value=st.session_state['arxiv_last_query'])
+        vocal_summary = st.checkbox("🎙 Short Audio Summary", value=True)
+        extended_refs = st.checkbox("📜 Extended References", value=False)
+        titles_summary = st.checkbox("🔖 Titles Only", value=True)
+        full_audio = st.checkbox("📚 Full Audio Results", value=False)
+
+        if st.button("🔍 Arxiv Search"):
+            st.session_state['arxiv_last_query'] = q
+            perform_ai_lookup(q, vocal_summary=vocal_summary, extended_refs=extended_refs,
+                              titles_summary=titles_summary, full_audio=full_audio)
+
+    # ---- Tab 4: File Manager ----
+    with tab4:
         show_file_manager()
 
     # Sidebar
     with st.sidebar:
         st.subheader("⚙️ Settings & History")
-
         if st.button("🗑️ Clear History"):
             st.session_state['search_history'] = []
-            st.rerun()
+            st.experimental_rerun()
 
         st.markdown("### Recent Searches")
         for entry in reversed(st.session_state['search_history'][-5:]):
             with st.expander(f"{entry['timestamp']}: {entry['query']}"):
                 for i, result in enumerate(entry['results'], 1):
                     st.write(f"{i}. {result['description'][:100]}...")
-
+
         st.markdown("### Voice Settings")
-        st.selectbox("TTS Voice:",["en-US-AriaNeural", "en-US-GuyNeural", "en-GB-SoniaNeural"],key="tts_voice")
-
-
-        with col2:
-            if st.button("🗑 Clear All Files"):
-                for f in glob.glob("*.txt") + glob.glob("*.md") + glob.glob("*.mp3"):
-                    os.remove(f)
-                st.success("All files cleared!")
-
-        # Show existing files
-        files = glob.glob("*.txt") + glob.glob("*.md") + glob.glob("*.mp3")
-        if files:
-            st.write("### Existing Files")
-            for f in files:
-                with st.expander(f"📄 {os.path.basename(f)}"):
-                    if f.endswith('.mp3'):
-                        st.audio(f)
-                    else:
-                        with open(f, 'r') as file:
-                            st.text_area("Content", file.read(), height=100)
-                    if st.button(f"Delete {os.path.basename(f)}", key=f"del_{f}"):
-                        os.remove(f)
-                        st.rerun()
+        st.selectbox("TTS Voice:",
+                     ["en-US-AriaNeural", "en-US-GuyNeural", "en-GB-SoniaNeural"],
+                     key="tts_voice")
 
 if __name__ == "__main__":
-    main()
+    main()
 