awacke1 committed
Commit 5d65381 · verified · 1 Parent(s): 7938082

Update app.py

Files changed (1):
  1. app.py +77 -66
app.py CHANGED
@@ -16,15 +16,8 @@ import requests
 from collections import defaultdict
 from audio_recorder_streamlit import audio_recorder
 import streamlit.components.v1 as components
-import openai
-from dotenv import load_dotenv
-
-# Load environment
-load_dotenv()
-openai.api_key = os.getenv('OPENAI_API_KEY')
-
-# Ensure edge_tts and other dependencies are installed
-# pip install edge-tts openai streamlit-audiorecorder
+from urllib.parse import quote
+from xml.etree import ElementTree as ET
 
 # Initialize session state
 if 'search_history' not in st.session_state:
@@ -50,7 +43,7 @@ class VideoSearch:
         self.load_dataset()
 
     def fetch_dataset_rows(self):
-        """Fetch dataset from HF API"""
+        """Fetch dataset from Hugging Face API"""
         try:
             url = "https://datasets-server.huggingface.co/first-rows?dataset=omegalabsinc%2Fomega-multimodal&config=default&split=train"
             response = requests.get(url, timeout=30)
@@ -70,12 +63,11 @@ class VideoSearch:
                     processed_rows.append(row)
 
                 df = pd.DataFrame(processed_rows)
-                # Update search columns
                 st.session_state['search_columns'] = [col for col in df.columns
                                                       if col not in ['video_embed', 'description_embed', 'audio_embed']]
                 return df
             return self.load_example_data()
-        except Exception:
+        except:
             return self.load_example_data()
 
     def prepare_features(self):
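Note: the datasets-server endpoint queried in fetch_dataset_rows returns JSON of the form {"rows": [{"row_idx": 0, "row": {...}}, ...]}, so each record sits under a "row" key. A minimal sketch of that fetch-and-flatten step outside Streamlit (the expected column names in the final comment are assumptions based on the code above):

    import requests
    import pandas as pd

    url = ("https://datasets-server.huggingface.co/first-rows"
           "?dataset=omegalabsinc%2Fomega-multimodal&config=default&split=train")
    resp = requests.get(url, timeout=30)
    resp.raise_for_status()

    # Each item in "rows" wraps the actual record under its "row" key.
    records = [item["row"] for item in resp.json().get("rows", [])]
    df = pd.DataFrame(records)
    print(df.columns.tolist())  # assumed to include video_embed, description_embed, audio_embed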
@@ -113,13 +105,14 @@ class VideoSearch:
             else:
                 self.text_embeds = self.video_embeds
 
-        except Exception:
+        except:
             # Fallback to random embeddings
             num_rows = len(self.dataset)
             self.video_embeds = np.random.randn(num_rows, 384)
             self.text_embeds = np.random.randn(num_rows, 384)
 
     def load_example_data(self):
+        """Load example data as fallback"""
         example_data = [
             {
                 "video_id": "cd21da96-fcca-4c94-a60f-0b1e4e1e29fc",
@@ -162,10 +155,8 @@ class VideoSearch:
 
         return results
 
-# Use edge_tts for TTS
 @st.cache_resource
 def get_speech_model():
-    """Cache speech model initialization."""
     return edge_tts.Communicate
 
 async def generate_speech(text, voice=None):
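The body of generate_speech falls outside this hunk; for context, the usual edge_tts pattern is to build a Communicate object and save the synthesized audio to a file (the voice default and output path below are arbitrary examples):

    import edge_tts

    async def speak(text, voice="en-US-AriaNeural", out_path="speech.mp3"):
        # Communicate synthesizes the text; save() writes the audio to disk.
        await edge_tts.Communicate(text, voice).save(out_path)
        return out_path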
@@ -183,14 +174,10 @@ async def generate_speech(text, voice=None):
         return None
 
 def transcribe_audio(audio_path):
-    """Transcribe audio using Whisper."""
-    try:
-        with open(audio_path, "rb") as f:
-            transcription = openai.Audio.transcribe("whisper-1", f)
-        return transcription["text"].strip()
-    except Exception as e:
-        st.error(f"Error transcribing audio: {e}")
-        return ""
+    """Placeholder for ASR transcription (no OpenAI/Anthropic).
+    Integrate your own ASR model or API here."""
+    # For now, just return a message:
+    return "ASR not implemented. Integrate a local model or another service here."
 
 def show_file_manager():
     """Display file manager interface"""
@@ -225,34 +212,67 @@ def show_file_manager():
             os.remove(f)
         st.experimental_rerun()
 
-##########################
-# Arxiv Integration      #
-##########################
-
-# You need to implement or integrate perform_ai_lookup from your second app into this code.
-# This is a placeholder. Replace with your actual perform_ai_lookup function logic.
-# Ensure you have your Arxiv RAG model endpoint available.
-
-# Example placeholder implementation (replace with your actual second app code):
-def perform_ai_lookup(q, vocal_summary=True, extended_refs=False, titles_summary=True, full_audio=False):
-    # Placeholder: In your real code, you'll call your Arxiv RAG endpoint and get results.
-    # Here we just simulate a response.
-    mock_answer = f"This is a mock Arxiv response for query: {q}.\nReferences:\n[Paper 1] Example Title"
-    st.markdown(f"**Arxiv Search Results for '{q}':**\n\n{mock_answer}")
-
+def arxiv_search(query, max_results=5):
+    """Perform a simple Arxiv search using their API and return top results."""
+    base_url = "http://export.arxiv.org/api/query?"
+    # Encode the query
+    search_url = base_url + f"search_query={quote(query)}&start=0&max_results={max_results}"
+    r = requests.get(search_url)
+    if r.status_code == 200:
+        root = ET.fromstring(r.text)
+        # Namespace handling
+        ns = {'atom': 'http://www.w3.org/2005/Atom'}
+        entries = root.findall('atom:entry', ns)
+        results = []
+        for entry in entries:
+            title = entry.find('atom:title', ns).text.strip()
+            summary = entry.find('atom:summary', ns).text.strip()
+            link = None
+            for l in entry.findall('atom:link', ns):
+                if l.get('type') == 'text/html':
+                    link = l.get('href')
+                    break
+            results.append((title, summary, link))
+        return results
+    return []
+
+def perform_arxiv_lookup(q, vocal_summary=True, titles_summary=True, full_audio=False):
+    results = arxiv_search(q, max_results=5)
+    if not results:
+        st.write("No Arxiv results found.")
+        return
+    st.markdown(f"**Arxiv Search Results for '{q}':**")
+    for i, (title, summary, link) in enumerate(results, start=1):
+        st.markdown(f"**{i}. {title}**")
+        st.write(summary)
+        if link:
+            st.markdown(f"[View Paper]({link})")
+
+    # TTS Options
     if vocal_summary:
-        audio_file = asyncio.run(generate_speech("This is a spoken summary of Arxiv results."))
+        spoken_text = f"Here are some Arxiv results for {q}. "
+        if titles_summary:
+            spoken_text += " Titles: " + ", ".join([res[0] for res in results])
+        else:
+            # Just first summary if no titles_summary
+            spoken_text += " " + results[0][1][:200]
+
+        audio_file = asyncio.run(generate_speech(spoken_text))
         if audio_file:
             st.audio(audio_file)
 
-    # Add any other logic: extended_refs, titles_summary, etc.
-    return mock_answer
+    if full_audio:
+        # Full audio of summaries
+        full_text = ""
+        for i, (title, summary, _) in enumerate(results, start=1):
+            full_text += f"Result {i}: {title}. {summary} "
+        audio_file_full = asyncio.run(generate_speech(full_text))
+        if audio_file_full:
+            st.write("### Full Audio")
+            st.audio(audio_file_full)
 
-############################
-# Main App Layout & Logic  #
-############################
 def main():
-    st.title("πŸŽ₯ Video & Arxiv Search with Voice")
+    st.title("πŸŽ₯ Video & Arxiv Search with Voice (No OpenAI/Anthropic)")
 
     # Initialize search class
     search = VideoSearch()
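The new arxiv_search helper added above can be exercised directly outside Streamlit; the query string below is an arbitrary example:

    for title, summary, link in arxiv_search("multimodal video retrieval", max_results=3):
        print(title)
        print(summary[:120] + "...")
        print(link or "(no HTML link)")
        print("-" * 40)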
@@ -308,39 +328,32 @@ def main():
                 audio_file = asyncio.run(generate_speech(summary))
                 if audio_file:
                     st.audio(audio_file)
-                    # Optionally delete after playing:
-                    # if os.path.exists(audio_file):
-                    #     os.remove(audio_file)
 
     # ---- Tab 2: Voice Input ----
     with tab2:
         st.subheader("Voice Input")
 
-        st.write("πŸŽ™οΈ Record your voice and automatically transcribe to text:")
+        st.write("πŸŽ™οΈ Record your voice:")
         audio_bytes = audio_recorder()
         if audio_bytes:
-            # Save the recorded audio for transcription
             audio_path = f"temp_audio_{datetime.now().strftime('%Y%m%d_%H%M%S')}.wav"
             with open(audio_path, "wb") as f:
                 f.write(audio_bytes)
             st.success("Audio recorded successfully!")
 
-            # Transcribe using Whisper
             voice_query = transcribe_audio(audio_path)
-            if voice_query:
-                st.markdown("**Transcribed Text:**")
-                st.write(voice_query)
-                st.session_state['last_voice_input'] = voice_query
-
-                if st.button("πŸ” Search from Voice"):
-                    results = search.search(voice_query, None, 20)
-                    for i, result in enumerate(results, 1):
-                        with st.expander(f"Result {i}", expanded=(i==1)):
-                            st.write(result['description'])
-                            if result.get('youtube_id'):
-                                st.video(f"https://youtube.com/watch?v={result['youtube_id']}&t={result.get('start_time', 0)}")
+            st.markdown("**Transcribed Text:**")
+            st.write(voice_query)
+            st.session_state['last_voice_input'] = voice_query
+
+            if st.button("πŸ” Search from Voice"):
+                results = search.search(voice_query, None, 20)
+                for i, result in enumerate(results, 1):
+                    with st.expander(f"Result {i}", expanded=(i==1)):
+                        st.write(result['description'])
+                        if result.get('youtube_id'):
+                            st.video(f"https://youtube.com/watch?v={result['youtube_id']}&t={result.get('start_time', 0)}")
 
-            # Clean up
             if os.path.exists(audio_path):
                 os.remove(audio_path)
@@ -349,14 +362,12 @@ def main():
         st.subheader("Arxiv Search")
         q = st.text_input("Enter your Arxiv search query:", value=st.session_state['arxiv_last_query'])
         vocal_summary = st.checkbox("πŸŽ™ Short Audio Summary", value=True)
-        extended_refs = st.checkbox("πŸ“œ Extended References", value=False)
         titles_summary = st.checkbox("πŸ”– Titles Only", value=True)
         full_audio = st.checkbox("πŸ“š Full Audio Results", value=False)
 
         if st.button("πŸ” Arxiv Search"):
             st.session_state['arxiv_last_query'] = q
-            perform_ai_lookup(q, vocal_summary=vocal_summary, extended_refs=extended_refs,
-                              titles_summary=titles_summary, full_audio=full_audio)
+            perform_arxiv_lookup(q, vocal_summary=vocal_summary, titles_summary=titles_summary, full_audio=full_audio)
 
     # ---- Tab 4: File Manager ----
     with tab4:
@@ -374,7 +385,7 @@ def main():
             with st.expander(f"{entry['timestamp']}: {entry['query']}"):
                 for i, result in enumerate(entry['results'], 1):
                     st.write(f"{i}. {result['description'][:100]}...")
-
+
     st.markdown("### Voice Settings")
     st.selectbox("TTS Voice:",
                  ["en-US-AriaNeural", "en-US-GuyNeural", "en-GB-SoniaNeural"],