awacke1 committed
Commit d15ace4 · verified · 1 Parent(s): ee456d4

Create backup15.voiceinoutworks.app.py

Files changed (1)
1. backup15.voiceinoutworks.app.py +434 -0

backup15.voiceinoutworks.app.py ADDED
import streamlit as st
import anthropic, openai, base64, cv2, glob, json, math, os, pytz, random, re, requests, textract, time, zipfile
import plotly.graph_objects as go
import streamlit.components.v1 as components
from datetime import datetime
from audio_recorder_streamlit import audio_recorder
from bs4 import BeautifulSoup
from collections import deque
from dotenv import load_dotenv
from gradio_client import Client
from huggingface_hub import InferenceClient
from io import BytesIO
from PIL import Image
from PyPDF2 import PdfReader
from urllib.parse import quote
from xml.etree import ElementTree as ET
from openai import OpenAI
import extra_streamlit_components as stx
from streamlit.runtime.scriptrunner import get_script_run_ctx
import asyncio
import edge_tts  # ensure this is installed (pip install edge-tts)

# πŸ”§ Config & Setup
st.set_page_config(
    page_title="🚲BikeAIπŸ† Claude/GPT Research",
    page_icon="πŸš²πŸ†",
    layout="wide",
    initial_sidebar_state="auto",
    menu_items={
        'Get Help': 'https://huggingface.co/awacke1',
        'Report a bug': 'https://huggingface.co/spaces/awacke1',
        'About': "🚲BikeAIπŸ† Claude/GPT Research AI"
    }
)
load_dotenv()
openai.api_key = os.getenv('OPENAI_API_KEY') or st.secrets['OPENAI_API_KEY']
anthropic_key = os.getenv("ANTHROPIC_API_KEY_3") or st.secrets["ANTHROPIC_API_KEY"]
claude_client = anthropic.Anthropic(api_key=anthropic_key)
openai_client = OpenAI(api_key=openai.api_key, organization=os.getenv('OPENAI_ORG_ID'))
HF_KEY = os.getenv('HF_KEY')
API_URL = os.getenv('API_URL')

st.session_state.setdefault('transcript_history', [])
st.session_state.setdefault('chat_history', [])
st.session_state.setdefault('openai_model', "gpt-4o-2024-05-13")
st.session_state.setdefault('messages', [])
st.session_state.setdefault('last_voice_input', "")

# 🎨 Minimal Custom CSS
st.markdown("""
<style>
.main { background: linear-gradient(to right, #1a1a1a, #2d2d2d); color: #fff; }
.stMarkdown { font-family: 'Helvetica Neue', sans-serif; }
</style>
""", unsafe_allow_html=True)

# πŸ”‘ Common Utilities
def generate_filename(prompt, file_type="md"):
    ctz = pytz.timezone('US/Central')
    date_str = datetime.now(ctz).strftime("%m%d_%H%M")
    safe = re.sub(r'[<>:"/\\|?*\n]', ' ', prompt)
    safe = re.sub(r'\s+', ' ', safe).strip()[:90]
    return f"{date_str}_{safe}.{file_type}"

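# Example (illustrative timestamps): a prompt received at 3:45 PM US/Central
# on June 2 yields a name like "0602_1545_What is quantum computing.md".
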
def create_file(filename, prompt, response):
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(prompt + "\n\n" + response)

def get_download_link(file):
    with open(file, "rb") as f:
        b64 = base64.b64encode(f.read()).decode()
    return f'<a href="data:file/txt;base64,{b64}" download="{os.path.basename(file)}">πŸ“‚ Download {os.path.basename(file)}</a>'

# Note: no @st.cache_resource here -- caching a function whose whole point is a
# render side effect would suppress the speech on every call after the first.
def speech_synthesis_html(result):
    # This old function can remain as a fallback, but we won't use it after integrating EdgeTTS.
    html_code = f"""
    <html><body>
    <script>
    var msg = new SpeechSynthesisUtterance("{result.replace('"', '')}");
    window.speechSynthesis.speak(msg);
    </script>
    </body></html>
    """
    components.html(html_code, height=0)

# ------------ add EdgeTTS
# --- NEW FUNCTIONS FOR EDGE TTS ---
async def edge_tts_generate_audio(text, voice="en-US-AriaNeural", rate=0, pitch=0):
    """
    Generate audio from text using Edge TTS and return the path to the MP3 file.
    """
    if not text.strip():
        return None
    rate_str = f"{rate:+d}%"
    pitch_str = f"{pitch:+d}Hz"
    communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)
    out_fn = generate_filename(text, "mp3")
    await communicate.save(out_fn)
    return out_fn

def speak_with_edge_tts(text, voice="en-US-AriaNeural", rate=0, pitch=0):
    """
    Synchronous wrapper to call the async TTS generation and return the file path.
    """
    return asyncio.run(edge_tts_generate_audio(text, voice, rate, pitch))

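# Caveat: asyncio.run() spins up a fresh event loop, so this wrapper raises
# RuntimeError if it is ever invoked from code already running inside a loop.
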
def play_and_download_audio(file_path):
    """
    Display an audio player and a download link for the generated MP3 file.
    """
    if file_path and os.path.exists(file_path):
        st.audio(file_path)
        st.markdown(get_download_link(file_path), unsafe_allow_html=True)
# ---------------------------
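# Minimal usage sketch (voices other than the default "en-US-AriaNeural" are
# assumptions -- enumerate the real list with `edge-tts --list-voices`):
#   mp3_path = speak_with_edge_tts("Hello from Edge TTS", rate=10, pitch=-5)
#   play_and_download_audio(mp3_path)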

def process_image(image_path, user_prompt):
    with open(image_path, "rb") as imgf:
        image_data = imgf.read()
    b64img = base64.b64encode(image_data).decode("utf-8")
    resp = openai_client.chat.completions.create(
        model=st.session_state["openai_model"],
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": [
                {"type": "text", "text": user_prompt},
                {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64img}"}}
            ]}
        ],
        temperature=0.0,
    )
    return resp.choices[0].message.content
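# Note: the data URL above always declares image/png, even for .jpg inputs;
# vision endpoints generally tolerate the mismatch, but matching the actual
# format is the safer choice.
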
def process_audio(audio_path):
    with open(audio_path, "rb") as f:
        transcription = openai_client.audio.transcriptions.create(model="whisper-1", file=f)
    st.session_state.messages.append({"role": "user", "content": transcription.text})
    return transcription.text

def process_video(video_path, seconds_per_frame=1):
    vid = cv2.VideoCapture(video_path)
    total = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = vid.get(cv2.CAP_PROP_FPS)
    skip = max(1, int(fps * seconds_per_frame))  # guard: a zero step would crash range()
    frames_b64 = []
    for i in range(0, total, skip):
        vid.set(cv2.CAP_PROP_POS_FRAMES, i)
        ret, frame = vid.read()
        if not ret:
            break
        _, buf = cv2.imencode(".jpg", frame)
        frames_b64.append(base64.b64encode(buf).decode("utf-8"))
    vid.release()
    return frames_b64

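# Sampling math: at 30 fps with seconds_per_frame=1, every 30th frame is kept,
# so a 60-second clip produces roughly 60 base64-encoded JPEG frames.
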
def process_video_with_gpt(video_path, prompt):
    frames = process_video(video_path)
    resp = openai_client.chat.completions.create(
        model=st.session_state["openai_model"],
        messages=[
            {"role": "system", "content": "Analyze video frames."},
            {"role": "user", "content": [
                {"type": "text", "text": prompt},
                *[{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{fr}"}} for fr in frames]
            ]}
        ]
    )
    return resp.choices[0].message.content

def search_arxiv(query):
    st.write("πŸ” Searching ArXiv...")
    client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
    r1 = client.predict(prompt=query, llm_model_picked="mistralai/Mixtral-8x7B-Instruct-v0.1", stream_outputs=True, api_name="/ask_llm")
    st.markdown("### Mixtral-8x7B-Instruct-v0.1 Result")
    st.markdown(r1)
    r2 = client.predict(prompt=query, llm_model_picked="mistralai/Mistral-7B-Instruct-v0.2", stream_outputs=True, api_name="/ask_llm")
    st.markdown("### Mistral-7B-Instruct-v0.2 Result")
    st.markdown(r2)
    return f"{r1}\n\n{r2}"

def perform_ai_lookup(q):
    start = time.time()
    client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
    # Perform a RAG-based search
    r = client.predict(q, 20, "Semantic Search", "mistralai/Mixtral-8x7B-Instruct-v0.1", api_name="/update_with_rag_md")
    refs = r[0]
    # Ask the model for an answer
    r2 = client.predict(q, "mistralai/Mixtral-8x7B-Instruct-v0.1", True, api_name="/ask_llm")
    result = f"### πŸ”Ž {q}\n\n{r2}\n\n{refs}"

    # ---------------------------------------------------------------
    # Speak results with EdgeTTS instead of speech_synthesis_html(r2):
    st.markdown(result)

    # Speak main result
    audio_file_main = speak_with_edge_tts(r2, voice="en-US-AriaNeural", rate=0, pitch=0)
    st.write("### Audio Output for Main Result")
    play_and_download_audio(audio_file_main)

    # Speak reference summaries
    summaries_text = "Here are the summaries from the references: " + refs.replace('"', '')
    audio_file_refs = speak_with_edge_tts(summaries_text, voice="en-US-AriaNeural", rate=0, pitch=0)
    st.write("### Audio Output for References Summaries")
    play_and_download_audio(audio_file_refs)

    # Extract titles from refs and speak them
    titles = []
    for line in refs.split('\n'):
        m = re.search(r"\[([^\]]+)\]", line)
        if m:
            titles.append(m.group(1))
    if titles:
        titles_text = "Here are the titles of the papers: " + ", ".join(titles)
        audio_file_titles = speak_with_edge_tts(titles_text, voice="en-US-AriaNeural", rate=0, pitch=0)
        st.write("### Audio Output for Paper Titles")
        play_and_download_audio(audio_file_titles)
    # --------------------------------------------

    elapsed = time.time() - start
    st.write(f"Elapsed: {elapsed:.2f} s")
    fn = generate_filename(q, "md")
    create_file(fn, q, result)
    return result

def process_with_gpt(text):
    if not text:
        return
    st.session_state.messages.append({"role": "user", "content": text})
    with st.chat_message("user"):
        st.markdown(text)
    with st.chat_message("assistant"):
        c = openai_client.chat.completions.create(
            model=st.session_state["openai_model"],
            messages=st.session_state.messages,
            stream=False
        )
        ans = c.choices[0].message.content
        st.write("GPT-4o: " + ans)
    create_file(generate_filename(text, "md"), text, ans)
    st.session_state.messages.append({"role": "assistant", "content": ans})
    return ans

def process_with_claude(text):
    if not text:
        return
    with st.chat_message("user"):
        st.markdown(text)
    with st.chat_message("assistant"):
        r = claude_client.messages.create(
            model="claude-3-sonnet-20240229",
            max_tokens=1000,
            messages=[{"role": "user", "content": text}]
        )
        ans = r.content[0].text
        st.write("Claude: " + ans)
    create_file(generate_filename(text, "md"), text, ans)
    st.session_state.chat_history.append({"user": text, "claude": ans})
    return ans

def create_zip_of_files(files):
    zip_name = "all_files.zip"
    with zipfile.ZipFile(zip_name, 'w') as z:
        for f in files:
            z.write(f)
    return zip_name

def get_media_html(p, typ="video", w="100%"):
    d = base64.b64encode(open(p, 'rb').read()).decode()
    if typ == "video":
        return f'<video width="{w}" controls autoplay muted loop><source src="data:video/mp4;base64,{d}" type="video/mp4"></video>'
    else:
        return f'<audio controls style="width:{w};"><source src="data:audio/mpeg;base64,{d}" type="audio/mpeg"></audio>'

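# Embedding media as base64 data URIs keeps the HTML self-contained, but adds
# roughly 33% size overhead, so this suits small local files best.
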
def create_media_gallery():
    st.header("🎬 Media Gallery")
    tabs = st.tabs(["πŸ–ΌοΈ Images", "🎡 Audio", "πŸŽ₯ Video"])
    with tabs[0]:
        imgs = glob.glob("*.png") + glob.glob("*.jpg")
        if imgs:
            c = st.slider("Cols", 1, 5, 3)
            cols = st.columns(c)
            for i, f in enumerate(imgs):
                with cols[i % c]:
                    st.image(Image.open(f), use_container_width=True)
                    if st.button(f"πŸ‘€ Analyze {os.path.basename(f)}"):
                        a = process_image(f, "Describe this image.")
                        st.markdown(a)
    with tabs[1]:
        auds = glob.glob("*.mp3") + glob.glob("*.wav")
        for a in auds:
            with st.expander(f"🎡 {os.path.basename(a)}"):
                st.markdown(get_media_html(a, "audio"), unsafe_allow_html=True)
                if st.button(f"Transcribe {os.path.basename(a)}"):
                    t = process_audio(a)
                    st.write(t)
    with tabs[2]:
        vids = glob.glob("*.mp4")
        for v in vids:
            with st.expander(f"πŸŽ₯ {os.path.basename(v)}"):
                st.markdown(get_media_html(v, "video"), unsafe_allow_html=True)
                if st.button(f"Analyze {os.path.basename(v)}"):
                    a = process_video_with_gpt(v, "Describe video.")
                    st.markdown(a)

def display_file_manager():
    st.sidebar.title("πŸ“ File Management")
    files = sorted(glob.glob("*.md"), reverse=True)
    if st.sidebar.button("πŸ—‘ Delete All"):
        for f in files:
            os.remove(f)
        st.rerun()  # st.experimental_rerun() was removed in newer Streamlit releases
    if st.sidebar.button("⬇️ Download All"):
        z = create_zip_of_files(files)
        st.sidebar.markdown(get_download_link(z), unsafe_allow_html=True)
    for f in files:
        col1, col2, col3, col4 = st.sidebar.columns([1, 3, 1, 1])
        with col1:
            if st.button("🌐", key="v" + f):
                st.session_state.current_file = f
                c = open(f, 'r', encoding='utf-8').read()
                st.write(c)
        with col2:
            st.markdown(get_download_link(f), unsafe_allow_html=True)
        with col3:
            if st.button("πŸ“‚", key="e" + f):
                st.session_state.current_file = f
                st.session_state.file_content = open(f, 'r', encoding='utf-8').read()
        with col4:
            if st.button("πŸ—‘", key="d" + f):
                os.remove(f)
                st.rerun()

def main():
    st.sidebar.markdown("### 🚲BikeAIπŸ† Multi-Agent Research AI")
    tab_main = st.radio("Action:", ["🎀 Voice Input", "πŸ“Έ Media Gallery", "πŸ” Search ArXiv", "πŸ“ File Editor"], horizontal=True)

    model_choice = st.sidebar.radio("AI Model:", ["Arxiv", "GPT-4o", "Claude-3", "GPT+Claude+Arxiv"], index=0)

    # Declare the custom voice-input component
    mycomponent = components.declare_component("mycomponent", path="mycomponent")
    val = mycomponent(my_input_value="Hello")
    if val:
        # Strip whitespace and newlines from the end of the user input
        user_input = val.strip()
        if user_input:
            if model_choice == "GPT-4o":
                process_with_gpt(user_input)
            elif model_choice == "Claude-3":
                process_with_claude(user_input)
            elif model_choice == "Arxiv":
                st.subheader("Arxiv Only Results:")
                perform_ai_lookup(user_input)
            else:
                col1, col2, col3 = st.columns(3)
                with col1:
                    st.subheader("GPT-4o Omni:")
                    try:
                        process_with_gpt(user_input)
                    except Exception:  # narrow the bare except so Ctrl-C/SystemExit still propagate
                        st.write('GPT 4o error')
                with col2:
                    st.subheader("Claude-3 Sonnet:")
                    try:
                        process_with_claude(user_input)
                    except Exception:
                        st.write('Claude error')
                with col3:
                    st.subheader("Arxiv + Mistral:")
                    try:
                        r = perform_ai_lookup(user_input)
                        st.markdown(r)
                    except Exception:
                        st.write("Arxiv error")

    if tab_main == "🎀 Voice Input":
        st.subheader("🎀 Voice Recognition")
        user_text = st.text_area("Message:", height=100)
        # Strip whitespace and newlines
        user_text = user_text.strip()
        if st.button("Send πŸ“¨"):
            if user_text:
                if model_choice == "GPT-4o":
                    process_with_gpt(user_text)
                elif model_choice == "Claude-3":
                    process_with_claude(user_text)
                elif model_choice == "Arxiv":
                    st.subheader("Arxiv Only Results:")
                    perform_ai_lookup(user_text)
                else:
                    col1, col2, col3 = st.columns(3)
                    with col1:
                        st.subheader("GPT-4o Omni:")
                        process_with_gpt(user_text)
                    with col2:
                        st.subheader("Claude-3 Sonnet:")
                        process_with_claude(user_text)
                    with col3:
                        st.subheader("Arxiv & Mistral:")
                        res = perform_ai_lookup(user_text)
                        st.markdown(res)
        st.subheader("πŸ“œ Chat History")
        t1, t2 = st.tabs(["Claude History", "GPT-4o History"])
        with t1:
            for c in st.session_state.chat_history:
                st.write("**You:**", c["user"])
                st.write("**Claude:**", c["claude"])
        with t2:
            for m in st.session_state.messages:
                with st.chat_message(m["role"]):
                    st.markdown(m["content"])

    elif tab_main == "πŸ“Έ Media Gallery":
        create_media_gallery()

    elif tab_main == "πŸ” Search ArXiv":
        q = st.text_input("Research query:")
        if q:
            q = q.strip()  # Strip whitespace and newlines
            if q:
                r = search_arxiv(q)
                st.markdown(r)

    elif tab_main == "πŸ“ File Editor":
        if getattr(st.session_state, 'current_file', None):
            st.subheader(f"Editing: {st.session_state.current_file}")
            new_text = st.text_area("Content:", st.session_state.file_content, height=300)
            # Unlike chat input, file content is saved as-is (no stripping).
            if st.button("Save"):
                with open(st.session_state.current_file, 'w', encoding='utf-8') as f:
                    f.write(new_text)
                st.success("Updated!")

    display_file_manager()


if __name__ == "__main__":
    main()