Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
-
# ๐ Main App - TalkingAIResearcher with Chat, Voice, Media, ArXiv, and More
|
| 2 |
import streamlit as st
|
| 3 |
import asyncio
|
| 4 |
import websockets
|
| 5 |
import uuid
|
| 6 |
import argparse
|
|
|
|
| 7 |
import os
|
| 8 |
import random
|
| 9 |
import time
|
|
@@ -17,6 +17,7 @@ import edge_tts
|
|
| 17 |
from audio_recorder_streamlit import audio_recorder
|
| 18 |
import nest_asyncio
|
| 19 |
import re
|
|
|
|
| 20 |
import pytz
|
| 21 |
import shutil
|
| 22 |
import anthropic
|
|
@@ -28,11 +29,10 @@ import zipfile
|
|
| 28 |
from gradio_client import Client
|
| 29 |
from dotenv import load_dotenv
|
| 30 |
from streamlit_marquee import streamlit_marquee
|
| 31 |
-
from datetime import datetime
|
| 32 |
from collections import defaultdict, Counter
|
| 33 |
import pandas as pd
|
| 34 |
|
| 35 |
-
# 🛠️ Patch asyncio for nesting
|
| 36 |
nest_asyncio.apply()
|
| 37 |
|
| 38 |
# ๐จ Page Config
|
|
@@ -57,8 +57,18 @@ FUN_USERNAMES = {
|
|
| 57 |
"GalacticGopher ๐": "en-AU-WilliamNeural",
|
| 58 |
"RocketRaccoon ๐": "en-CA-LiamNeural",
|
| 59 |
"EchoElf ๐ง": "en-US-AnaNeural",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
}
|
| 61 |
-
EDGE_TTS_VOICES = list(set(FUN_USERNAMES.values()))
|
| 62 |
FILE_EMOJIS = {"md": "๐", "mp3": "๐ต", "wav": "๐"}
|
| 63 |
|
| 64 |
# ๐ Directories
|
|
@@ -69,7 +79,7 @@ CHAT_FILE = "chat_logs/global_chat.md"
|
|
| 69 |
HISTORY_FILE = "history_logs/chat_history.md"
|
| 70 |
MEDIA_DIR = "media_files"
|
| 71 |
AUDIO_CACHE_DIR = "audio_cache"
|
| 72 |
-
AUDIO_DIR = "audio_logs"
|
| 73 |
|
| 74 |
# ๐ API Keys
|
| 75 |
load_dotenv()
|
|
@@ -115,7 +125,6 @@ def init_session_state():
|
|
| 115 |
|
| 116 |
# ๐๏ธ Marquee Helpers
|
| 117 |
def update_marquee_settings_ui():
|
| 118 |
-
# ๐จ Sidebar marquee controls
|
| 119 |
st.sidebar.markdown("### ๐ฏ Marquee Settings")
|
| 120 |
cols = st.sidebar.columns(2)
|
| 121 |
with cols[0]:
|
|
@@ -126,7 +135,6 @@ def update_marquee_settings_ui():
|
|
| 126 |
st.session_state['marquee_settings']['animationDuration'] = f"{st.slider('โฑ๏ธ Speed', 1, 20, 20)}s"
|
| 127 |
|
| 128 |
def display_marquee(text, settings, key_suffix=""):
|
| 129 |
-
# ๐ Show marquee with truncation
|
| 130 |
truncated = text[:280] + "..." if len(text) > 280 else text
|
| 131 |
streamlit_marquee(content=truncated, **settings, key=f"marquee_{key_suffix}")
|
| 132 |
st.write("")
|
|
@@ -142,7 +150,6 @@ def get_high_info_terms(text, top_n=10):
|
|
| 142 |
return [t for t, _ in Counter(filtered).most_common(top_n)]
|
| 143 |
|
| 144 |
def generate_filename(prompt, response, file_type="md"):
|
| 145 |
-
# ๐ Smart filename with info terms
|
| 146 |
prefix = format_timestamp_prefix()
|
| 147 |
terms = get_high_info_terms(prompt + " " + response, 5)
|
| 148 |
snippet = clean_text_for_filename(prompt[:40] + " " + response[:40])
|
|
@@ -152,13 +159,11 @@ def generate_filename(prompt, response, file_type="md"):
|
|
| 152 |
return f"{prefix}{base}_wct{wct}_sw{sw}_dur{dur}.{file_type}"
|
| 153 |
|
| 154 |
def create_file(prompt, response, file_type="md"):
|
| 155 |
-
# ๐ Save file with Q&A
|
| 156 |
filename = generate_filename(prompt, response, file_type)
|
| 157 |
with open(filename, 'w', encoding='utf-8') as f: f.write(prompt + "\n\n" + response)
|
| 158 |
return filename
|
| 159 |
|
| 160 |
def get_download_link(file, file_type="mp3"):
|
| 161 |
-
# ⬇️ Cached download link
|
| 162 |
cache_key = f"dl_{file}"
|
| 163 |
if cache_key not in st.session_state['download_link_cache']:
|
| 164 |
with open(file, "rb") as f:
|
|
@@ -168,7 +173,6 @@ def get_download_link(file, file_type="mp3"):
|
|
| 168 |
|
| 169 |
# 🎶 Audio Processing
|
| 170 |
async def async_edge_tts_generate(text, voice, username, rate=0, pitch=0, file_format="mp3"):
|
| 171 |
-
# ๐ต Async TTS with caching and .md generation
|
| 172 |
cache_key = f"{text[:100]}_{voice}_{rate}_{pitch}_{file_format}"
|
| 173 |
if cache_key in st.session_state['audio_cache']: return st.session_state['audio_cache'][cache_key], 0
|
| 174 |
start_time = time.time()
|
|
@@ -179,7 +183,6 @@ async def async_edge_tts_generate(text, voice, username, rate=0, pitch=0, file_f
|
|
| 179 |
await communicate.save(filename)
|
| 180 |
st.session_state['audio_cache'][cache_key] = filename
|
| 181 |
|
| 182 |
-
# Generate .md file
|
| 183 |
md_filename = filename.replace(".mp3", ".md")
|
| 184 |
md_content = f"# Chat Audio Log\n\n**Player:** {username}\n**Voice:** {voice}\n**Text:**\n```markdown\n{text}\n```"
|
| 185 |
with open(md_filename, 'w', encoding='utf-8') as f: f.write(md_content)
|
|
@@ -187,13 +190,11 @@ async def async_edge_tts_generate(text, voice, username, rate=0, pitch=0, file_f
|
|
| 187 |
return filename, time.time() - start_time
|
| 188 |
|
| 189 |
def play_and_download_audio(file_path):
|
| 190 |
-
# ๐ Play + download
|
| 191 |
if file_path and os.path.exists(file_path):
|
| 192 |
st.audio(file_path)
|
| 193 |
st.markdown(get_download_link(file_path), unsafe_allow_html=True)
|
| 194 |
|
| 195 |
def load_mp3_viewer():
|
| 196 |
-
# ๐ต Load all MP3s at startup
|
| 197 |
mp3_files = glob.glob(f"{AUDIO_DIR}/*.mp3")
|
| 198 |
for mp3 in mp3_files:
|
| 199 |
filename = os.path.basename(mp3)
|
|
@@ -201,7 +202,6 @@ def load_mp3_viewer():
|
|
| 201 |
st.session_state['mp3_files'][filename] = mp3
|
| 202 |
|
| 203 |
async def save_chat_entry(username, message, is_markdown=False):
|
| 204 |
-
# ๐ฌ Save chat with multicast broadcast and audio
|
| 205 |
central = pytz.timezone('US/Central')
|
| 206 |
timestamp = datetime.now(central).strftime("%Y-%m-%d %H:%M:%S")
|
| 207 |
entry = f"[{timestamp}] {username}: {message}" if not is_markdown else f"[{timestamp}] {username}:\n```markdown\n{message}\n```"
|
|
@@ -217,7 +217,6 @@ async def save_chat_entry(username, message, is_markdown=False):
|
|
| 217 |
return audio_file
|
| 218 |
|
| 219 |
async def load_chat():
|
| 220 |
-
# ๐ Load chat history - Numbered
|
| 221 |
if not os.path.exists(CHAT_FILE):
|
| 222 |
with open(CHAT_FILE, 'a') as f: f.write(f"# {START_ROOM} Chat\n\nWelcome to the cosmic hub! ๐ค\n")
|
| 223 |
with open(CHAT_FILE, 'r') as f:
|
|
@@ -228,7 +227,6 @@ async def load_chat():
|
|
| 228 |
|
| 229 |
# ๐ WebSocket Handling
|
| 230 |
async def websocket_handler(websocket, path):
|
| 231 |
-
# ๐ค Handle WebSocket clients
|
| 232 |
client_id = str(uuid.uuid4())
|
| 233 |
room_id = "chat"
|
| 234 |
if room_id not in st.session_state.active_connections:
|
|
@@ -252,7 +250,6 @@ async def websocket_handler(websocket, path):
|
|
| 252 |
del st.session_state.active_connections[room_id][client_id]
|
| 253 |
|
| 254 |
async def broadcast_message(message, room_id):
|
| 255 |
-
# ๐ข Broadcast to all clients
|
| 256 |
if room_id in st.session_state.active_connections:
|
| 257 |
disconnected = []
|
| 258 |
for client_id, ws in st.session_state.active_connections[room_id].items():
|
|
@@ -265,7 +262,6 @@ async def broadcast_message(message, room_id):
|
|
| 265 |
del st.session_state.active_connections[room_id][client_id]
|
| 266 |
|
| 267 |
async def run_websocket_server():
|
| 268 |
-
# 🖥️ Start WebSocket server
|
| 269 |
if not st.session_state.server_running:
|
| 270 |
server = await websockets.serve(websocket_handler, '0.0.0.0', 8765)
|
| 271 |
st.session_state.server_running = True
|
|
@@ -282,7 +278,6 @@ class AudioProcessor:
|
|
| 282 |
with open(f"{self.cache_dir}/metadata.json", 'w') as f: json.dump(self.metadata, f)
|
| 283 |
|
| 284 |
async def create_audio(self, text, voice='en-US-AriaNeural'):
|
| 285 |
-
# 🎶 Generate cached audio
|
| 286 |
cache_key = hashlib.md5(f"{text}:{voice}".encode()).hexdigest()
|
| 287 |
cache_path = f"{self.cache_dir}/{cache_key}.mp3"
|
| 288 |
if cache_key in self.metadata and os.path.exists(cache_path):
|
|
@@ -296,7 +291,6 @@ class AudioProcessor:
|
|
| 296 |
return open(cache_path, 'rb').read()
|
| 297 |
|
| 298 |
def process_pdf(pdf_file, max_pages, voice, audio_processor):
|
| 299 |
-
# ๐ Convert PDF to audio
|
| 300 |
reader = PdfReader(pdf_file)
|
| 301 |
total_pages = min(len(reader.pages), max_pages)
|
| 302 |
texts, audios = [], {}
|
|
@@ -309,7 +303,6 @@ def process_pdf(pdf_file, max_pages, voice, audio_processor):
|
|
| 309 |
|
| 310 |
# ๐ ArXiv & AI Lookup
|
| 311 |
def parse_arxiv_refs(ref_text):
|
| 312 |
-
# ๐ Parse ArXiv refs into dicts
|
| 313 |
if not ref_text: return []
|
| 314 |
papers = []
|
| 315 |
current = {}
|
|
@@ -326,7 +319,6 @@ def parse_arxiv_refs(ref_text):
|
|
| 326 |
return papers[:20]
|
| 327 |
|
| 328 |
def generate_5min_feature_markdown(paper):
|
| 329 |
-
# ✨ 5-min research paper feature
|
| 330 |
title, summary, authors, date, url = paper['title'], paper['summary'], paper['authors'], paper['date'], paper['url']
|
| 331 |
pdf_url = url.replace("abs", "pdf") + (".pdf" if not url.endswith(".pdf") else "")
|
| 332 |
wct, sw = len(title.split()), len(summary.split())
|
|
@@ -348,14 +340,12 @@ def generate_5min_feature_markdown(paper):
|
|
| 348 |
def create_detailed_paper_md(papers): return "# Detailed Summary\n" + "\n".join(generate_5min_feature_markdown(p) for p in papers)
|
| 349 |
|
| 350 |
async def create_paper_audio_files(papers, query):
|
| 351 |
-
# 🎧 Generate paper audio
|
| 352 |
for p in papers:
|
| 353 |
audio_text = clean_text_for_tts(f"{p['title']} by {p['authors']}. {p['summary']}")
|
| 354 |
p['full_audio'], _ = await async_edge_tts_generate(audio_text, st.session_state['tts_voice'], p['authors'])
|
| 355 |
if p['full_audio']: p['download_base64'] = get_download_link(p['full_audio'])
|
| 356 |
|
| 357 |
async def perform_ai_lookup(q, useArxiv=True, useArxivAudio=False):
|
| 358 |
-
# ๐ฎ AI-powered research
|
| 359 |
client = anthropic.Anthropic(api_key=anthropic_key)
|
| 360 |
response = client.messages.create(model="claude-3-sonnet-20240229", max_tokens=1000, messages=[{"role": "user", "content": q}])
|
| 361 |
result = response.content[0].text
|
|
@@ -378,7 +368,6 @@ async def perform_ai_lookup(q, useArxiv=True, useArxivAudio=False):
|
|
| 378 |
|
| 379 |
# ๐ฆ Zip Files
|
| 380 |
def create_zip_of_files(md_files, mp3_files, query):
|
| 381 |
-
# ๐ฆ Zip it up
|
| 382 |
all_files = md_files + mp3_files
|
| 383 |
if not all_files: return None
|
| 384 |
terms = get_high_info_terms(" ".join([open(f, 'r', encoding='utf-8').read() if f.endswith('.md') else os.path.splitext(os.path.basename(f))[0].replace('_', ' ') for f in all_files] + [query]), 5)
|
|
@@ -389,7 +378,7 @@ def create_zip_of_files(md_files, mp3_files, query):
|
|
| 389 |
# ๐ฎ Main Interface
|
| 390 |
async def async_interface():
|
| 391 |
init_session_state()
|
| 392 |
-
load_mp3_viewer()
|
| 393 |
if not st.session_state.username:
|
| 394 |
available = [n for n in FUN_USERNAMES if not any(f"{n} has joined" in l for l in (await load_chat()).split('\n'))]
|
| 395 |
st.session_state.username = random.choice(available or list(FUN_USERNAMES.keys()))
|
|
@@ -418,7 +407,6 @@ async def async_interface():
|
|
| 418 |
for i, line in enumerate(lines):
|
| 419 |
if line.strip():
|
| 420 |
st.markdown(line)
|
| 421 |
-
# Match MP3 to line by timestamp and username
|
| 422 |
for mp3_name, mp3_path in st.session_state['mp3_files'].items():
|
| 423 |
if line.strip() in mp3_name and st.session_state.username in mp3_name:
|
| 424 |
st.audio(mp3_path, key=f"audio_{i}_{mp3_name}")
|
|
@@ -432,7 +420,7 @@ async def async_interface():
|
|
| 432 |
st.rerun()
|
| 433 |
|
| 434 |
st.subheader("๐ค Speech-to-Chat")
|
| 435 |
-
|
| 436 |
transcript_data = speech_component(default_value=st.session_state.get('last_transcript', ''))
|
| 437 |
if transcript_data and 'value' in transcript_data:
|
| 438 |
transcript = transcript_data['value'].strip()
|
|
@@ -498,7 +486,7 @@ async def async_interface():
|
|
| 498 |
st.audio(audios[i], format='audio/mp3')
|
| 499 |
st.markdown(get_download_link(io.BytesIO(audios[i]), "mp3"), unsafe_allow_html=True)
|
| 500 |
|
| 501 |
-
# ๐๏ธ Sidebar
|
| 502 |
st.sidebar.subheader("Voice Settings")
|
| 503 |
new_username = st.sidebar.selectbox("Change Name/Voice", list(FUN_USERNAMES.keys()), index=list(FUN_USERNAMES.keys()).index(st.session_state.username))
|
| 504 |
if new_username != st.session_state.username:
|
|
@@ -506,6 +494,20 @@ async def async_interface():
|
|
| 506 |
st.session_state.username, st.session_state.tts_voice = new_username, FUN_USERNAMES[new_username]
|
| 507 |
st.rerun()
|
| 508 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 509 |
md_files, mp3_files = glob.glob("*.md"), glob.glob(f"{AUDIO_DIR}/*.mp3")
|
| 510 |
st.sidebar.markdown("### ๐ File History")
|
| 511 |
for f in sorted(md_files + mp3_files, key=os.path.getmtime, reverse=True)[:10]:
|
|
@@ -515,7 +517,6 @@ async def async_interface():
|
|
| 515 |
if zip_name: st.sidebar.markdown(get_download_link(zip_name, "zip"), unsafe_allow_html=True)
|
| 516 |
|
| 517 |
def main():
|
| 518 |
-
# ๐ Kick it off
|
| 519 |
asyncio.run(async_interface())
|
| 520 |
|
| 521 |
if __name__ == "__main__":
|
|
|
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import asyncio
|
| 3 |
import websockets
|
| 4 |
import uuid
|
| 5 |
import argparse
|
| 6 |
+
from datetime import datetime
|
| 7 |
import os
|
| 8 |
import random
|
| 9 |
import time
|
|
|
|
| 17 |
from audio_recorder_streamlit import audio_recorder
|
| 18 |
import nest_asyncio
|
| 19 |
import re
|
| 20 |
+
from streamlit_paste_button import paste_image_button
|
| 21 |
import pytz
|
| 22 |
import shutil
|
| 23 |
import anthropic
|
|
|
|
| 29 |
from gradio_client import Client
|
| 30 |
from dotenv import load_dotenv
|
| 31 |
from streamlit_marquee import streamlit_marquee
|
|
|
|
| 32 |
from collections import defaultdict, Counter
|
| 33 |
import pandas as pd
|
| 34 |
|
| 35 |
+
# 🛠️ Patch asyncio for nesting
|
| 36 |
nest_asyncio.apply()
|
| 37 |
|
| 38 |
# ๐จ Page Config
|
|
|
|
| 57 |
"GalacticGopher ๐": "en-AU-WilliamNeural",
|
| 58 |
"RocketRaccoon ๐": "en-CA-LiamNeural",
|
| 59 |
"EchoElf ๐ง": "en-US-AnaNeural",
|
| 60 |
+
"PhantomFox ๐ฆ": "en-US-BrandonNeural",
|
| 61 |
+
"WittyWizard ๐ง": "en-GB-ThomasNeural",
|
| 62 |
+
"LunarLlama ๐": "en-AU-FreyaNeural",
|
| 63 |
+
"SolarSloth โ๏ธ": "en-CA-LindaNeural",
|
| 64 |
+
"AstroAlpaca ๐ฆ": "en-US-ChristopherNeural",
|
| 65 |
+
"CyberCoyote ๐บ": "en-GB-ElliotNeural",
|
| 66 |
+
"MysticMoose ๐ฆ": "en-AU-JamesNeural",
|
| 67 |
+
"GlitchGnome ๐ง": "en-CA-EthanNeural",
|
| 68 |
+
"VortexViper ๐": "en-US-AmberNeural",
|
| 69 |
+
"ChronoChimp ๐": "en-GB-LibbyNeural"
|
| 70 |
}
|
| 71 |
+
EDGE_TTS_VOICES = list(set(FUN_USERNAMES.values()))
|
| 72 |
FILE_EMOJIS = {"md": "๐", "mp3": "๐ต", "wav": "๐"}
|
| 73 |
|
| 74 |
# ๐ Directories
|
|
|
|
| 79 |
HISTORY_FILE = "history_logs/chat_history.md"
|
| 80 |
MEDIA_DIR = "media_files"
|
| 81 |
AUDIO_CACHE_DIR = "audio_cache"
|
| 82 |
+
AUDIO_DIR = "audio_logs"
|
| 83 |
|
| 84 |
# ๐ API Keys
|
| 85 |
load_dotenv()
|
|
|
|
| 125 |
|
| 126 |
# ๐๏ธ Marquee Helpers
|
| 127 |
def update_marquee_settings_ui():
|
|
|
|
| 128 |
st.sidebar.markdown("### ๐ฏ Marquee Settings")
|
| 129 |
cols = st.sidebar.columns(2)
|
| 130 |
with cols[0]:
|
|
|
|
| 135 |
st.session_state['marquee_settings']['animationDuration'] = f"{st.slider('โฑ๏ธ Speed', 1, 20, 20)}s"
|
| 136 |
|
| 137 |
def display_marquee(text, settings, key_suffix=""):
|
|
|
|
| 138 |
truncated = text[:280] + "..." if len(text) > 280 else text
|
| 139 |
streamlit_marquee(content=truncated, **settings, key=f"marquee_{key_suffix}")
|
| 140 |
st.write("")
|
|
|
|
| 150 |
return [t for t, _ in Counter(filtered).most_common(top_n)]
|
| 151 |
|
| 152 |
def generate_filename(prompt, response, file_type="md"):
|
|
|
|
| 153 |
prefix = format_timestamp_prefix()
|
| 154 |
terms = get_high_info_terms(prompt + " " + response, 5)
|
| 155 |
snippet = clean_text_for_filename(prompt[:40] + " " + response[:40])
|
|
|
|
| 159 |
return f"{prefix}{base}_wct{wct}_sw{sw}_dur{dur}.{file_type}"
|
| 160 |
|
| 161 |
def create_file(prompt, response, file_type="md"):
|
|
|
|
| 162 |
filename = generate_filename(prompt, response, file_type)
|
| 163 |
with open(filename, 'w', encoding='utf-8') as f: f.write(prompt + "\n\n" + response)
|
| 164 |
return filename
|
| 165 |
|
| 166 |
def get_download_link(file, file_type="mp3"):
|
|
|
|
| 167 |
cache_key = f"dl_{file}"
|
| 168 |
if cache_key not in st.session_state['download_link_cache']:
|
| 169 |
with open(file, "rb") as f:
|
|
|
|
| 173 |
|
| 174 |
# 🎶 Audio Processing
|
| 175 |
async def async_edge_tts_generate(text, voice, username, rate=0, pitch=0, file_format="mp3"):
|
|
|
|
| 176 |
cache_key = f"{text[:100]}_{voice}_{rate}_{pitch}_{file_format}"
|
| 177 |
if cache_key in st.session_state['audio_cache']: return st.session_state['audio_cache'][cache_key], 0
|
| 178 |
start_time = time.time()
|
|
|
|
| 183 |
await communicate.save(filename)
|
| 184 |
st.session_state['audio_cache'][cache_key] = filename
|
| 185 |
|
|
|
|
| 186 |
md_filename = filename.replace(".mp3", ".md")
|
| 187 |
md_content = f"# Chat Audio Log\n\n**Player:** {username}\n**Voice:** {voice}\n**Text:**\n```markdown\n{text}\n```"
|
| 188 |
with open(md_filename, 'w', encoding='utf-8') as f: f.write(md_content)
|
|
|
|
| 190 |
return filename, time.time() - start_time
|
| 191 |
|
| 192 |
def play_and_download_audio(file_path):
|
|
|
|
| 193 |
if file_path and os.path.exists(file_path):
|
| 194 |
st.audio(file_path)
|
| 195 |
st.markdown(get_download_link(file_path), unsafe_allow_html=True)
|
| 196 |
|
| 197 |
def load_mp3_viewer():
|
|
|
|
| 198 |
mp3_files = glob.glob(f"{AUDIO_DIR}/*.mp3")
|
| 199 |
for mp3 in mp3_files:
|
| 200 |
filename = os.path.basename(mp3)
|
|
|
|
| 202 |
st.session_state['mp3_files'][filename] = mp3
|
| 203 |
|
| 204 |
async def save_chat_entry(username, message, is_markdown=False):
|
|
|
|
| 205 |
central = pytz.timezone('US/Central')
|
| 206 |
timestamp = datetime.now(central).strftime("%Y-%m-%d %H:%M:%S")
|
| 207 |
entry = f"[{timestamp}] {username}: {message}" if not is_markdown else f"[{timestamp}] {username}:\n```markdown\n{message}\n```"
|
|
|
|
| 217 |
return audio_file
|
| 218 |
|
| 219 |
async def load_chat():
|
|
|
|
| 220 |
if not os.path.exists(CHAT_FILE):
|
| 221 |
with open(CHAT_FILE, 'a') as f: f.write(f"# {START_ROOM} Chat\n\nWelcome to the cosmic hub! ๐ค\n")
|
| 222 |
with open(CHAT_FILE, 'r') as f:
|
|
|
|
| 227 |
|
| 228 |
# ๐ WebSocket Handling
|
| 229 |
async def websocket_handler(websocket, path):
|
|
|
|
| 230 |
client_id = str(uuid.uuid4())
|
| 231 |
room_id = "chat"
|
| 232 |
if room_id not in st.session_state.active_connections:
|
|
|
|
| 250 |
del st.session_state.active_connections[room_id][client_id]
|
| 251 |
|
| 252 |
async def broadcast_message(message, room_id):
|
|
|
|
| 253 |
if room_id in st.session_state.active_connections:
|
| 254 |
disconnected = []
|
| 255 |
for client_id, ws in st.session_state.active_connections[room_id].items():
|
|
|
|
| 262 |
del st.session_state.active_connections[room_id][client_id]
|
| 263 |
|
| 264 |
async def run_websocket_server():
|
|
|
|
| 265 |
if not st.session_state.server_running:
|
| 266 |
server = await websockets.serve(websocket_handler, '0.0.0.0', 8765)
|
| 267 |
st.session_state.server_running = True
|
|
|
|
| 278 |
with open(f"{self.cache_dir}/metadata.json", 'w') as f: json.dump(self.metadata, f)
|
| 279 |
|
| 280 |
async def create_audio(self, text, voice='en-US-AriaNeural'):
|
|
|
|
| 281 |
cache_key = hashlib.md5(f"{text}:{voice}".encode()).hexdigest()
|
| 282 |
cache_path = f"{self.cache_dir}/{cache_key}.mp3"
|
| 283 |
if cache_key in self.metadata and os.path.exists(cache_path):
|
|
|
|
| 291 |
return open(cache_path, 'rb').read()
|
| 292 |
|
| 293 |
def process_pdf(pdf_file, max_pages, voice, audio_processor):
|
|
|
|
| 294 |
reader = PdfReader(pdf_file)
|
| 295 |
total_pages = min(len(reader.pages), max_pages)
|
| 296 |
texts, audios = [], {}
|
|
|
|
| 303 |
|
| 304 |
# ๐ ArXiv & AI Lookup
|
| 305 |
def parse_arxiv_refs(ref_text):
|
|
|
|
| 306 |
if not ref_text: return []
|
| 307 |
papers = []
|
| 308 |
current = {}
|
|
|
|
| 319 |
return papers[:20]
|
| 320 |
|
| 321 |
def generate_5min_feature_markdown(paper):
|
|
|
|
| 322 |
title, summary, authors, date, url = paper['title'], paper['summary'], paper['authors'], paper['date'], paper['url']
|
| 323 |
pdf_url = url.replace("abs", "pdf") + (".pdf" if not url.endswith(".pdf") else "")
|
| 324 |
wct, sw = len(title.split()), len(summary.split())
|
|
|
|
| 340 |
def create_detailed_paper_md(papers): return "# Detailed Summary\n" + "\n".join(generate_5min_feature_markdown(p) for p in papers)
|
| 341 |
|
| 342 |
async def create_paper_audio_files(papers, query):
|
|
|
|
| 343 |
for p in papers:
|
| 344 |
audio_text = clean_text_for_tts(f"{p['title']} by {p['authors']}. {p['summary']}")
|
| 345 |
p['full_audio'], _ = await async_edge_tts_generate(audio_text, st.session_state['tts_voice'], p['authors'])
|
| 346 |
if p['full_audio']: p['download_base64'] = get_download_link(p['full_audio'])
|
| 347 |
|
| 348 |
async def perform_ai_lookup(q, useArxiv=True, useArxivAudio=False):
|
|
|
|
| 349 |
client = anthropic.Anthropic(api_key=anthropic_key)
|
| 350 |
response = client.messages.create(model="claude-3-sonnet-20240229", max_tokens=1000, messages=[{"role": "user", "content": q}])
|
| 351 |
result = response.content[0].text
|
|
|
|
| 368 |
|
| 369 |
# ๐ฆ Zip Files
|
| 370 |
def create_zip_of_files(md_files, mp3_files, query):
|
|
|
|
| 371 |
all_files = md_files + mp3_files
|
| 372 |
if not all_files: return None
|
| 373 |
terms = get_high_info_terms(" ".join([open(f, 'r', encoding='utf-8').read() if f.endswith('.md') else os.path.splitext(os.path.basename(f))[0].replace('_', ' ') for f in all_files] + [query]), 5)
|
|
|
|
| 378 |
# ๐ฎ Main Interface
|
| 379 |
async def async_interface():
|
| 380 |
init_session_state()
|
| 381 |
+
load_mp3_viewer()
|
| 382 |
if not st.session_state.username:
|
| 383 |
available = [n for n in FUN_USERNAMES if not any(f"{n} has joined" in l for l in (await load_chat()).split('\n'))]
|
| 384 |
st.session_state.username = random.choice(available or list(FUN_USERNAMES.keys()))
|
|
|
|
| 407 |
for i, line in enumerate(lines):
|
| 408 |
if line.strip():
|
| 409 |
st.markdown(line)
|
|
|
|
| 410 |
for mp3_name, mp3_path in st.session_state['mp3_files'].items():
|
| 411 |
if line.strip() in mp3_name and st.session_state.username in mp3_name:
|
| 412 |
st.audio(mp3_path, key=f"audio_{i}_{mp3_name}")
|
|
|
|
| 420 |
st.rerun()
|
| 421 |
|
| 422 |
st.subheader("๐ค Speech-to-Chat")
|
| 423 |
+
from mycomponent import speech_component
|
| 424 |
transcript_data = speech_component(default_value=st.session_state.get('last_transcript', ''))
|
| 425 |
if transcript_data and 'value' in transcript_data:
|
| 426 |
transcript = transcript_data['value'].strip()
|
|
|
|
| 486 |
st.audio(audios[i], format='audio/mp3')
|
| 487 |
st.markdown(get_download_link(io.BytesIO(audios[i]), "mp3"), unsafe_allow_html=True)
|
| 488 |
|
| 489 |
+
# ๐๏ธ Sidebar with Dialog and Audio
|
| 490 |
st.sidebar.subheader("Voice Settings")
|
| 491 |
new_username = st.sidebar.selectbox("Change Name/Voice", list(FUN_USERNAMES.keys()), index=list(FUN_USERNAMES.keys()).index(st.session_state.username))
|
| 492 |
if new_username != st.session_state.username:
|
|
|
|
| 494 |
st.session_state.username, st.session_state.tts_voice = new_username, FUN_USERNAMES[new_username]
|
| 495 |
st.rerun()
|
| 496 |
|
| 497 |
+
st.sidebar.markdown("### ๐ฌ Chat Dialog & Audio")
|
| 498 |
+
chat_content = await load_chat()
|
| 499 |
+
lines = chat_content.split('\n')
|
| 500 |
+
audio_files = sorted(glob.glob(f"{AUDIO_DIR}/*.mp3"), key=os.path.getmtime, reverse=True)
|
| 501 |
+
for line in lines[-10:]: # Show last 10 lines for brevity
|
| 502 |
+
if line.strip():
|
| 503 |
+
st.sidebar.markdown(f"**{line}**")
|
| 504 |
+
for mp3 in audio_files:
|
| 505 |
+
mp3_name = os.path.basename(mp3)
|
| 506 |
+
if st.session_state.username in mp3_name and any(word in mp3_name for word in line.split()):
|
| 507 |
+
st.sidebar.audio(mp3, key=f"sidebar_audio_{mp3_name}")
|
| 508 |
+
st.sidebar.markdown(get_download_link(mp3), unsafe_allow_html=True)
|
| 509 |
+
break
|
| 510 |
+
|
| 511 |
md_files, mp3_files = glob.glob("*.md"), glob.glob(f"{AUDIO_DIR}/*.mp3")
|
| 512 |
st.sidebar.markdown("### ๐ File History")
|
| 513 |
for f in sorted(md_files + mp3_files, key=os.path.getmtime, reverse=True)[:10]:
|
|
|
|
| 517 |
if zip_name: st.sidebar.markdown(get_download_link(zip_name, "zip"), unsafe_allow_html=True)
|
| 518 |
|
| 519 |
def main():
|
|
|
|
| 520 |
asyncio.run(async_interface())
|
| 521 |
|
| 522 |
if __name__ == "__main__":
|