Update app.py
app.py
CHANGED
@@ -1,19 +1,14 @@
 import streamlit as st
-import pandas as pd
-import os
-import glob
-import random
+import os, glob, re, base64, asyncio, requests
 from datetime import datetime
+from collections import defaultdict
+from urllib.parse import quote
+from xml.etree import ElementTree as ET
 import edge_tts
-import asyncio
-import requests
 import streamlit.components.v1 as components
-import base64
-import re
-from xml.etree import ElementTree as ET
-from datasets import load_dataset

-# -------------------- Configuration
+# -------------------- Configuration --------------------
+# Exactly 11 user names and 11 voices (as an example)
 USER_NAMES = [
     "Aria", "Guy", "Sonia", "Tony", "Jenny", "Davis", "Libby", "Clara", "Liam", "Natasha", "William"
 ]
@@ -24,45 +19,25 @@ ENGLISH_VOICES = [
     "en-CA-LiamNeural", "en-AU-NatashaNeural", "en-AU-WilliamNeural"
 ]

-# Map each user to a corresponding voice
 USER_VOICES = dict(zip(USER_NAMES, ENGLISH_VOICES))

-ROWS_PER_PAGE = 100
 SAVED_INPUTS_DIR = "saved_inputs"
 os.makedirs(SAVED_INPUTS_DIR, exist_ok=True)

-
-SESSION_VARS = {
-    'current_page': 0,
-    'data_cache': None,
-    'dataset_info': None,
-    'nps_submitted': False,
-    'nps_last_shown': None,
-    'old_val': None,
-    'voice_text': None,
-    'user_name': random.choice(USER_NAMES),
-    'max_items': 100,
-    'global_voice': "en-US-AriaNeural"
-}
-
-for var, default in SESSION_VARS.items():
-    if var not in st.session_state:
-        st.session_state[var] = default
-
-def create_voice_component():
-    mycomponent = components.declare_component(
-        "mycomponent",
-        path="mycomponent"
-    )
-    return mycomponent
+# Session state
+if 'user_name' not in st.session_state:
+    st.session_state['user_name'] = USER_NAMES[0]
+
+if 'old_val' not in st.session_state:
+    st.session_state['old_val'] = None
+
+if 'should_rerun' not in st.session_state:
+    st.session_state['should_rerun'] = False

+if 'viewing_prefix' not in st.session_state:
+    st.session_state['viewing_prefix'] = None
+
+# -------------------- Utility Functions --------------------
 def clean_for_speech(text: str) -> str:
     text = text.replace("\n", " ")
     text = text.replace("</s>", " ")
@@ -76,8 +51,12 @@ async def edge_tts_generate_audio(text, voice="en-US-AriaNeural"):
     if not text.strip():
         return None
     communicate = edge_tts.Communicate(text, voice)
-    out_fn = f"speech_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp3"
-    await communicate.save(out_fn)
+    out_fn = f"speech_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp3"
+    try:
+        await communicate.save(out_fn)
+    except edge_tts.exceptions.NoAudioReceived:
+        st.error("No audio received from TTS service.")
+        return None
     return out_fn

 def speak_with_edge_tts(text, voice="en-US-AriaNeural"):
@@ -89,27 +68,13 @@ def play_and_download_audio(file_path):
     dl_link = f'<a href="data:audio/mpeg;base64,{base64.b64encode(open(file_path,"rb").read()).decode()}" download="{os.path.basename(file_path)}">Download {os.path.basename(file_path)}</a>'
     st.markdown(dl_link, unsafe_allow_html=True)

-def generate_filename(prefix, text):
-    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-    safe_text = re.sub(r'[^\w\s-]', '', text[:50]).strip().lower()
-    safe_text = re.sub(r'[-\s]+', '-', safe_text)
-    return f"{prefix}_{timestamp}_{safe_text}.md"
-
 def save_input_as_md(user_name, text, prefix="input"):
     if not text.strip():
         return
-    fn = generate_filename(prefix, text)
-    full_path = os.path.join(SAVED_INPUTS_DIR, fn)
-    with open(full_path, 'w', encoding='utf-8') as f:
-        f.write(f"# User: {user_name}\n")
-        f.write(f"**Timestamp:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")
-        f.write(text)
-    return full_path
-
-def save_response_as_md(user_name, text, prefix="response"):
-    if not text.strip():
-        return
-    fn = generate_filename(prefix, text)
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    safe_text = re.sub(r'[^\w\s-]', '', text[:50]).strip().lower()
+    safe_text = re.sub(r'[-\s]+', '-', safe_text)
+    fn = f"{prefix}_{timestamp}_{safe_text}.md"
     full_path = os.path.join(SAVED_INPUTS_DIR, fn)
     with open(full_path, 'w', encoding='utf-8') as f:
         f.write(f"# User: {user_name}\n")
@@ -164,168 +129,135 @@ def summarize_arxiv_results(results):
     lines.append(f"Result {i}: {title}\n{summary}\n")
     return "\n\n".join(lines)

-def simple_dataset_search(query, df):
-    if df.empty or not query.strip():
-        return pd.DataFrame()
-    query_terms = query.lower().split()
-    matches = []
-    for idx, row in df.iterrows():
-        text_parts = []
-        for col in df.columns:
-            val = row[col]
-            if isinstance(val, str):
-                text_parts.append(val.lower())
-            elif isinstance(val, (int, float)):
-                text_parts.append(str(val))
-        full_text = " ".join(text_parts)
-        if any(qt in full_text for qt in query_terms):
-            matches.append(row)
-    if matches:
-        return pd.DataFrame(matches)
-    return pd.DataFrame()
-
-@st.cache_data
-def load_dataset_page(dataset_id, token, page, rows_per_page):
-    try:
-        start_idx = page * rows_per_page
-        end_idx = start_idx + rows_per_page
-        dataset = load_dataset(
-            dataset_id,
-            token=token,
-            streaming=False,
-            split=f'train[{start_idx}:{end_idx}]'
-        )
-        return pd.DataFrame(dataset)
-    except:
-        return pd.DataFrame()
-
-class SimpleDatasetSearcher:
-    def __init__(self, dataset_id="tomg-group-umd/cinepile"):
-        self.dataset_id = dataset_id
-        self.token = os.environ.get('DATASET_KEY')
-    def load_page(self, page=0):
-        return load_dataset_page(self.dataset_id, self.token, page, ROWS_PER_PAGE)
-
 def concatenate_mp3(files, output_file):
     with open(output_file, 'wb') as outfile:
         for f in files:
             with open(f, 'rb') as infile:
                 outfile.write(infile.read())

-def main():
+def load_groups():
+    files = list_saved_inputs()
+    groups = defaultdict(list)
+    for fpath in files:
+        fname = os.path.basename(fpath)
+        prefix = fname[:10]
+        groups[prefix].append(fpath)
+    for prefix in groups:
+        groups[prefix].sort(key=lambda x: os.path.getmtime(x), reverse=True)
+    sorted_prefixes = sorted(groups.keys(),
+                             key=lambda pre: max(os.path.getmtime(x) for x in groups[pre]),
+                             reverse=True)
+    return groups, sorted_prefixes
+
+# -------------------- Main Application --------------------
+st.title("🎙️ Voice Chat & ArXiv Search")
+
+with st.sidebar:
+    st.session_state['user_name'] = st.selectbox("Current User:", USER_NAMES, index=0)
+
+    saved_files = list_saved_inputs()
+    st.write("📂 Saved Inputs:")
+    for fpath in saved_files:
+        user, ts, content = parse_md_file(fpath)
+        fname = os.path.basename(fpath)
+        st.write(f"- {fname} (User: {user})")
+
+    if st.button("🗑️ Clear All History"):
         for fpath in saved_files:
-    if st.button("
+            os.remove(fpath)
+        st.session_state['viewing_prefix'] = None
+        st.success("All history cleared!")
+        st.experimental_rerun()
+
+# Voice input component (replace path with your component)
+mycomponent = components.declare_component("mycomponent", path="mycomponent")
+voice_val = mycomponent(my_input_value="Start speaking...")
+
+tabs = st.tabs(["🎤 Voice Chat", "🔍 ArXiv Search", "💾 History", "⚙️ Settings"])
+
+# ------------------ Voice Chat Tab -------------------------
+with tabs[0]:
+    st.subheader("🎤 Voice Chat")
+    if voice_val:
+        voice_text = voice_val.strip()
+        edited_input = st.text_area("✏️ Edit Voice Input:", value=voice_text, height=100)
+        autorun = st.checkbox("⚡ Auto-Run", value=True)
+        input_changed = (voice_text != st.session_state.get('old_val'))
+
+        if autorun and input_changed:
+            st.session_state['old_val'] = voice_text
+            # Save input right away
+            saved_path = save_input_as_md(st.session_state['user_name'], edited_input, prefix="input")
+            st.success("Saved input!")
+
+        if st.button("📝 Save Input Manually"):
+            saved_path = save_input_as_md(st.session_state['user_name'], edited_input, prefix="input")
+            st.success("Saved input!")
+
+    st.write("Use the sidebar to select user and the voice input component above to record messages.")
+
+# ------------------ ArXiv Search Tab -------------------------
+with tabs[1]:
+    st.subheader("🔍 ArXiv Search")
+    query = st.text_input("Enter Query:")
+    if query and st.button("🔍 Search ArXiv"):
+        with st.spinner("Searching..."):
+            results = arxiv_search(query)
+            if results:
+                summary = summarize_arxiv_results(results)
+                # Save as response
+                save_input_as_md(st.session_state['user_name'], summary, prefix="arxiv")
+                st.write(summary)
+                # Read aloud summary
+                voice = USER_VOICES.get(st.session_state['user_name'], "en-US-AriaNeural")
+                audio_file = speak_with_edge_tts(summary, voice=voice)
+                if audio_file:
+                    play_and_download_audio(audio_file)
+            else:
+                st.warning("No results found.")
+
+# ------------------ History Tab -------------------------
+with tabs[2]:
+    st.subheader("💾 History")
+    files = list_saved_inputs()
+    conversation = []
+    for fpath in files:
+        user, ts, content = parse_md_file(fpath)
+        conversation.append((user, ts, content, fpath))
+
+    # Show conversation and read aloud each line
+    for i, (user, ts, content, fpath) in enumerate(reversed(conversation), start=1):
+        with st.expander(f"{ts} - {user}", expanded=False):
+            st.write(content)
+            if st.button(f"🔊 Read Aloud {ts}-{user}", key=f"read_{i}_{fpath}"):
                 voice = USER_VOICES.get(user, "en-US-AriaNeural")
                 audio_file = speak_with_edge_tts(content, voice=voice)
                 if audio_file:
-                    mp3_files.append(audio_file)
-                    st.write(f"**{user} ({ts}):**")
                     play_and_download_audio(audio_file)
-                save_response_as_md(st.session_state['user_name'], summary, prefix="response")
-                st.write(summary)
-                voice = USER_VOICES.get(st.session_state['user_name'], "en-US-AriaNeural")
-                audio_file = speak_with_edge_tts(summary, voice=voice)
-                if audio_file:
-                    play_and_download_audio(audio_file)
-            else:
-                st.warning("No results found on ArXiv.")
-
-    with tab3:
-        st.subheader("Dataset Search")
-        ds_searcher = SimpleDatasetSearcher()
-        query = st.text_input("Enter dataset search query:")
-        run_ds_search = st.button("Search Dataset", key="ds_search_button")
-        num_results = st.slider("Max results:", 1, 100, 20, key="ds_max_results")
-
-        if run_ds_search and query.strip():
-            with st.spinner("Searching dataset..."):
-                df = ds_searcher.load_page(0)
-                results = simple_dataset_search(query, df)
-                if not results.empty:
-                    st.write(f"Found {len(results)} results:")
-                    shown = 0
-                    for i, (_, row) in enumerate(results.iterrows(), 1):
-                        if shown >= num_results:
-                            break
-                        with st.expander(f"Result {i}", expanded=(i==1)):
-                            for k, v in row.items():
-                                st.write(f"**{k}:** {v}")
-                        shown += 1
-                else:
-                    st.warning("No matching results found.")
-
-    with tab4:
-        st.subheader("Settings")
-        if st.button("🗑️ Clear Search History", key="clear_history"):
-            for fpath in list_saved_inputs():
-                os.remove(fpath)
-            st.session_state['search_history'] = []
-            st.success("Search history cleared for everyone!")
-            st.rerun()
-
-if __name__ == "__main__":
-    main()
+
+    # Read entire conversation
+    if st.button("🔊 Read Entire Conversation"):
+        conversation_chrono = list(reversed(conversation))
+        mp3_files = []
+        for user, ts, content, fpath in conversation_chrono:
+            voice = USER_VOICES.get(user, "en-US-AriaNeural")
+            audio_file = speak_with_edge_tts(content, voice=voice)
+            if audio_file:
+                mp3_files.append(audio_file)
+                st.write(f"**{user} ({ts}):**")
+                play_and_download_audio(audio_file)
+
+        if mp3_files:
+            combined_file = f"full_conversation_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp3"
+            concatenate_mp3(mp3_files, combined_file)
+            st.write("**Full Conversation Audio:**")
+            play_and_download_audio(combined_file)
+
+# ------------------ Settings Tab -------------------------
+with tabs[3]:
+    st.subheader("⚙️ Settings")
+    st.write("Adjust parameters in the sidebar. Currently, no other settings to configure.")
+
+if st.session_state.should_rerun:
+    st.session_state.should_rerun = False
+    st.rerun()
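
The sidebar and History tab call list_saved_inputs() and parse_md_file(), but both definitions fall outside the hunks shown above. Below is a minimal sketch of what they plausibly look like, inferred from the header format that save_input_as_md writes (a "# User:" line and a "**Timestamp:**" line); the real implementations in app.py may differ.

import glob, os

def list_saved_inputs():
    # Saved markdown files, oldest first; "saved_inputs" matches SAVED_INPUTS_DIR above.
    return sorted(glob.glob(os.path.join("saved_inputs", "*.md")))

def parse_md_file(fpath):
    # Recover (user, timestamp, content) from the header written by save_input_as_md.
    user, timestamp, body = "Unknown", "Unknown", []
    with open(fpath, 'r', encoding='utf-8') as f:
        for line in f:
            if line.startswith("# User:"):
                user = line.removeprefix("# User:").strip()
            elif line.startswith("**Timestamp:**"):
                timestamp = line.removeprefix("**Timestamp:**").strip()
            else:
                body.append(line)
    return user, timestamp, "".join(body).strip()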
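
arxiv_search() is likewise outside the visible hunks. Since the new import line pulls in requests, urllib.parse.quote, and xml.etree.ElementTree, it almost certainly queries arXiv's public Atom API; the sketch below is an assumption shaped to yield the title/summary pairs that summarize_arxiv_results() consumes, not the commit's actual code.

import requests
from urllib.parse import quote
from xml.etree import ElementTree as ET

def arxiv_search(query, max_results=5):
    # Query the public arXiv Atom feed and return (title, summary) tuples.
    url = (f"http://export.arxiv.org/api/query?"
           f"search_query=all:{quote(query)}&start=0&max_results={max_results}")
    resp = requests.get(url, timeout=30)
    if resp.status_code != 200:
        return []
    ns = {"atom": "http://www.w3.org/2005/Atom"}
    root = ET.fromstring(resp.text)
    return [(entry.findtext("atom:title", "", ns).strip(),
             entry.findtext("atom:summary", "", ns).strip())
            for entry in root.findall("atom:entry", ns)]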
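
speak_with_edge_tts() appears only as a context line, so its body is not part of this diff. Assuming it is the usual synchronous bridge that Streamlit code needs over the async generator, it would look roughly like this (clean_for_speech, edge_tts_generate_audio, and asyncio all come from app.py):

def speak_with_edge_tts(text, voice="en-US-AriaNeural"):
    # Run the async Edge TTS coroutine to completion from synchronous code.
    return asyncio.run(edge_tts_generate_audio(clean_for_speech(text), voice))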
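
One inconsistency worth noting in the new code: the sidebar's clear-history button calls st.experimental_rerun() while the final should_rerun block calls st.rerun(). Newer Streamlit releases removed st.experimental_rerun in favor of st.rerun, so the sidebar call would raise AttributeError there. A small compatibility shim covers both; the helper name is ours, not the commit's.

def safe_rerun():
    # Prefer the stable API; fall back for older Streamlit versions.
    if hasattr(st, "rerun"):
        st.rerun()
    else:
        st.experimental_rerun()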
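
concatenate_mp3() splices files at the byte level. That usually plays back fine because MP3 frames are self-delimiting, but metadata between segments can cause clicks or wrong duration readings in some players. A heavier but cleaner alternative decodes and re-encodes the segments, for example with pydub (an extra dependency that needs ffmpeg, not part of this commit):

from pydub import AudioSegment

def concatenate_mp3_reencoded(files, output_file):
    # Decode every segment, then re-encode one continuous MP3 stream.
    combined = AudioSegment.empty()
    for f in files:
        combined += AudioSegment.from_mp3(f)
    combined.export(output_file, format="mp3")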