Update app.py
app.py CHANGED
@@ -280,122 +280,6 @@ def process_video_with_gpt(video_input, user_prompt):
     )
     return response.choices[0].message.content
 
-
-
-def process_tts(text, voice="alloy"):
-    """
-    Process text-to-speech using OpenAI's TTS API
-    Voices available: alloy, echo, fable, onyx, nova, shimmer
-    """
-    try:
-        response = openai_client.audio.speech.create(
-            model="tts-1",
-            voice=voice,
-            input=text
-        )
-
-        # Generate a unique filename
-        filename = generate_filename("tts_output", "mp3")
-
-        # Save the audio file
-        response.stream_to_file(filename)
-
-        # Create audio player HTML
-        audio_html = f"""
-        <audio controls>
-            <source src="data:audio/mp3;base64,{base64.b64encode(open(filename, 'rb').read()).decode()}" type="audio/mp3">
-            Your browser does not support the audio element.
-        </audio>
-        """
-
-        return filename, audio_html
-    except Exception as e:
-        st.error(f"TTS Error: {str(e)}")
-        return None, None
-
-def update_chat_interface():
-    """Update the chat interface to include voice selection and TTS playback"""
-    # Add voice selection to sidebar
-    st.sidebar.markdown("### 🗣️ TTS Voice Settings")
-    selected_voice = st.sidebar.selectbox(
-        "Choose TTS Voice:",
-        ["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
-        help="Select the voice for text-to-speech playback"
-    )
-
-    # Store the selected voice in session state
-    if "selected_voice" not in st.session_state:
-        st.session_state.selected_voice = selected_voice
-
-# Modify the chat processing functions to include TTS
-def process_with_gpt(text_input):
-    """Process text with GPT-4o and add TTS."""
-    if text_input:
-        st.session_state.messages.append({"role": "user", "content": text_input})
-
-        with st.chat_message("user"):
-            st.markdown(text_input)
-
-        with st.chat_message("assistant"):
-            completion = openai_client.chat.completions.create(
-                model=st.session_state["openai_model"],
-                messages=[
-                    {"role": m["role"], "content": m["content"]}
-                    for m in st.session_state.messages
-                ],
-                stream=False
-            )
-            return_text = completion.choices[0].message.content
-            st.write("GPT-4o: " + return_text)
-
-            # Add TTS playback
-            filename, audio_html = process_tts(return_text, st.session_state.selected_voice)
-            if audio_html:
-                st.markdown(audio_html, unsafe_allow_html=True)
-
-            # Original file handling
-            filename = generate_filename("GPT-4o: " + return_text, "md")
-            create_file(filename, text_input, return_text)
-            st.session_state.messages.append({"role": "assistant", "content": return_text})
-    return return_text
-
-def process_with_claude(text_input):
-    """Process text with Claude and add TTS."""
-    if text_input:
-        with st.chat_message("user"):
-            st.markdown(text_input)
-
-        with st.chat_message("assistant"):
-            response = claude_client.messages.create(
-                model="claude-3-sonnet-20240229",
-                max_tokens=1000,
-                messages=[
-                    {"role": "user", "content": text_input}
-                ]
-            )
-            response_text = response.content[0].text
-            st.write("Claude: " + response_text)
-
-            # Add TTS playback
-            filename, audio_html = process_tts(response_text, st.session_state.selected_voice)
-            if audio_html:
-                st.markdown(audio_html, unsafe_allow_html=True)
-
-            # Original file handling
-            filename = generate_filename("Claude: " + response_text, "md")
-            create_file(filename, text_input, response_text)
-
-            st.session_state.chat_history.append({
-                "user": text_input,
-                "claude": response_text
-            })
-    return response_text
-
-
-
-
-
-
 
 def extract_urls(text):
     try:
@@ -751,9 +635,11 @@ speech_recognition_html = """
         const output = document.getElementById('output');
         let fullTranscript = '';
         let lastUpdateTime = Date.now();
+
         // Configure recognition
         recognition.continuous = true;
         recognition.interimResults = true;
+
         // Function to start recognition
         const startRecognition = () => {
             try {
@@ -763,21 +649,24 @@ speech_recognition_html = """
                 stopButton.disabled = false;
             } catch (e) {
                 console.error(e);
-                #status.textContent = '';
                 status.textContent = 'Error: ' + e.message;
             }
         };
+
         // Auto-start on load
         window.addEventListener('load', () => {
             setTimeout(startRecognition, 1000);
         });
+
         startButton.onclick = startRecognition;
+
         stopButton.onclick = () => {
             recognition.stop();
             status.textContent = 'Stopped';
             startButton.disabled = false;
             stopButton.disabled = true;
         };
+
         clearButton.onclick = () => {
             fullTranscript = '';
             output.textContent = '';
@@ -785,9 +674,11 @@ speech_recognition_html = """
                 type: 'clear_transcript',
             }, '*');
         };
+
         recognition.onresult = (event) => {
             let interimTranscript = '';
             let finalTranscript = '';
+
             for (let i = event.resultIndex; i < event.results.length; i++) {
                 const transcript = event.results[i][0].transcript;
                 if (event.results[i].isFinal) {
@@ -796,6 +687,7 @@ speech_recognition_html = """
                     interimTranscript += transcript;
                 }
             }
+
             if (finalTranscript || (Date.now() - lastUpdateTime > 5000)) {
                 if (finalTranscript) {
                     fullTranscript += finalTranscript;
@@ -805,10 +697,14 @@ speech_recognition_html = """
             }
             lastUpdateTime = Date.now();
         }
+
         output.textContent = fullTranscript + (interimTranscript ? '... ' + interimTranscript : '');
         output.scrollTop = output.scrollHeight;
+
         document.getElementById('streamlit-data').value = fullTranscript;
+
     };
+
     recognition.onend = () => {
         if (!stopButton.disabled) {
             try {
@@ -822,6 +718,7 @@ speech_recognition_html = """
            }
        }
    };
+
    recognition.onerror = (event) => {
        console.error('Recognition error:', event.error);
        status.textContent = 'Error: ' + event.error;
@@ -986,104 +883,45 @@ def main():
         transcript = st.components.v1.html(speech_recognition_html, height=400)
 
         # Update session state if there's new data
-
-        if showTranscript:
-            #transcript is not None and transcript != "":
+        if transcript is not None and transcript != "":
             st.session_state.voice_transcript = transcript
-
-        # Display the transcript in a Streamlit text area
-        st.markdown("### Processed Voice Input:")
-        st.text_area("Voice Transcript", st.session_state.voice_transcript, height=100)
-
-        # Add functionality to process the transcript
-        if st.button("Process Transcript"):
-            st.subheader("AI Response to Transcript")
-            gpt_response = process_with_gpt(st.session_state.voice_transcript)
-            st.markdown(gpt_response)
-
-        # Option to clear the transcript
-        if st.button("Clear Transcript"):
-            st.session_state.voice_transcript = ""
-            st.rerun()
-
-
-        # Buttons to process the transcript
-        if st.button("Search with GPT"):
-            st.subheader("GPT-4o Response")
-            gpt_response = process_with_gpt(st.session_state.voice_transcript)
-            st.markdown(gpt_response)
-
-        if st.button("Search with Claude"):
-            st.subheader("Claude Response")
-            claude_response = process_with_claude(st.session_state.voice_transcript)
-            st.markdown(claude_response)
-
-        if st.button("Search ArXiv"):
-            st.subheader("ArXiv Search Results")
-            arxiv_results = perform_ai_lookup(st.session_state.voice_transcript)
-            st.markdown(arxiv_results)
-
-
-        # Display last voice input
-        if st.session_state.last_voice_input:
-            st.text_area("Last Voice Input:", st.session_state.last_voice_input, height=100)
-
-
-        # Model Selection
-        model_choice = st.sidebar.radio(
-            "Choose AI Model:",
-            ["GPT-4o", "Claude-3", "GPT+Claude+Arxiv"]
-        )
-
-        # Chat Interface
-        user_input = st.text_area("Message:", height=100)
 
-
-
-
-            gpt_response = process_with_gpt(user_input)
-        elif model_choice == "Claude-3":
-            claude_response = process_with_claude(user_input)
-        else: # Both
-            col1, col2, col3 = st.columns(3)
-            with col2:
-                st.subheader("Claude-3.5 Sonnet:")
-                try:
-                    claude_response = process_with_claude(user_input)
-                except:
-                    st.write('Claude 3.5 Sonnet out of tokens.')
-            with col1:
-                st.subheader("GPT-4o Omni:")
-                try:
-                    gpt_response = process_with_gpt(user_input)
-                except:
-                    st.write('GPT 4o out of tokens')
-            with col3:
-                st.subheader("Arxiv and Mistral Research:")
-                with st.spinner("Searching ArXiv..."):
-                    #results = search_arxiv(user_input)
-                    results = perform_ai_lookup(user_input)
-
-                    st.markdown(results)
-
-        # Display Chat History
-        st.subheader("Chat History 📜")
-        tab1, tab2 = st.tabs(["Claude History", "GPT-4o History"])
+            # Display the transcript in a Streamlit text area
+            st.markdown("### Processed Voice Input:")
+            st.text_area("Voice Transcript", st.session_state.voice_transcript, height=100)
 
-
-
-
-
-
+            # Add functionality to process the transcript
+            if st.button("Process Transcript"):
+                st.subheader("AI Response to Transcript")
+                gpt_response = process_with_gpt(st.session_state.voice_transcript)
+                st.markdown(gpt_response)
 
-
-
-
-
-
-
-        # ------------------------------------------------------- ************************* --->
+            # Option to clear the transcript
+            if st.button("Clear Transcript"):
+                st.session_state.voice_transcript = ""
+                st.rerun()
+
 
+            # Buttons to process the transcript
+            if st.button("Search with GPT"):
+                st.subheader("GPT-4o Response")
+                gpt_response = process_with_gpt(st.session_state.voice_transcript)
+                st.markdown(gpt_response)
+
+            if st.button("Search with Claude"):
+                st.subheader("Claude Response")
+                claude_response = process_with_claude(st.session_state.voice_transcript)
+                st.markdown(claude_response)
+
+            if st.button("Search ArXiv"):
+                st.subheader("ArXiv Search Results")
+                arxiv_results = perform_ai_lookup(st.session_state.voice_transcript)
+                st.markdown(arxiv_results)
+
+
+            # Display last voice input
+            if st.session_state.last_voice_input:
+                st.text_area("Last Voice Input:", st.session_state.last_voice_input, height=100)
 
 
 if tab_main == "💬 Chat":