Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -280,122 +280,6 @@ def process_video_with_gpt(video_input, user_prompt):
|
|
280 |
)
|
281 |
return response.choices[0].message.content
|
282 |
|
283 |
-
|
284 |
-
|
285 |
-
def process_tts(text, voice="alloy"):
|
286 |
-
"""
|
287 |
-
Process text-to-speech using OpenAI's TTS API
|
288 |
-
Voices available: alloy, echo, fable, onyx, nova, shimmer
|
289 |
-
"""
|
290 |
-
try:
|
291 |
-
response = openai_client.audio.speech.create(
|
292 |
-
model="tts-1",
|
293 |
-
voice=voice,
|
294 |
-
input=text
|
295 |
-
)
|
296 |
-
|
297 |
-
# Generate a unique filename
|
298 |
-
filename = generate_filename("tts_output", "mp3")
|
299 |
-
|
300 |
-
# Save the audio file
|
301 |
-
response.stream_to_file(filename)
|
302 |
-
|
303 |
-
# Create audio player HTML
|
304 |
-
audio_html = f"""
|
305 |
-
<audio controls>
|
306 |
-
<source src="data:audio/mp3;base64,{base64.b64encode(open(filename, 'rb').read()).decode()}" type="audio/mp3">
|
307 |
-
Your browser does not support the audio element.
|
308 |
-
</audio>
|
309 |
-
"""
|
310 |
-
|
311 |
-
return filename, audio_html
|
312 |
-
except Exception as e:
|
313 |
-
st.error(f"TTS Error: {str(e)}")
|
314 |
-
return None, None
|
315 |
-
|
316 |
-
def update_chat_interface():
|
317 |
-
"""Update the chat interface to include voice selection and TTS playback"""
|
318 |
-
# Add voice selection to sidebar
|
319 |
-
st.sidebar.markdown("### 🗣️ TTS Voice Settings")
|
320 |
-
selected_voice = st.sidebar.selectbox(
|
321 |
-
"Choose TTS Voice:",
|
322 |
-
["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
|
323 |
-
help="Select the voice for text-to-speech playback"
|
324 |
-
)
|
325 |
-
|
326 |
-
# Store the selected voice in session state
|
327 |
-
if "selected_voice" not in st.session_state:
|
328 |
-
st.session_state.selected_voice = selected_voice
|
329 |
-
|
330 |
-
# Modify the chat processing functions to include TTS
|
331 |
-
def process_with_gpt(text_input):
|
332 |
-
"""Process text with GPT-4o and add TTS."""
|
333 |
-
if text_input:
|
334 |
-
st.session_state.messages.append({"role": "user", "content": text_input})
|
335 |
-
|
336 |
-
with st.chat_message("user"):
|
337 |
-
st.markdown(text_input)
|
338 |
-
|
339 |
-
with st.chat_message("assistant"):
|
340 |
-
completion = openai_client.chat.completions.create(
|
341 |
-
model=st.session_state["openai_model"],
|
342 |
-
messages=[
|
343 |
-
{"role": m["role"], "content": m["content"]}
|
344 |
-
for m in st.session_state.messages
|
345 |
-
],
|
346 |
-
stream=False
|
347 |
-
)
|
348 |
-
return_text = completion.choices[0].message.content
|
349 |
-
st.write("GPT-4o: " + return_text)
|
350 |
-
|
351 |
-
# Add TTS playback
|
352 |
-
filename, audio_html = process_tts(return_text, st.session_state.selected_voice)
|
353 |
-
if audio_html:
|
354 |
-
st.markdown(audio_html, unsafe_allow_html=True)
|
355 |
-
|
356 |
-
# Original file handling
|
357 |
-
filename = generate_filename("GPT-4o: " + return_text, "md")
|
358 |
-
create_file(filename, text_input, return_text)
|
359 |
-
st.session_state.messages.append({"role": "assistant", "content": return_text})
|
360 |
-
return return_text
|
361 |
-
|
362 |
-
def process_with_claude(text_input):
|
363 |
-
"""Process text with Claude and add TTS."""
|
364 |
-
if text_input:
|
365 |
-
with st.chat_message("user"):
|
366 |
-
st.markdown(text_input)
|
367 |
-
|
368 |
-
with st.chat_message("assistant"):
|
369 |
-
response = claude_client.messages.create(
|
370 |
-
model="claude-3-sonnet-20240229",
|
371 |
-
max_tokens=1000,
|
372 |
-
messages=[
|
373 |
-
{"role": "user", "content": text_input}
|
374 |
-
]
|
375 |
-
)
|
376 |
-
response_text = response.content[0].text
|
377 |
-
st.write("Claude: " + response_text)
|
378 |
-
|
379 |
-
# Add TTS playback
|
380 |
-
filename, audio_html = process_tts(response_text, st.session_state.selected_voice)
|
381 |
-
if audio_html:
|
382 |
-
st.markdown(audio_html, unsafe_allow_html=True)
|
383 |
-
|
384 |
-
# Original file handling
|
385 |
-
filename = generate_filename("Claude: " + response_text, "md")
|
386 |
-
create_file(filename, text_input, response_text)
|
387 |
-
|
388 |
-
st.session_state.chat_history.append({
|
389 |
-
"user": text_input,
|
390 |
-
"claude": response_text
|
391 |
-
})
|
392 |
-
return response_text
|
393 |
-
|
394 |
-
|
395 |
-
|
396 |
-
|
397 |
-
|
398 |
-
|
399 |
|
400 |
def extract_urls(text):
|
401 |
try:
|
@@ -751,9 +635,11 @@ speech_recognition_html = """
|
|
751 |
const output = document.getElementById('output');
|
752 |
let fullTranscript = '';
|
753 |
let lastUpdateTime = Date.now();
|
|
|
754 |
// Configure recognition
|
755 |
recognition.continuous = true;
|
756 |
recognition.interimResults = true;
|
|
|
757 |
// Function to start recognition
|
758 |
const startRecognition = () => {
|
759 |
try {
|
@@ -763,21 +649,24 @@ speech_recognition_html = """
|
|
763 |
stopButton.disabled = false;
|
764 |
} catch (e) {
|
765 |
console.error(e);
|
766 |
-
#status.textContent = '';
|
767 |
status.textContent = 'Error: ' + e.message;
|
768 |
}
|
769 |
};
|
|
|
770 |
// Auto-start on load
|
771 |
window.addEventListener('load', () => {
|
772 |
setTimeout(startRecognition, 1000);
|
773 |
});
|
|
|
774 |
startButton.onclick = startRecognition;
|
|
|
775 |
stopButton.onclick = () => {
|
776 |
recognition.stop();
|
777 |
status.textContent = 'Stopped';
|
778 |
startButton.disabled = false;
|
779 |
stopButton.disabled = true;
|
780 |
};
|
|
|
781 |
clearButton.onclick = () => {
|
782 |
fullTranscript = '';
|
783 |
output.textContent = '';
|
@@ -785,9 +674,11 @@ speech_recognition_html = """
|
|
785 |
type: 'clear_transcript',
|
786 |
}, '*');
|
787 |
};
|
|
|
788 |
recognition.onresult = (event) => {
|
789 |
let interimTranscript = '';
|
790 |
let finalTranscript = '';
|
|
|
791 |
for (let i = event.resultIndex; i < event.results.length; i++) {
|
792 |
const transcript = event.results[i][0].transcript;
|
793 |
if (event.results[i].isFinal) {
|
@@ -796,6 +687,7 @@ speech_recognition_html = """
|
|
796 |
interimTranscript += transcript;
|
797 |
}
|
798 |
}
|
|
|
799 |
if (finalTranscript || (Date.now() - lastUpdateTime > 5000)) {
|
800 |
if (finalTranscript) {
|
801 |
fullTranscript += finalTranscript;
|
@@ -805,10 +697,14 @@ speech_recognition_html = """
|
|
805 |
}
|
806 |
lastUpdateTime = Date.now();
|
807 |
}
|
|
|
808 |
output.textContent = fullTranscript + (interimTranscript ? '... ' + interimTranscript : '');
|
809 |
output.scrollTop = output.scrollHeight;
|
|
|
810 |
document.getElementById('streamlit-data').value = fullTranscript;
|
|
|
811 |
};
|
|
|
812 |
recognition.onend = () => {
|
813 |
if (!stopButton.disabled) {
|
814 |
try {
|
@@ -822,6 +718,7 @@ speech_recognition_html = """
|
|
822 |
}
|
823 |
}
|
824 |
};
|
|
|
825 |
recognition.onerror = (event) => {
|
826 |
console.error('Recognition error:', event.error);
|
827 |
status.textContent = 'Error: ' + event.error;
|
@@ -986,104 +883,45 @@ def main():
|
|
986 |
transcript = st.components.v1.html(speech_recognition_html, height=400)
|
987 |
|
988 |
# Update session state if there's new data
|
989 |
-
|
990 |
-
if showTranscript:
|
991 |
-
#transcript is not None and transcript != "":
|
992 |
st.session_state.voice_transcript = transcript
|
993 |
-
|
994 |
-
# Display the transcript in a Streamlit text area
|
995 |
-
st.markdown("### Processed Voice Input:")
|
996 |
-
st.text_area("Voice Transcript", st.session_state.voice_transcript, height=100)
|
997 |
-
|
998 |
-
# Add functionality to process the transcript
|
999 |
-
if st.button("Process Transcript"):
|
1000 |
-
st.subheader("AI Response to Transcript")
|
1001 |
-
gpt_response = process_with_gpt(st.session_state.voice_transcript)
|
1002 |
-
st.markdown(gpt_response)
|
1003 |
-
|
1004 |
-
# Option to clear the transcript
|
1005 |
-
if st.button("Clear Transcript"):
|
1006 |
-
st.session_state.voice_transcript = ""
|
1007 |
-
st.rerun()
|
1008 |
-
|
1009 |
-
|
1010 |
-
# Buttons to process the transcript
|
1011 |
-
if st.button("Search with GPT"):
|
1012 |
-
st.subheader("GPT-4o Response")
|
1013 |
-
gpt_response = process_with_gpt(st.session_state.voice_transcript)
|
1014 |
-
st.markdown(gpt_response)
|
1015 |
-
|
1016 |
-
if st.button("Search with Claude"):
|
1017 |
-
st.subheader("Claude Response")
|
1018 |
-
claude_response = process_with_claude(st.session_state.voice_transcript)
|
1019 |
-
st.markdown(claude_response)
|
1020 |
-
|
1021 |
-
if st.button("Search ArXiv"):
|
1022 |
-
st.subheader("ArXiv Search Results")
|
1023 |
-
arxiv_results = perform_ai_lookup(st.session_state.voice_transcript)
|
1024 |
-
st.markdown(arxiv_results)
|
1025 |
-
|
1026 |
-
|
1027 |
-
# Display last voice input
|
1028 |
-
if st.session_state.last_voice_input:
|
1029 |
-
st.text_area("Last Voice Input:", st.session_state.last_voice_input, height=100)
|
1030 |
-
|
1031 |
-
|
1032 |
-
# Model Selection
|
1033 |
-
model_choice = st.sidebar.radio(
|
1034 |
-
"Choose AI Model:",
|
1035 |
-
["GPT-4o", "Claude-3", "GPT+Claude+Arxiv"]
|
1036 |
-
)
|
1037 |
-
|
1038 |
-
# Chat Interface
|
1039 |
-
user_input = st.text_area("Message:", height=100)
|
1040 |
|
1041 |
-
|
1042 |
-
|
1043 |
-
|
1044 |
-
gpt_response = process_with_gpt(user_input)
|
1045 |
-
elif model_choice == "Claude-3":
|
1046 |
-
claude_response = process_with_claude(user_input)
|
1047 |
-
else: # Both
|
1048 |
-
col1, col2, col3 = st.columns(3)
|
1049 |
-
with col2:
|
1050 |
-
st.subheader("Claude-3.5 Sonnet:")
|
1051 |
-
try:
|
1052 |
-
claude_response = process_with_claude(user_input)
|
1053 |
-
except:
|
1054 |
-
st.write('Claude 3.5 Sonnet out of tokens.')
|
1055 |
-
with col1:
|
1056 |
-
st.subheader("GPT-4o Omni:")
|
1057 |
-
try:
|
1058 |
-
gpt_response = process_with_gpt(user_input)
|
1059 |
-
except:
|
1060 |
-
st.write('GPT 4o out of tokens')
|
1061 |
-
with col3:
|
1062 |
-
st.subheader("Arxiv and Mistral Research:")
|
1063 |
-
with st.spinner("Searching ArXiv..."):
|
1064 |
-
#results = search_arxiv(user_input)
|
1065 |
-
results = perform_ai_lookup(user_input)
|
1066 |
-
|
1067 |
-
st.markdown(results)
|
1068 |
-
|
1069 |
-
# Display Chat History
|
1070 |
-
st.subheader("Chat History 📜")
|
1071 |
-
tab1, tab2 = st.tabs(["Claude History", "GPT-4o History"])
|
1072 |
|
1073 |
-
|
1074 |
-
|
1075 |
-
|
1076 |
-
|
1077 |
-
|
1078 |
|
1079 |
-
|
1080 |
-
|
1081 |
-
|
1082 |
-
|
1083 |
-
|
1084 |
-
|
1085 |
-
# ------------------------------------------------------- ************************* --->
|
1086 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1087 |
|
1088 |
|
1089 |
if tab_main == "💬 Chat":
|
|
|
280 |
)
|
281 |
return response.choices[0].message.content
|
282 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
283 |
|
284 |
def extract_urls(text):
|
285 |
try:
|
|
|
635 |
const output = document.getElementById('output');
|
636 |
let fullTranscript = '';
|
637 |
let lastUpdateTime = Date.now();
|
638 |
+
|
639 |
// Configure recognition
|
640 |
recognition.continuous = true;
|
641 |
recognition.interimResults = true;
|
642 |
+
|
643 |
// Function to start recognition
|
644 |
const startRecognition = () => {
|
645 |
try {
|
|
|
649 |
stopButton.disabled = false;
|
650 |
} catch (e) {
|
651 |
console.error(e);
|
|
|
652 |
status.textContent = 'Error: ' + e.message;
|
653 |
}
|
654 |
};
|
655 |
+
|
656 |
// Auto-start on load
|
657 |
window.addEventListener('load', () => {
|
658 |
setTimeout(startRecognition, 1000);
|
659 |
});
|
660 |
+
|
661 |
startButton.onclick = startRecognition;
|
662 |
+
|
663 |
stopButton.onclick = () => {
|
664 |
recognition.stop();
|
665 |
status.textContent = 'Stopped';
|
666 |
startButton.disabled = false;
|
667 |
stopButton.disabled = true;
|
668 |
};
|
669 |
+
|
670 |
clearButton.onclick = () => {
|
671 |
fullTranscript = '';
|
672 |
output.textContent = '';
|
|
|
674 |
type: 'clear_transcript',
|
675 |
}, '*');
|
676 |
};
|
677 |
+
|
678 |
recognition.onresult = (event) => {
|
679 |
let interimTranscript = '';
|
680 |
let finalTranscript = '';
|
681 |
+
|
682 |
for (let i = event.resultIndex; i < event.results.length; i++) {
|
683 |
const transcript = event.results[i][0].transcript;
|
684 |
if (event.results[i].isFinal) {
|
|
|
687 |
interimTranscript += transcript;
|
688 |
}
|
689 |
}
|
690 |
+
|
691 |
if (finalTranscript || (Date.now() - lastUpdateTime > 5000)) {
|
692 |
if (finalTranscript) {
|
693 |
fullTranscript += finalTranscript;
|
|
|
697 |
}
|
698 |
lastUpdateTime = Date.now();
|
699 |
}
|
700 |
+
|
701 |
output.textContent = fullTranscript + (interimTranscript ? '... ' + interimTranscript : '');
|
702 |
output.scrollTop = output.scrollHeight;
|
703 |
+
|
704 |
document.getElementById('streamlit-data').value = fullTranscript;
|
705 |
+
|
706 |
};
|
707 |
+
|
708 |
recognition.onend = () => {
|
709 |
if (!stopButton.disabled) {
|
710 |
try {
|
|
|
718 |
}
|
719 |
}
|
720 |
};
|
721 |
+
|
722 |
recognition.onerror = (event) => {
|
723 |
console.error('Recognition error:', event.error);
|
724 |
status.textContent = 'Error: ' + event.error;
|
|
|
883 |
transcript = st.components.v1.html(speech_recognition_html, height=400)
|
884 |
|
885 |
# Update session state if there's new data
|
886 |
+
if transcript is not None and transcript != "":
|
|
|
|
|
887 |
st.session_state.voice_transcript = transcript
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
888 |
|
889 |
+
# Display the transcript in a Streamlit text area
|
890 |
+
st.markdown("### Processed Voice Input:")
|
891 |
+
st.text_area("Voice Transcript", st.session_state.voice_transcript, height=100)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
892 |
|
893 |
+
# Add functionality to process the transcript
|
894 |
+
if st.button("Process Transcript"):
|
895 |
+
st.subheader("AI Response to Transcript")
|
896 |
+
gpt_response = process_with_gpt(st.session_state.voice_transcript)
|
897 |
+
st.markdown(gpt_response)
|
898 |
|
899 |
+
# Option to clear the transcript
|
900 |
+
if st.button("Clear Transcript"):
|
901 |
+
st.session_state.voice_transcript = ""
|
902 |
+
st.rerun()
|
903 |
+
|
|
|
|
|
904 |
|
905 |
+
# Buttons to process the transcript
|
906 |
+
if st.button("Search with GPT"):
|
907 |
+
st.subheader("GPT-4o Response")
|
908 |
+
gpt_response = process_with_gpt(st.session_state.voice_transcript)
|
909 |
+
st.markdown(gpt_response)
|
910 |
+
|
911 |
+
if st.button("Search with Claude"):
|
912 |
+
st.subheader("Claude Response")
|
913 |
+
claude_response = process_with_claude(st.session_state.voice_transcript)
|
914 |
+
st.markdown(claude_response)
|
915 |
+
|
916 |
+
if st.button("Search ArXiv"):
|
917 |
+
st.subheader("ArXiv Search Results")
|
918 |
+
arxiv_results = perform_ai_lookup(st.session_state.voice_transcript)
|
919 |
+
st.markdown(arxiv_results)
|
920 |
+
|
921 |
+
|
922 |
+
# Display last voice input
|
923 |
+
if st.session_state.last_voice_input:
|
924 |
+
st.text_area("Last Voice Input:", st.session_state.last_voice_input, height=100)
|
925 |
|
926 |
|
927 |
if tab_main == "💬 Chat":
|