awacke1 committed on
Commit fe1df36 · verified · 1 Parent(s): 22e76cc

Update app.py

Files changed (1)
  1. app.py +48 -210
app.py CHANGED
@@ -280,122 +280,6 @@ def process_video_with_gpt(video_input, user_prompt):
     )
     return response.choices[0].message.content
 
-
-
-def process_tts(text, voice="alloy"):
-    """
-    Process text-to-speech using OpenAI's TTS API
-    Voices available: alloy, echo, fable, onyx, nova, shimmer
-    """
-    try:
-        response = openai_client.audio.speech.create(
-            model="tts-1",
-            voice=voice,
-            input=text
-        )
-
-        # Generate a unique filename
-        filename = generate_filename("tts_output", "mp3")
-
-        # Save the audio file
-        response.stream_to_file(filename)
-
-        # Create audio player HTML
-        audio_html = f"""
-        <audio controls>
-            <source src="data:audio/mp3;base64,{base64.b64encode(open(filename, 'rb').read()).decode()}" type="audio/mp3">
-            Your browser does not support the audio element.
-        </audio>
-        """
-
-        return filename, audio_html
-    except Exception as e:
-        st.error(f"TTS Error: {str(e)}")
-        return None, None
-
-def update_chat_interface():
-    """Update the chat interface to include voice selection and TTS playback"""
-    # Add voice selection to sidebar
-    st.sidebar.markdown("### 🗣️ TTS Voice Settings")
-    selected_voice = st.sidebar.selectbox(
-        "Choose TTS Voice:",
-        ["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
-        help="Select the voice for text-to-speech playback"
-    )
-
-    # Store the selected voice in session state
-    if "selected_voice" not in st.session_state:
-        st.session_state.selected_voice = selected_voice
-
-# Modify the chat processing functions to include TTS
-def process_with_gpt(text_input):
-    """Process text with GPT-4o and add TTS."""
-    if text_input:
-        st.session_state.messages.append({"role": "user", "content": text_input})
-
-        with st.chat_message("user"):
-            st.markdown(text_input)
-
-        with st.chat_message("assistant"):
-            completion = openai_client.chat.completions.create(
-                model=st.session_state["openai_model"],
-                messages=[
-                    {"role": m["role"], "content": m["content"]}
-                    for m in st.session_state.messages
-                ],
-                stream=False
-            )
-            return_text = completion.choices[0].message.content
-            st.write("GPT-4o: " + return_text)
-
-            # Add TTS playback
-            filename, audio_html = process_tts(return_text, st.session_state.selected_voice)
-            if audio_html:
-                st.markdown(audio_html, unsafe_allow_html=True)
-
-            # Original file handling
-            filename = generate_filename("GPT-4o: " + return_text, "md")
-            create_file(filename, text_input, return_text)
-            st.session_state.messages.append({"role": "assistant", "content": return_text})
-        return return_text
-
-def process_with_claude(text_input):
-    """Process text with Claude and add TTS."""
-    if text_input:
-        with st.chat_message("user"):
-            st.markdown(text_input)
-
-        with st.chat_message("assistant"):
-            response = claude_client.messages.create(
-                model="claude-3-sonnet-20240229",
-                max_tokens=1000,
-                messages=[
-                    {"role": "user", "content": text_input}
-                ]
-            )
-            response_text = response.content[0].text
-            st.write("Claude: " + response_text)
-
-            # Add TTS playback
-            filename, audio_html = process_tts(response_text, st.session_state.selected_voice)
-            if audio_html:
-                st.markdown(audio_html, unsafe_allow_html=True)
-
-            # Original file handling
-            filename = generate_filename("Claude: " + response_text, "md")
-            create_file(filename, text_input, response_text)
-
-            st.session_state.chat_history.append({
-                "user": text_input,
-                "claude": response_text
-            })
-        return response_text
-
-
-
-
-
-
 
 def extract_urls(text):
     try:
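For reference, the removed process_tts helper centers on a single OpenAI text-to-speech call before saving the MP3 and wrapping it in an audio player. A minimal standalone sketch of that call, assuming the openai v1 Python SDK and an OPENAI_API_KEY in the environment (the function name and output path here are illustrative, not part of app.py):

# Minimal sketch of the TTS call used by the removed process_tts() helper.
# Assumes the openai Python SDK (v1.x) and OPENAI_API_KEY set in the environment.
from openai import OpenAI

client = OpenAI()

def tts_to_file(text: str, voice: str = "alloy", path: str = "tts_output.mp3") -> str:
    # tts-1 supports the voices alloy, echo, fable, onyx, nova, shimmer
    response = client.audio.speech.create(model="tts-1", voice=voice, input=text)
    response.stream_to_file(path)  # write the MP3 bytes to disk
    return path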
@@ -751,9 +635,11 @@ speech_recognition_html = """
         const output = document.getElementById('output');
         let fullTranscript = '';
         let lastUpdateTime = Date.now();
+
         // Configure recognition
         recognition.continuous = true;
         recognition.interimResults = true;
+
         // Function to start recognition
         const startRecognition = () => {
             try {
@@ -763,21 +649,24 @@ speech_recognition_html = """
                 stopButton.disabled = false;
             } catch (e) {
                 console.error(e);
-                #status.textContent = '';
                 status.textContent = 'Error: ' + e.message;
             }
         };
+
         // Auto-start on load
         window.addEventListener('load', () => {
             setTimeout(startRecognition, 1000);
         });
+
         startButton.onclick = startRecognition;
+
         stopButton.onclick = () => {
             recognition.stop();
             status.textContent = 'Stopped';
             startButton.disabled = false;
             stopButton.disabled = true;
         };
+
         clearButton.onclick = () => {
             fullTranscript = '';
             output.textContent = '';
@@ -785,9 +674,11 @@ speech_recognition_html = """
                 type: 'clear_transcript',
             }, '*');
         };
+
         recognition.onresult = (event) => {
             let interimTranscript = '';
             let finalTranscript = '';
+
             for (let i = event.resultIndex; i < event.results.length; i++) {
                 const transcript = event.results[i][0].transcript;
                 if (event.results[i].isFinal) {
@@ -796,6 +687,7 @@ speech_recognition_html = """
                     interimTranscript += transcript;
                 }
             }
+
             if (finalTranscript || (Date.now() - lastUpdateTime > 5000)) {
                 if (finalTranscript) {
                     fullTranscript += finalTranscript;
@@ -805,10 +697,14 @@ speech_recognition_html = """
                 }
                 lastUpdateTime = Date.now();
             }
+
             output.textContent = fullTranscript + (interimTranscript ? '... ' + interimTranscript : '');
             output.scrollTop = output.scrollHeight;
+
             document.getElementById('streamlit-data').value = fullTranscript;
+
         };
+
         recognition.onend = () => {
             if (!stopButton.disabled) {
                 try {
@@ -822,6 +718,7 @@ speech_recognition_html = """
                }
            }
        };
+
        recognition.onerror = (event) => {
            console.error('Recognition error:', event.error);
            status.textContent = 'Error: ' + event.error;
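The hunks above mostly add blank lines to the embedded speech-recognition script (and drop one stray #status.textContent line); the transcript flush rule is unchanged: buffered text is committed when a final result arrives or more than five seconds have passed since the last update. A rough Python restatement of that rule, purely illustrative (should_flush is not a name used in app.py):

import time

# Illustrative restatement of the flush rule in speech_recognition_html:
# commit buffered text when a final recognition result arrives or >5 s have elapsed.
def should_flush(final_text: str, last_update: float) -> bool:
    return bool(final_text) or (time.time() - last_update > 5.0)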
@@ -986,104 +883,45 @@ def main():
         transcript = st.components.v1.html(speech_recognition_html, height=400)
 
         # Update session state if there's new data
-        showTranscript=False
-        if showTranscript:
-        #transcript is not None and transcript != "":
+        if transcript is not None and transcript != "":
             st.session_state.voice_transcript = transcript
-
-            # Display the transcript in a Streamlit text area
-            st.markdown("### Processed Voice Input:")
-            st.text_area("Voice Transcript", st.session_state.voice_transcript, height=100)
-
-            # Add functionality to process the transcript
-            if st.button("Process Transcript"):
-                st.subheader("AI Response to Transcript")
-                gpt_response = process_with_gpt(st.session_state.voice_transcript)
-                st.markdown(gpt_response)
-
-            # Option to clear the transcript
-            if st.button("Clear Transcript"):
-                st.session_state.voice_transcript = ""
-                st.rerun()
-
-
-            # Buttons to process the transcript
-            if st.button("Search with GPT"):
-                st.subheader("GPT-4o Response")
-                gpt_response = process_with_gpt(st.session_state.voice_transcript)
-                st.markdown(gpt_response)
-
-            if st.button("Search with Claude"):
-                st.subheader("Claude Response")
-                claude_response = process_with_claude(st.session_state.voice_transcript)
-                st.markdown(claude_response)
-
-            if st.button("Search ArXiv"):
-                st.subheader("ArXiv Search Results")
-                arxiv_results = perform_ai_lookup(st.session_state.voice_transcript)
-                st.markdown(arxiv_results)
-
-
-            # Display last voice input
-            if st.session_state.last_voice_input:
-                st.text_area("Last Voice Input:", st.session_state.last_voice_input, height=100)
-
-
-        # Model Selection
-        model_choice = st.sidebar.radio(
-            "Choose AI Model:",
-            ["GPT-4o", "Claude-3", "GPT+Claude+Arxiv"]
-        )
-
-        # Chat Interface
-        user_input = st.text_area("Message:", height=100)
-
-        if st.button("Send 📨"):
-            if user_input:
-                if model_choice == "GPT-4o":
-                    gpt_response = process_with_gpt(user_input)
-                elif model_choice == "Claude-3":
-                    claude_response = process_with_claude(user_input)
-                else: # Both
-                    col1, col2, col3 = st.columns(3)
-                    with col2:
-                        st.subheader("Claude-3.5 Sonnet:")
-                        try:
-                            claude_response = process_with_claude(user_input)
-                        except:
-                            st.write('Claude 3.5 Sonnet out of tokens.')
-                    with col1:
-                        st.subheader("GPT-4o Omni:")
-                        try:
-                            gpt_response = process_with_gpt(user_input)
-                        except:
-                            st.write('GPT 4o out of tokens')
-                    with col3:
-                        st.subheader("Arxiv and Mistral Research:")
-                        with st.spinner("Searching ArXiv..."):
-                            #results = search_arxiv(user_input)
-                            results = perform_ai_lookup(user_input)
-
-                            st.markdown(results)
-
-        # Display Chat History
-        st.subheader("Chat History 📜")
-        tab1, tab2 = st.tabs(["Claude History", "GPT-4o History"])
-
-        with tab1:
-            for chat in st.session_state.chat_history:
-                st.text_area("You:", chat["user"], height=100)
-                st.text_area("Claude:", chat["claude"], height=200)
-                st.markdown(chat["claude"])
-
-        with tab2:
-            for message in st.session_state.messages:
-                with st.chat_message(message["role"]):
-                    st.markdown(message["content"])
-
-
-        # ------------------------------------------------------- ************************* --->
+
+            # Display the transcript in a Streamlit text area
+            st.markdown("### Processed Voice Input:")
+            st.text_area("Voice Transcript", st.session_state.voice_transcript, height=100)
+
+            # Add functionality to process the transcript
+            if st.button("Process Transcript"):
+                st.subheader("AI Response to Transcript")
+                gpt_response = process_with_gpt(st.session_state.voice_transcript)
+                st.markdown(gpt_response)
+
+            # Option to clear the transcript
+            if st.button("Clear Transcript"):
+                st.session_state.voice_transcript = ""
+                st.rerun()
+
+
+            # Buttons to process the transcript
+            if st.button("Search with GPT"):
+                st.subheader("GPT-4o Response")
+                gpt_response = process_with_gpt(st.session_state.voice_transcript)
+                st.markdown(gpt_response)
+
+            if st.button("Search with Claude"):
+                st.subheader("Claude Response")
+                claude_response = process_with_claude(st.session_state.voice_transcript)
+                st.markdown(claude_response)
+
+            if st.button("Search ArXiv"):
+                st.subheader("ArXiv Search Results")
+                arxiv_results = perform_ai_lookup(st.session_state.voice_transcript)
+                st.markdown(arxiv_results)
+
+
+            # Display last voice input
+            if st.session_state.last_voice_input:
+                st.text_area("Last Voice Input:", st.session_state.last_voice_input, height=100)
 
 
         if tab_main == "💬 Chat":
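Condensed, the replacement branch in main() keeps the transcript in st.session_state and lets each button act on it. A small self-contained Streamlit sketch of that pattern, with a stub standing in for process_with_gpt and a text input standing in for the HTML speech component (names suffixed _stub are hypothetical, not part of app.py):

# Self-contained sketch of the transcript-handling pattern in the new main() branch.
import streamlit as st

def process_with_gpt_stub(text: str) -> str:  # stand-in for app.py's process_with_gpt
    return f"(model response to: {text})"

st.session_state.setdefault("voice_transcript", "")

transcript = st.text_input("Simulated voice transcript")  # stands in for the HTML component
if transcript:
    st.session_state.voice_transcript = transcript

st.text_area("Voice Transcript", st.session_state.voice_transcript, height=100)

if st.button("Process Transcript"):
    st.markdown(process_with_gpt_stub(st.session_state.voice_transcript))

if st.button("Clear Transcript"):
    st.session_state.voice_transcript = ""
    st.rerun()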
 