Spaces:

shukdevdatta123
/

GPT-4.5-Multimodal-Chatbot

Running

shukdevdatta123 committed on Mar 15

Commit

0381207

verified ·

1 Parent(s): f386ba9

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -95,19 +95,15 @@ def pdf_chat(pdf_file, text_query, temperature, top_p, max_output_tokens):
     return query_openai(messages, temperature, top_p, max_output_tokens)
 # Function to transcribe audio to text using OpenAI Whisper API
-def transcribe_audio(audio, openai_api_key):
     if not openai_api_key:
         return "Error: No API key provided."
     openai.api_key = openai_api_key
     try:
-        # Open the audio file and pass it as a file object
-        with open(audio, 'rb') as audio_file:
-            audio_file_content = audio_file.read()
         # Use the correct transcription API call
-        audio_file_obj = io.BytesIO(audio_file_content)
         audio_file_obj.name = 'audio.wav'  # Set a name for the file object (as OpenAI expects it)
         # Transcribe the audio to text using OpenAI's whisper model
@@ -185,8 +181,8 @@ with gr.Blocks() as demo:
     # For Voice Chat
     audio_button.click(
-        lambda audio, query, temperature, top_p, max_output_tokens: query_openai(
-            [{"role": "user", "content": [{"type": "text", "text": transcribe_audio(audio.name, api_key)}, {"type": "text", "text": query}]}],
             temperature, top_p, max_output_tokens
         ), [audio_upload, audio_query, temperature, top_p, max_output_tokens], audio_output
     )

     return query_openai(messages, temperature, top_p, max_output_tokens)
 # Function to transcribe audio to text using OpenAI Whisper API
+def transcribe_audio(audio_binary, openai_api_key):
     if not openai_api_key:
         return "Error: No API key provided."
     openai.api_key = openai_api_key
     try:
         # Use the correct transcription API call
+        audio_file_obj = io.BytesIO(audio_binary)
         audio_file_obj.name = 'audio.wav'  # Set a name for the file object (as OpenAI expects it)
         # Transcribe the audio to text using OpenAI's whisper model
     # For Voice Chat
     audio_button.click(
+        lambda audio_binary, query, temperature, top_p, max_output_tokens: query_openai(
+            [{"role": "user", "content": [{"type": "text", "text": transcribe_audio(audio_binary, api_key)}, {"type": "text", "text": query}]}],
             temperature, top_p, max_output_tokens
         ), [audio_upload, audio_query, temperature, top_p, max_output_tokens], audio_output
     )