Spaces:

shukdevdatta123
/

Multi-modal-o1-Chatbot

Running

shukdevdatta123 committed on Mar 8

Commit

272a0b4

verified ·

1 Parent(s): 9b3ff8a

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -3,7 +3,6 @@ import openai
 import base64
 from PIL import Image
 import io
-import os
 # Function to send the request to OpenAI API with an image or text input
 def generate_response(input_text, image, openai_api_key, reasoning_effort="medium", model_choice="o1"):
@@ -56,10 +55,19 @@ def transcribe_audio(audio, openai_api_key):
         return "Error: No API key provided."
     openai.api_key = openai_api_key
     try:
-        # Transcribe the audio to text using the correct method
-        audio_file = openai.Audio.transcribe(file=audio, model="whisper-1")
-        return audio_file['text']
     except Exception as e:
         return f"Error transcribing audio: {str(e)}"
@@ -256,7 +264,7 @@ def create_interface():
         with gr.Row():
             image_input = gr.Image(label="Upload an Image", type="pil")  # Image upload input
             input_text = gr.Textbox(label="Enter Text Question", placeholder="Ask a question or provide text", lines=2)
-            audio_input = gr.Audio(label="Upload or Record Audio", type="filepath")  # Audio upload or record input
         with gr.Row():
             reasoning_effort = gr.Dropdown(

 import base64
 from PIL import Image
 import io
 # Function to send the request to OpenAI API with an image or text input
 def generate_response(input_text, image, openai_api_key, reasoning_effort="medium", model_choice="o1"):
         return "Error: No API key provided."
     openai.api_key = openai_api_key
     try:
+        # Open the audio file and pass it as a file object
+        with open(audio.name, 'rb') as audio_file:
+            audio_file_content = audio_file.read()
+        # Use the correct transcription API call
+        audio_file_obj = io.BytesIO(audio_file_content)
+        audio_file_obj.name = 'audio.wav'  # Set a name for the file object (as OpenAI expects it)
+        # Transcribe the audio to text using OpenAI's whisper model
+        audio_file_transcription = openai.Audio.transcribe(file=audio_file_obj, model="whisper-1")
+        return audio_file_transcription['text']
     except Exception as e:
         return f"Error transcribing audio: {str(e)}"
         with gr.Row():
             image_input = gr.Image(label="Upload an Image", type="pil")  # Image upload input
             input_text = gr.Textbox(label="Enter Text Question", placeholder="Ask a question or provide text", lines=2)
+            audio_input = gr.Audio(label="Upload or Record Audio", type="file")  # Audio upload or record input
         with gr.Row():
             reasoning_effort = gr.Dropdown(