shukdevdatta123 committed on
Commit
272a0b4
·
verified ·
1 Parent(s): 9b3ff8a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -5
app.py CHANGED
@@ -3,7 +3,6 @@ import openai
3
  import base64
4
  from PIL import Image
5
  import io
6
- import os
7
 
8
  # Function to send the request to OpenAI API with an image or text input
9
  def generate_response(input_text, image, openai_api_key, reasoning_effort="medium", model_choice="o1"):
@@ -56,10 +55,19 @@ def transcribe_audio(audio, openai_api_key):
56
  return "Error: No API key provided."
57
 
58
  openai.api_key = openai_api_key
 
59
  try:
60
- # Transcribe the audio to text using the correct method
61
- audio_file = openai.Audio.transcribe(file=audio, model="whisper-1")
62
- return audio_file['text']
 
 
 
 
 
 
 
 
63
  except Exception as e:
64
  return f"Error transcribing audio: {str(e)}"
65
 
@@ -256,7 +264,7 @@ def create_interface():
256
  with gr.Row():
257
  image_input = gr.Image(label="Upload an Image", type="pil") # Image upload input
258
  input_text = gr.Textbox(label="Enter Text Question", placeholder="Ask a question or provide text", lines=2)
259
- audio_input = gr.Audio(label="Upload or Record Audio", type="filepath") # Audio upload or record input
260
 
261
  with gr.Row():
262
  reasoning_effort = gr.Dropdown(
 
3
  import base64
4
  from PIL import Image
5
  import io
 
6
 
7
  # Function to send the request to OpenAI API with an image or text input
8
  def generate_response(input_text, image, openai_api_key, reasoning_effort="medium", model_choice="o1"):
 
55
  return "Error: No API key provided."
56
 
57
  openai.api_key = openai_api_key
58
+
59
  try:
60
+ # Open the audio file and pass it as a file object
61
+ with open(audio.name, 'rb') as audio_file:
62
+ audio_file_content = audio_file.read()
63
+
64
+ # Use the correct transcription API call
65
+ audio_file_obj = io.BytesIO(audio_file_content)
66
+ audio_file_obj.name = 'audio.wav' # Set a name for the file object (as OpenAI expects it)
67
+
68
+ # Transcribe the audio to text using OpenAI's whisper model
69
+ audio_file_transcription = openai.Audio.transcribe(file=audio_file_obj, model="whisper-1")
70
+ return audio_file_transcription['text']
71
  except Exception as e:
72
  return f"Error transcribing audio: {str(e)}"
73
 
 
264
  with gr.Row():
265
  image_input = gr.Image(label="Upload an Image", type="pil") # Image upload input
266
  input_text = gr.Textbox(label="Enter Text Question", placeholder="Ask a question or provide text", lines=2)
267
+ audio_input = gr.Audio(label="Upload or Record Audio", type="file") # Audio upload or record input
268
 
269
  with gr.Row():
270
  reasoning_effort = gr.Dropdown(