Update app.py
Browse files
app.py
CHANGED
@@ -3,7 +3,6 @@ import openai
|
|
3 |
import base64
|
4 |
from PIL import Image
|
5 |
import io
|
6 |
-
import os
|
7 |
|
8 |
# Function to send the request to OpenAI API with an image or text input
|
9 |
def generate_response(input_text, image, openai_api_key, reasoning_effort="medium", model_choice="o1"):
|
@@ -56,10 +55,19 @@ def transcribe_audio(audio, openai_api_key):
|
|
56 |
return "Error: No API key provided."
|
57 |
|
58 |
openai.api_key = openai_api_key
|
|
|
59 |
try:
|
60 |
-
#
|
61 |
-
|
62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
except Exception as e:
|
64 |
return f"Error transcribing audio: {str(e)}"
|
65 |
|
@@ -256,7 +264,7 @@ def create_interface():
|
|
256 |
with gr.Row():
|
257 |
image_input = gr.Image(label="Upload an Image", type="pil") # Image upload input
|
258 |
input_text = gr.Textbox(label="Enter Text Question", placeholder="Ask a question or provide text", lines=2)
|
259 |
-
audio_input = gr.Audio(label="Upload or Record Audio", type="
|
260 |
|
261 |
with gr.Row():
|
262 |
reasoning_effort = gr.Dropdown(
|
|
|
3 |
import base64
|
4 |
from PIL import Image
|
5 |
import io
|
|
|
6 |
|
7 |
# Function to send the request to OpenAI API with an image or text input
|
8 |
def generate_response(input_text, image, openai_api_key, reasoning_effort="medium", model_choice="o1"):
|
|
|
55 |
return "Error: No API key provided."
|
56 |
|
57 |
openai.api_key = openai_api_key
|
58 |
+
|
59 |
try:
|
60 |
+
# Open the uploaded audio file in binary mode and read its raw bytes
|
61 |
+
with open(audio.name, 'rb') as audio_file:
|
62 |
+
audio_file_content = audio_file.read()
|
63 |
+
|
64 |
+
# Wrap the raw bytes in an in-memory file object to hand to the transcription API
|
65 |
+
audio_file_obj = io.BytesIO(audio_file_content)
|
66 |
+
audio_file_obj.name = 'audio.wav' # Set a name for the file object (as OpenAI expects it)
|
67 |
+
|
68 |
+
# Transcribe the audio to text using OpenAI's whisper model
|
69 |
+
audio_file_transcription = openai.Audio.transcribe(file=audio_file_obj, model="whisper-1")
|
70 |
+
return audio_file_transcription['text']
|
71 |
except Exception as e:
|
72 |
return f"Error transcribing audio: {str(e)}"
|
73 |
|
|
|
264 |
with gr.Row():
|
265 |
image_input = gr.Image(label="Upload an Image", type="pil") # Image upload input
|
266 |
input_text = gr.Textbox(label="Enter Text Question", placeholder="Ask a question or provide text", lines=2)
|
267 |
+
audio_input = gr.Audio(label="Upload or Record Audio", type="file") # Audio upload or record input
|
268 |
|
269 |
with gr.Row():
|
270 |
reasoning_effort = gr.Dropdown(
|