Update app.py
Browse files
app.py
CHANGED
|
@@ -2,6 +2,9 @@ import gradio as gr
|
|
| 2 |
import openai
|
| 3 |
import fitz # PyMuPDF for PDF processing
|
| 4 |
import base64
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
# Variable to store API key
|
| 7 |
api_key = ""
|
|
@@ -45,7 +48,7 @@ def image_url_chat(image_url, text_query, temperature, top_p, max_output_tokens)
|
|
| 45 |
{"role": "user", "content": [
|
| 46 |
{"type": "image_url", "image_url": {"url": image_url}}, # Corrected format
|
| 47 |
{"type": "text", "text": text_query}
|
| 48 |
-
]}
|
| 49 |
]
|
| 50 |
return query_openai(messages, temperature, top_p, max_output_tokens)
|
| 51 |
|
|
@@ -72,7 +75,7 @@ def image_chat(image_file, text_query, temperature, top_p, max_output_tokens):
|
|
| 72 |
{"role": "user", "content": [
|
| 73 |
{"type": "image_url", "image_url": {"url": image_data}}, # Fixed format
|
| 74 |
{"type": "text", "text": text_query}
|
| 75 |
-
]}
|
| 76 |
]
|
| 77 |
return query_openai(messages, temperature, top_p, max_output_tokens)
|
| 78 |
|
|
@@ -89,10 +92,33 @@ def pdf_chat(pdf_file, text_query, temperature, top_p, max_output_tokens):
|
|
| 89 |
{"role": "user", "content": [
|
| 90 |
{"type": "text", "text": text}, # Fixed format
|
| 91 |
{"type": "text", "text": text_query}
|
| 92 |
-
]}
|
| 93 |
]
|
| 94 |
return query_openai(messages, temperature, top_p, max_output_tokens)
|
| 95 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
# Function to clear the chat (Fix: Returns the correct number of outputs)
|
| 97 |
def clear_chat():
    """Reset every widget wired to the Clear Chat button to its default.

    Returns a 14-tuple — presumably seven text fields, the image upload
    (None) and its query, the PDF upload (None) and its query, then
    temperature=1.0, top_p=1.0, max_output_tokens=2048; confirm against
    the outputs list in clear_button.click.
    """
    blank_text_fields = ("",) * 7
    return blank_text_fields + (None, "", None, "", 1.0, 1.0, 2048)
|
|
@@ -144,6 +170,12 @@ with gr.Blocks() as demo:
|
|
| 144 |
pdf_output = gr.Textbox(label="Response", interactive=False)
|
| 145 |
pdf_button = gr.Button("Ask")
|
| 146 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
# Clear chat button
|
| 148 |
clear_button = gr.Button("Clear Chat")
|
| 149 |
|
|
@@ -153,6 +185,7 @@ with gr.Blocks() as demo:
|
|
| 153 |
text_button.click(text_chat, [text_query, temperature, top_p, max_output_tokens], text_output)
|
| 154 |
image_button.click(image_chat, [image_upload, image_text_query, temperature, top_p, max_output_tokens], image_output)
|
| 155 |
pdf_button.click(pdf_chat, [pdf_upload, pdf_text_query, temperature, top_p, max_output_tokens], pdf_output)
|
|
|
|
| 156 |
|
| 157 |
# Fix: Clear button resets all necessary fields correctly
|
| 158 |
clear_button.click(
|
|
|
|
| 2 |
import openai
|
| 3 |
import fitz # PyMuPDF for PDF processing
|
| 4 |
import base64
|
| 5 |
+
import openai
|
| 6 |
+
import soundfile as sf
|
| 7 |
+
import io
|
| 8 |
|
| 9 |
# Variable to store API key
|
| 10 |
api_key = ""
|
|
|
|
| 48 |
{"role": "user", "content": [
|
| 49 |
{"type": "image_url", "image_url": {"url": image_url}}, # Corrected format
|
| 50 |
{"type": "text", "text": text_query}
|
| 51 |
+
]},
|
| 52 |
]
|
| 53 |
return query_openai(messages, temperature, top_p, max_output_tokens)
|
| 54 |
|
|
|
|
| 75 |
{"role": "user", "content": [
|
| 76 |
{"type": "image_url", "image_url": {"url": image_data}}, # Fixed format
|
| 77 |
{"type": "text", "text": text_query}
|
| 78 |
+
]},
|
| 79 |
]
|
| 80 |
return query_openai(messages, temperature, top_p, max_output_tokens)
|
| 81 |
|
|
|
|
| 92 |
{"role": "user", "content": [
|
| 93 |
{"type": "text", "text": text}, # Fixed format
|
| 94 |
{"type": "text", "text": text_query}
|
| 95 |
+
]},
|
| 96 |
]
|
| 97 |
return query_openai(messages, temperature, top_p, max_output_tokens)
|
| 98 |
|
| 99 |
+
# Function to process an uploaded audio file and transcribe it to text
def transcribe_audio(audio_file):
    """Transcribe an uploaded audio file with OpenAI's Whisper API.

    Args:
        audio_file: the object produced by the gradio File component
            (exposes a ``.name`` path to the uploaded file), or None.

    Returns:
        The transcript text on success, otherwise a human-readable
        error-message string (the Gradio UI displays either).
    """
    if audio_file is None:
        return "Please upload an audio file."

    try:
        # Re-encode to WAV in memory so the API always receives a
        # supported, consistent container regardless of the upload format.
        audio_data, samplerate = sf.read(audio_file.name)
        audio_buffer = io.BytesIO()
        sf.write(audio_buffer, audio_data, samplerate, format='WAV')
        audio_buffer.seek(0)
        # Fix: the OpenAI SDK infers the audio format from the file
        # object's name; a bare BytesIO has none, so the request would be
        # rejected ("Unrecognized file format") without this attribute.
        audio_buffer.name = "audio.wav"

        # Transcribe the audio using OpenAI's Whisper API
        transcript = openai.Audio.transcribe(
            model="whisper-1",
            file=audio_buffer
        )

        return transcript["text"]

    except Exception as e:
        # Surface the failure as a string in the UI instead of crashing
        # the Gradio callback (matches the app's other handlers).
        return f"Error in transcription: {str(e)}"
|
| 121 |
+
|
| 122 |
# Function to clear the chat (Fix: Returns the correct number of outputs)
|
| 123 |
def clear_chat():
    """Return default values for all fields cleared by the Clear Chat button.

    The 14 slots appear to be: seven empty text boxes, image upload
    (None), image query, PDF upload (None), PDF query, then
    temperature=1.0, top_p=1.0, max_output_tokens=2048 — verify order
    against the clear_button.click outputs.
    """
    defaults = []
    defaults.extend([""] * 7)             # text inputs/outputs
    defaults.extend([None, "", None, ""])  # uploads and their queries
    defaults.extend([1.0, 1.0, 2048])      # sampling parameters
    return tuple(defaults)
|
|
|
|
| 170 |
pdf_output = gr.Textbox(label="Response", interactive=False)
|
| 171 |
pdf_button = gr.Button("Ask")
|
| 172 |
|
| 173 |
+
with gr.Tab("Voice Chat"):
|
| 174 |
+
audio_upload = gr.File(label="Upload an Audio File", type="file")
|
| 175 |
+
audio_query = gr.Textbox(label="Ask about the transcription")
|
| 176 |
+
audio_output = gr.Textbox(label="Response", interactive=False)
|
| 177 |
+
audio_button = gr.Button("Ask")
|
| 178 |
+
|
| 179 |
# Clear chat button
|
| 180 |
clear_button = gr.Button("Clear Chat")
|
| 181 |
|
|
|
|
| 185 |
text_button.click(text_chat, [text_query, temperature, top_p, max_output_tokens], text_output)
|
| 186 |
image_button.click(image_chat, [image_upload, image_text_query, temperature, top_p, max_output_tokens], image_output)
|
| 187 |
pdf_button.click(pdf_chat, [pdf_upload, pdf_text_query, temperature, top_p, max_output_tokens], pdf_output)
|
| 188 |
+
audio_button.click(lambda audio, query, temperature, top_p, max_output_tokens: query_openai([{"role": "user", "content": [{"type": "text", "text": transcribe_audio(audio)}, {"type": "text", "text": query}]}], temperature, top_p, max_output_tokens), [audio_upload, audio_query, temperature, top_p, max_output_tokens], audio_output)
|
| 189 |
|
| 190 |
# Fix: Clear button resets all necessary fields correctly
|
| 191 |
clear_button.click(
|