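"""Multimodal chatbot (text + image + voice) built with Gradio.

Text and image inputs are sent to OpenAI's o1 or o3-mini chat models via the
legacy (pre-1.0) openai SDK; audio is first transcribed with Whisper.
"""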
import gradio as gr
import openai
import base64
from PIL import Image
import io
# Send a request to the OpenAI API with text and/or image input
def generate_response(input_text, image, openai_api_key, reasoning_effort="medium", model_choice="o1"):
    if not openai_api_key:
        return "Error: No API key provided."

    openai.api_key = openai_api_key

    # Build the user message; only "o1" accepts image input, "o3-mini" is text-only
    if model_choice == "o1" and image is not None:
        # Encode the uploaded image as a base64 data URL and send it together
        # with any accompanying text question
        image_url = f"data:image/png;base64,{get_base64_string_from_image(image)}"
        content = []
        if input_text:
            content.append({"type": "text", "text": input_text})
        content.append({"type": "image_url", "image_url": {"url": image_url}})
        messages = [{"role": "user", "content": content}]
    else:
        messages = [
            {"role": "user", "content": [{"type": "text", "text": input_text}]}
        ]

    try:
        # Call the API with the selected model (o1 or o3-mini)
        response = openai.ChatCompletion.create(
            model=model_choice,
            messages=messages,
            reasoning_effort=reasoning_effort,  # "low", "medium", or "high"
            max_completion_tokens=2000          # limit the response length
        )
        return response["choices"][0]["message"]["content"]
    except Exception as e:
        return f"Error calling OpenAI API: {str(e)}"
# Convert an uploaded PIL image to a base64 string
def get_base64_string_from_image(pil_image):
    # Serialize the PIL image to PNG bytes, then base64-encode them
    buffered = io.BytesIO()
    pil_image.save(buffered, format="PNG")
    img_bytes = buffered.getvalue()
    base64_str = base64.b64encode(img_bytes).decode("utf-8")
    return base64_str
# Transcribe audio to text using the OpenAI Whisper API
def transcribe_audio(audio, openai_api_key):
    if not openai_api_key:
        return "Error: No API key provided."

    openai.api_key = openai_api_key

    try:
        # Pass the opened file object directly; the SDK reads it and uses its
        # .name attribute as the filename
        with open(audio, "rb") as audio_file:
            transcription = openai.Audio.transcribe(file=audio_file, model="whisper-1")
        return transcription["text"]
    except Exception as e:
        return f"Error transcribing audio: {str(e)}"
# Gradio callback: route text, image, and audio input to the model
def chatbot(input_text, image, audio, openai_api_key, reasoning_effort, model_choice, history=None):
    # Avoid a mutable default argument; Gradio supplies the state on each call
    if history is None:
        history = []

    # If audio was provided, transcribe it to text first
    if audio:
        input_text = transcribe_audio(audio, openai_api_key)

    response = generate_response(input_text, image, openai_api_key, reasoning_effort, model_choice)

    # Append the exchange to the history shown in the Chatbot component
    history.append((f"User: {input_text}", f"Assistant: {response}"))
    return "", history
# Clear the input text box and the chat history
def clear_history():
    return "", []
# Custom CSS styles with animations and button colors
custom_css = """
/* General body styles */
.gradio-container {
    font-family: 'Arial', sans-serif;
    background-color: #f8f9fa;
    color: #333;
}

/* Header styles */
.gradio-header {
    background-color: #007bff;
    color: white;
    padding: 20px;
    text-align: center;
    border-radius: 8px;
    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
    animation: fadeIn 1s ease-out;
}

.gradio-header h1 {
    font-size: 2.5rem;
}

.gradio-header h3 {
    font-size: 1.2rem;
    margin-top: 10px;
}

/* Chatbot container styles */
.gradio-chatbot {
    background-color: #fff;
    border-radius: 10px;
    padding: 20px;
    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
    max-height: 500px;
    overflow-y: auto;
    animation: fadeIn 2s ease-out;
}

/* Input field styles */
.gradio-textbox, .gradio-dropdown, .gradio-image, .gradio-audio {
    border-radius: 8px;
    border: 2px solid #ccc;
    padding: 10px;
    margin-bottom: 10px;
    width: 100%;
    font-size: 1rem;
    transition: all 0.3s ease;
}

.gradio-textbox:focus, .gradio-dropdown:focus, .gradio-image:focus, .gradio-audio:focus {
    border-color: #007bff;
}

/* Button styles */
/* Send button: sky blue */
#submit-btn {
    background-color: #00aaff; /* Sky blue */
    color: white;
    border: none;
    border-radius: 8px;
    padding: 10px 19px;
    font-size: 1.1rem;
    cursor: pointer;
    transition: all 0.3s ease;
    margin-left: auto;
    margin-right: auto;
    display: block;
    margin-top: 10px;
}

#submit-btn:hover {
    background-color: #0099cc; /* Slightly darker blue */
}

#submit-btn:active {
    transform: scale(0.95);
}

#clear-history {
    background-color: #f04e4e; /* Slightly darker red */
    color: white;
    border: none;
    border-radius: 8px;
    padding: 10px 13px;
    font-size: 1.1rem;
    cursor: pointer;
    transition: all 0.3s ease;
    margin-top: 10px;
}

#clear-history:hover {
    background-color: #f5a4a4; /* Light red */
}

#clear-history:active {
    transform: scale(0.95);
}

/* Chat history styles */
.gradio-chatbot .message {
    margin-bottom: 10px;
}

.gradio-chatbot .user {
    background-color: #007bff;
    color: white;
    padding: 10px;
    border-radius: 12px;
    max-width: 70%;
    animation: slideInUser 0.5s ease-out;
}

.gradio-chatbot .assistant {
    background-color: #f1f1f1;
    color: #333;
    padding: 10px;
    border-radius: 12px;
    max-width: 70%;
    margin-left: auto;
    animation: slideInAssistant 0.5s ease-out;
}

/* Animation keyframes */
@keyframes fadeIn {
    0% { opacity: 0; }
    100% { opacity: 1; }
}

@keyframes slideInUser {
    0% { transform: translateX(-100%); }
    100% { transform: translateX(0); }
}

@keyframes slideInAssistant {
    0% { transform: translateX(100%); }
    100% { transform: translateX(0); }
}

/* Mobile responsiveness */
@media (max-width: 768px) {
    .gradio-header h1 {
        font-size: 1.8rem;
    }
    .gradio-header h3 {
        font-size: 1rem;
    }
    .gradio-chatbot {
        max-height: 400px;
    }
    .gradio-textbox, .gradio-dropdown, .gradio-image, .gradio-audio {
        width: 100%;
    }
    #submit-btn, #clear-history {
        width: 100%;
        margin-left: 0;
    }
}
"""
# Gradio interface setup
def create_interface():
    with gr.Blocks(css=custom_css) as demo:
        gr.Markdown("""
        <div class="gradio-header">
            <h1>Multimodal Chatbot (Text + Image + Voice)</h1>
            <h3>Interact with a chatbot using text, image, or voice inputs</h3>
        </div>
        """)

        # Add a description inside an expandable accordion
        with gr.Accordion("Click to expand for details", open=False):
            gr.Markdown("""
            ### Description:
            This is a multimodal chatbot that can handle text, image, and voice inputs.
            - Ask a question or provide text, and the assistant will respond.
            - Upload an image, and the assistant will process it and answer questions about it.
            - Voice input is supported: upload or record an audio file, and it will be transcribed to text and sent to the assistant.
            - Enter your OpenAI API key to start interacting with the model.
            - Use the 'Clear History' button to remove the conversation history.
            - "o1" is for image chat and "o3-mini" is for text chat.

            ### Reasoning Effort:
            The reasoning effort controls how complex or detailed the assistant's answers should be.
            - **Low**: Provides quick, concise answers with minimal reasoning or detail.
            - **Medium**: Offers a balanced response with a reasonable level of detail and thought.
            - **High**: Produces more detailed, analytical, or thoughtful responses, requiring deeper reasoning.
            """)

        with gr.Row():
            openai_api_key = gr.Textbox(label="Enter OpenAI API Key", type="password", placeholder="sk-...", interactive=True)

        with gr.Row():
            image_input = gr.Image(label="Upload an Image", type="pil")  # Image upload input
            input_text = gr.Textbox(label="Enter Text Question", placeholder="Ask a question or provide text", lines=2)
            audio_input = gr.Audio(label="Upload or Record Audio", type="filepath")  # Audio upload/record input (as a file path)

        with gr.Row():
            reasoning_effort = gr.Dropdown(
                label="Reasoning Effort",
                choices=["low", "medium", "high"],
                value="medium"
            )
            model_choice = gr.Dropdown(
                label="Select Model",
                choices=["o1", "o3-mini"],
                value="o1"  # Default to 'o1' for image-related tasks
            )

        submit_btn = gr.Button("Ask!", elem_id="submit-btn")
        clear_btn = gr.Button("Clear History", elem_id="clear-history")
        chat_history = gr.Chatbot()

        # Button interactions
        submit_btn.click(
            fn=chatbot,
            inputs=[input_text, image_input, audio_input, openai_api_key, reasoning_effort, model_choice, chat_history],
            outputs=[input_text, chat_history]
        )
        # clear_history returns ("", []): clear the text box and the chat history
        clear_btn.click(fn=clear_history, inputs=[], outputs=[input_text, chat_history])

    return demo
# Run the interface
if __name__ == "__main__":
    demo = create_interface()
    demo.launch()
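# A minimal sketch of how to run this app locally, assuming the legacy
# (pre-1.0) OpenAI Python SDK (which provides openai.ChatCompletion and
# openai.Audio) plus Gradio and Pillow; the pins are assumptions, not tested:
#
#   pip install "openai<1.0" gradio pillow
#   python app.py   # "app.py" is a hypothetical filename for this script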