import base64
import io

import torch
from PIL import Image
from transformers import AutoProcessor, MllamaForConditionalGeneration
import gradio as gr

# Load the Hugging Face vision-language model and its processor
# (MllamaForConditionalGeneration accepts both image and text inputs)
model_id = "meta-llama/Llama-3.2-90B-Vision-Instruct"
processor = AutoProcessor.from_pretrained(model_id)
model = MllamaForConditionalGeneration.from_pretrained(
    model_id, torch_dtype=torch.bfloat16, device_map="auto"
)
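# Note: the 90B checkpoint needs a large amount of GPU memory; on smaller hardware the
# lighter "meta-llama/Llama-3.2-11B-Vision-Instruct" checkpoint can be substituted for
# model_id above without changing any other code.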
def input_image_setup(uploaded_file):
    """
    Encodes the uploaded image file into a base64 string.
    Parameters:
    - uploaded_file (str): Path to the image file uploaded via Gradio.
    Returns:
    - encoded_image (str): Base64-encoded string of the image data.
    """
    if uploaded_file is None:
        raise FileNotFoundError("No file uploaded")
    with open(uploaded_file, "rb") as image_file:
        bytes_data = image_file.read()
    encoded_image = base64.b64encode(bytes_data).decode("utf-8")
    return encoded_image
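# Example usage (the path "example.jpg" is an assumed placeholder, not part of the app):
# encoded = input_image_setup("example.jpg")
# print(len(encoded))  # length of the base64 string, as a quick sanity check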
def generate_model_response(encoded_image, user_query, assistant_prompt="You are a helpful assistant. Answer the following user query in 1 or 2 sentences: "):
    """
    Sends an image and a query to the model and retrieves the description or answer.
    Parameters:
    - encoded_image (str): Base64-encoded image string.
    - user_query (str): The user's question about the image.
    - assistant_prompt (str): Optional prompt to guide the model's response.
    Returns:
    - str: The model's response for the given image and query.
    """
    # Decode the base64 string back into a PIL image for the processor
    image = Image.open(io.BytesIO(base64.b64decode(encoded_image)))
    # Build a chat-style prompt that pairs the image with the text query
    messages = [
        {"role": "user", "content": [
            {"type": "image"},
            {"type": "text", "text": assistant_prompt + user_query},
        ]}
    ]
    input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
    # Preprocess the image and tokenize the prompt together
    inputs = processor(images=image, text=input_text, add_special_tokens=False, return_tensors="pt").to(model.device)
    # Generate a response from the model
    outputs = model.generate(**inputs, max_new_tokens=128)
    # Decode only the newly generated tokens, skipping the prompt
    generated_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    response_text = processor.decode(generated_tokens, skip_special_tokens=True)
    return response_text
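# The generate() call above uses greedy decoding capped at 128 new tokens; standard
# kwargs such as do_sample=True and temperature=0.7 (illustrative value) can be added
# to model.generate() if more varied answers are wanted.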
def process_image_and_query(uploaded_file, user_query):
    """
    Process the uploaded image and user query to generate a response from the model.
    Parameters:
    - uploaded_file: The uploaded image file.
    - user_query: The user's question about the image.
    Returns:
    - str: The generated response from the model.
    """
    # Encode the uploaded image
    encoded_image = input_image_setup(uploaded_file)
    # Generate a response using the encoded image and user query
    response = generate_model_response(encoded_image, user_query)
    return response
# Create the Gradio interface
iface = gr.Interface(
    fn=process_image_and_query,
    inputs=[
        gr.Image(type="filepath", label="Upload Image"),
        gr.Textbox(label="User Query", placeholder="Enter your question about the image..."),
    ],
    outputs="text",
)
# Launch the Gradio app
iface.launch()
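# Optional sketch for exercising the pipeline without the UI; "sample.jpg" is an
# assumed placeholder file, not something shipped with this app:
# encoded = input_image_setup("sample.jpg")
# print(generate_model_response(encoded, "What is shown in this image?"))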