Spaces:
Runtime error
Runtime error
File size: 2,269 Bytes
2c4f69d d7dbc2c 612c5f5 d7dbc2c 2c4f69d 612c5f5 d7dbc2c 612c5f5 d7dbc2c 612c5f5 d7dbc2c 612c5f5 d7dbc2c 612c5f5 d7dbc2c 612c5f5 d7dbc2c 612c5f5 d7dbc2c 612c5f5 d7dbc2c 612c5f5 d7dbc2c 612c5f5 d7dbc2c 612c5f5 d7dbc2c 612c5f5 d7dbc2c 612c5f5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
import base64
import html
import io
import re

import gradio as gr
import torch
from PIL import Image
from transformers import MllamaForConditionalGeneration, AutoProcessor
# Checkpoint shared by the processor and the model weights.
model_id = "meta-llama/Llama-3.2-11B-Vision-Instruct"

# The processor prepares the image/text inputs and decodes generated tokens.
processor = AutoProcessor.from_pretrained(model_id)

# Weights are loaded in bfloat16 and placed automatically via device_map.
model = MllamaForConditionalGeneration.from_pretrained(
    model_id, device_map="auto", torch_dtype=torch.bfloat16
)
def generate_model_response(image_file, user_query, max_new_tokens=256):
    """
    Answer a question about an uploaded image with the vision-language model.

    Parameters:
    - image_file: The uploaded image, as a path or file-like object that
      ``PIL.Image.open`` accepts. ``None`` means nothing was uploaded.
    - user_query: The user's question about the image.
    - max_new_tokens: Upper bound on the number of tokens generated for the
      answer (defaults to 256).

    Returns:
    - str: An HTML fragment containing either the model's answer or an error
      message. All dynamic text is HTML-escaped before being embedded.
    """
    # Nothing to analyse without an image; report it instead of failing
    # later with an unrelated internal error.
    if image_file is None:
        return "<p>Please upload an image.</p>"

    try:
        # Open inside a context manager so the underlying file is closed;
        # convert() yields a separate in-memory RGB image.
        with Image.open(image_file) as uploaded:
            raw_image = uploaded.convert("RGB")

        # One user turn: an image slot followed by the question text.
        conversation = [
            {
                "role": "user",
                "content": [
                    {"type": "image"},
                    {"type": "text", "text": user_query or ""},
                ],
            }
        ]

        # Render the conversation to a prompt string (not tokenized yet).
        prompt = processor.apply_chat_template(
            conversation, add_generation_prompt=True, tokenize=False
        )

        # Keyword arguments keep image and text from being swapped.
        # add_special_tokens=False follows the documented usage: the chat
        # template has already emitted the begin-of-text token.
        inputs = processor(
            images=raw_image,
            text=prompt,
            add_special_tokens=False,
            return_tensors="pt",
        ).to(model.device)

        outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)

        # generate() returns prompt + completion; keep only the completion.
        prompt_length = inputs["input_ids"].shape[-1]
        generated_text = processor.decode(
            outputs[0][prompt_length:], skip_special_tokens=True
        )

        # The result is shown in an HTML component, so escape model output
        # and keep its line breaks visible.
        answer = html.escape(generated_text.strip()).replace("\n", "<br>")
        return f"<p>{answer}</p>"
    except Exception as e:
        # UI boundary: log the failure and show an escaped message rather
        # than letting the request crash.
        print(f"Error in generating response: {e}")
        return f"<p>An error occurred: {html.escape(str(e))}</p>"
# Gradio interface: an image upload plus a question box, answered as HTML.
iface = gr.Interface(
    fn=generate_model_response,
    inputs=[
        # "filepath" hands the callback a path string that PIL can open;
        # "file" is not one of the documented Image types
        # ("numpy", "pil", "filepath").
        gr.Image(type="filepath", label="Upload Image"),
        gr.Textbox(
            label="Enter your question",
            placeholder="How many calories are in this food?",
        ),
    ],
    outputs=gr.HTML(label="Response from Model"),
)

# Start the web server; share=True requests a public share link.
iface.launch(share=True)
|