import base64
import io

import torch
from PIL import Image
from transformers import AutoProcessor, MllamaForConditionalGeneration
import gradio as gr

# Load the Hugging Face vision-language model and its processor.
# Llama 3.2 Vision checkpoints use the Mllama architecture, so the
# conditional-generation class and AutoProcessor are required to pass images.
# Note: the 90B checkpoint needs substantial GPU memory; device_map="auto"
# spreads the weights across the available devices.
model_id = "meta-llama/Llama-3.2-90B-Vision-Instruct"
processor = AutoProcessor.from_pretrained(model_id)
model = MllamaForConditionalGeneration.from_pretrained(
    model_id, torch_dtype=torch.bfloat16, device_map="auto"
)
def input_image_setup(uploaded_file):
    """
    Encodes the uploaded image file into a base64 string.

    Parameters:
    - uploaded_file (str): Path to the image file uploaded via Gradio.

    Returns:
    - encoded_image (str): Base64-encoded string of the image data.
    """
    if uploaded_file is not None:
        # Gradio hands the handler a file path (type="filepath"), so read
        # the raw bytes from disk before encoding.
        with open(uploaded_file, "rb") as f:
            bytes_data = f.read()
        return base64.b64encode(bytes_data).decode("utf-8")
    raise FileNotFoundError("No file uploaded")
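# A quick local sanity check for the encoder (a hypothetical standalone test,
# not part of the Gradio flow; "sample.jpg" is an assumed local file):
#   encoded = input_image_setup("sample.jpg")
#   print(encoded[:32])  # prints the first characters of the base64 string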
def generate_model_response(encoded_image, user_query, assistant_prompt="You are a helpful assistant. Answer the following user query in 1 or 2 sentences: "):
    """
    Sends an image and a query to the model and retrieves the description or answer.

    Parameters:
    - encoded_image (str): Base64-encoded image string.
    - user_query (str): The user's question about the image.
    - assistant_prompt (str): Optional prompt to guide the model's response.

    Returns:
    - str: The model's response for the given image and query.
    """
    # Decode the base64 string back into a PIL image for the processor.
    image = Image.open(io.BytesIO(base64.b64decode(encoded_image))).convert("RGB")

    # Build a chat-formatted prompt that pairs the image with the text query;
    # the {"type": "image"} entry marks where the image tokens are inserted.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image"},
                {"type": "text", "text": assistant_prompt + user_query},
            ],
        }
    ]
    prompt = processor.apply_chat_template(messages, add_generation_prompt=True)

    # Tokenize the prompt and preprocess the image together.
    inputs = processor(image, prompt, add_special_tokens=False, return_tensors="pt").to(model.device)

    # Generate a response from the model.
    outputs = model.generate(**inputs, max_new_tokens=128)

    # Decode only the newly generated tokens, skipping the echoed prompt.
    response_text = processor.decode(
        outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True
    )
    return response_text
def process_image_and_query(uploaded_file, user_query):
    """
    Processes the uploaded image and user query to generate a response from the model.

    Parameters:
    - uploaded_file (str): Path to the uploaded image file.
    - user_query (str): The user's question about the image.

    Returns:
    - str: The generated response from the model.
    """
    # Encode the uploaded image
    encoded_image = input_image_setup(uploaded_file)
    # Generate a response using the encoded image and user query
    response = generate_model_response(encoded_image, user_query)
    return response
# Create the Gradio interface. Modern Gradio uses top-level components
# (gr.Image, gr.Textbox) instead of the removed gr.inputs namespace;
# type="filepath" passes the handler a path to the uploaded file.
iface = gr.Interface(
    fn=process_image_and_query,
    inputs=[
        gr.Image(type="filepath", label="Upload Image"),
        gr.Textbox(label="User Query", placeholder="Enter your question about the image..."),
    ],
    outputs="text",
)

# Launch the Gradio app
iface.launch()
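
# If the 90B model does not fit on your hardware (an assumption about your
# setup, not part of the original app), the smaller 11B vision checkpoint uses
# the same processor and model classes, so swapping the id is the only change:
#   model_id = "meta-llama/Llama-3.2-11B-Vision-Instruct"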