Spaces:

Kilos1
/

Nutrition_App

Runtime error

App Files Files Community

Kilos1 committed on Mar 8

Commit

d7dbc2c

verified ·

1 Parent(s): dbd1c33

Update multimodal_queries.py

Browse files

Files changed (1) hide show

multimodal_queries.py +86 -0

multimodal_queries.py CHANGED Viewed

	@@ -0,0 +1,86 @@

+import requests
+import base64
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import gradio as gr
+# Load the Hugging Face model and tokenizer.
+# These statements run at import time: importing this module loads both the
+# tokenizer and the model named by `model_id` before any function is called.
+# NOTE(review): this id does not look like a Hugging Face Hub repo name (the
+# public repo is presumably "meta-llama/Llama-3.2-90B-Vision-Instruct") --
+# confirm that `from_pretrained` can resolve it.
+model_id = "meta-llama/llama-3-2-90b-vision-instruct"
+# NOTE(review): AutoTokenizer / AutoModelForCausalLM are text-oriented entry
+# points; a vision-instruct checkpoint presumably needs a processor and a
+# vision-capable model class to actually consume images -- TODO confirm.
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(model_id)
+def input_image_setup(uploaded_file):
+    """
+    Return the contents of an uploaded image as base64 text.
+    Parameters:
+    - uploaded_file: Object exposing a ``read()`` method that yields the raw image bytes, or None.
+    Returns:
+    - str: The bytes read from ``uploaded_file``, base64-encoded and decoded as UTF-8.
+    Raises:
+    - FileNotFoundError: If ``uploaded_file`` is None.
+    """
+    # Guard clause: fail fast when nothing was uploaded.
+    if uploaded_file is None:
+        raise FileNotFoundError("No file uploaded")
+    raw_bytes = uploaded_file.read()
+    return base64.b64encode(raw_bytes).decode("utf-8")
+def generate_model_response(encoded_image, user_query, assistant_prompt="You are a helpful assistant. Answer the following user query in 1 or 2 sentences: ", max_new_tokens=128):
+    """
+    Sends an image and a query to the model and returns only the generated answer.
+    Parameters:
+    - encoded_image (str): Base64-encoded image string.
+    - user_query (str): The user's question about the image.
+    - assistant_prompt (str): Optional prompt to guide the model's response.
+    - max_new_tokens (int): Upper bound on the number of tokens generated after the prompt.
+    Returns:
+    - str: The model's response for the given image and query, without the prompt text.
+    """
+    # The image is embedded in the prompt as a markdown data URI.
+    # NOTE(review): a text tokenizer sees this as plain characters, not pixels -- confirm the model can use it.
+    input_text = f"{assistant_prompt}{user_query}\n![Image](data:image/jpeg;base64,{encoded_image})"
+    # Tokenize the prompt and remember its length so it can be stripped from the output.
+    inputs = tokenizer(input_text, return_tensors="pt")
+    prompt_length = inputs["input_ids"].shape[-1]
+    # Bound generation explicitly: the library's default length cap counts prompt
+    # tokens too, and a base64 payload makes the prompt very long.
+    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
+    # For decoder-only models generate() returns prompt + continuation; keep only
+    # the continuation so the prompt (and the base64 payload) is not echoed back.
+    generated_tokens = outputs[0][prompt_length:]
+    return tokenizer.decode(generated_tokens, skip_special_tokens=True)
+def process_image_and_query(uploaded_file, user_query):
+    """
+    Answer a question about an uploaded image.
+    The image is base64-encoded with input_image_setup and the result is passed,
+    together with the query, to generate_model_response.
+    Parameters:
+    - uploaded_file: The uploaded image file.
+    - user_query: The user's question about the image.
+    Returns:
+    - str: Whatever generate_model_response returns for the encoded image and query.
+    """
+    # Encode first, then query the model; errors from either step propagate unchanged.
+    return generate_model_response(input_image_setup(uploaded_file), user_query)
+# Create Gradio interface
+# The `gr.inputs.*` namespace was deprecated in Gradio 3 and removed in Gradio 4,
+# so the components are taken from the top-level `gr` namespace instead.
+def _answer_from_path(image_path, user_query):
+    """
+    Adapter between the Gradio image component and process_image_and_query.
+    Parameters:
+    - image_path: Filesystem path of the uploaded image, or None if nothing was uploaded.
+    - user_query: The user's question about the image.
+    Returns:
+    - str: The response produced by process_image_and_query.
+    """
+    if image_path is None:
+        # Pass None through so the "no upload" case is handled in one place downstream.
+        return process_image_and_query(None, user_query)
+    # Downstream code calls .read() on its argument, so hand it an open binary file.
+    with open(image_path, "rb") as image_file:
+        return process_image_and_query(image_file, user_query)
+iface = gr.Interface(
+    fn=_answer_from_path,
+    inputs=[
+        gr.Image(type="filepath", label="Upload Image"),
+        gr.Textbox(label="User Query", placeholder="Enter your question about the image..."),
+    ],
+    outputs="text",
+)
+# Launch the Gradio app
+iface.launch()