import re
import base64
from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr
from PIL import Image
import io
from transformers import Owlv2Processor, Owlv2ForObjectDetection

# OWLv2 open-vocabulary detector, kept under its own name so it does not shadow
# the language model used for text generation further down.
processor = Owlv2Processor.from_pretrained("google/owlv2-large-patch14-finetuned")
detection_model = Owlv2ForObjectDetection.from_pretrained("google/owlv2-large-patch14-finetuned")
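
# generate_model_response() below calls tokenizer(...) and model.generate(...), which the
# OWLv2 detector does not provide. A minimal sketch of loading a text-generation model for
# that purpose; "your-causal-lm-checkpoint" is a placeholder, not a real model id, so
# substitute the checkpoint you actually intend to use.
tokenizer = AutoTokenizer.from_pretrained("your-causal-lm-checkpoint")
model = AutoModelForCausalLM.from_pretrained("your-causal-lm-checkpoint")

# Optional helper (not used in the original flow): runs the OWLv2 detector on the uploaded
# image so that detected food labels could be appended to the prompt. The function name,
# label list, and threshold are illustrative assumptions, not part of the original app.
def detect_food_items(image, candidate_labels, threshold=0.3):
    import torch  # only needed if this helper is actually called

    inputs = processor(text=[candidate_labels], images=image, return_tensors="pt")
    outputs = detection_model(**inputs)
    target_sizes = torch.tensor([image.size[::-1]])  # PIL size is (width, height)
    results = processor.post_process_object_detection(
        outputs=outputs, target_sizes=target_sizes, threshold=threshold
    )[0]
    # Map detected label indices back to the text queries.
    return [candidate_labels[int(i)] for i in results["labels"]]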

def input_image_setup(image_file):
    """
    Encodes the uploaded image file into a base64 string.

    Parameters:
    - image_file: Image file uploaded via Gradio

    Returns:
    - encoded_image (str): Base64 encoded string of the image data
    """
    if image_file is not None:
        # Convert the PIL Image object to bytes and encode in Base64.
        # JPEG cannot store an alpha channel, so normalise to RGB first.
        buffered = io.BytesIO()
        image_file.convert("RGB").save(buffered, format="JPEG")
        img_bytes = buffered.getvalue()
        encoded_image = base64.b64encode(img_bytes).decode("utf-8")
        return encoded_image
    else:
        raise FileNotFoundError("No file uploaded")
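
# Example usage (illustrative; the file name is hypothetical):
#   img = Image.open("meal.jpg")
#   encoded = input_image_setup(img)   # base64 string, e.g. "/9j/4AAQ..."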

def format_response(response_text):
    """
    Formats the model response to display each item as HTML elements.
    """
    response_text = re.sub(r"\*\*(.*?)\*\*", r"<p><strong>\1</strong></p>", response_text)
    response_text = re.sub(r"(?m)^\s*\*\s(.*)", r"<li>\1</li>", response_text)
    response_text = re.sub(r"(<li>.*?</li>)+", lambda match: f"<ul>{match.group(0)}</ul>", response_text, flags=re.DOTALL)
    response_text = re.sub(r"</p>(?=<p>)", r"</p><br>", response_text)
    response_text = re.sub(r"(\n|\\n)+", r"<br>", response_text)
    return response_text
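
# Example (illustrative): format_response("**Total Calories**\n* 250 kcal")
# returns "<p><strong>Total Calories</strong></p><br><ul><li>250 kcal</li></ul>".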

def generate_model_response(image_file, user_query):
    """
    Processes the uploaded image and user query to generate a response from the model.

    Parameters:
    - image_file: The uploaded image file.
    - user_query: The user's question about the image.

    Returns:
    - str: The generated response from the model, formatted as HTML.
    """
    try:
        encoded_image = input_image_setup(image_file)
    except FileNotFoundError as e:
        return f"<p>{str(e)}</p>"

    assistant_prompt = """
        You are an expert nutritionist. Analyze the food items in the image and provide a detailed nutritional assessment:

        1. **Identification**: List each food item.
        2. **Portion & Calories**: Specify portion size and calories for each item.
        3. **Total Calories**: Provide the total.
        4. **Nutrient Breakdown**: Detail key nutrients.
        5. **Health Evaluation**: Evaluate meal healthiness.
        6. **Disclaimer**: "Nutritional info is approximate. Consult a nutritionist for precise advice."

        Format your response accordingly.
    """

    input_text = assistant_prompt + "\n\n" + user_query + "\n![Image](data:image/jpeg;base64," + encoded_image + ")"

    inputs = tokenizer(input_text, return_tensors="pt")

    try:
        # max_new_tokens bounds the length of the generated answer; adjust as needed.
        outputs = model.generate(**inputs, max_new_tokens=512)
        raw_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        formatted_response = format_response(raw_response)
        return formatted_response
    except Exception as e:
        print(f"Error in generating response: {e}")
        return f"<p>An error occurred: {str(e)}</p>"

# Gradio Interface
iface = gr.Interface(
    fn=generate_model_response,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Textbox(label="Enter your question", placeholder="How many calories are in this food?")
    ],
    outputs=gr.HTML(label="Nutritional Assessment")
)

iface.launch(share=True)