Spaces:

Kilos1
/

Nutrition_App

Runtime error

App Files Files Community

Nutrition_App / multimodal_queries.py

Kilos1

Update multimodal_queries.py

612c5f5 verified 2 months ago

raw

history blame

2.27 kB

	import re
	import base64
	import io
	import torch
	import gradio as gr
	from PIL import Image
	from transformers import MllamaForConditionalGeneration, AutoProcessor

	# Checkpoint identifier shared by the model weights and their processor.
	model_id = "meta-llama/Llama-3.2-11B-Vision-Instruct"

	# Weights are requested in bfloat16; device placement is left to the
	# library via device_map="auto".
	model = MllamaForConditionalGeneration.from_pretrained(
	    model_id, torch_dtype=torch.bfloat16, device_map="auto"
	)

	# The processor is loaded from the same checkpoint as the model.
	processor = AutoProcessor.from_pretrained(model_id)

	def generate_model_response(image_file, user_query, *, max_new_tokens=512):
	    """
	    Answer a question about an uploaded image using the vision-language model.

	    Parameters:
	    - image_file: The uploaded image. May be a filesystem path, an open
	      file-like object, or an already-decoded PIL image.
	    - user_query: The user's question about the image.
	    - max_new_tokens: Keyword-only upper bound on the number of tokens
	      generated for the answer.

	    Returns:
	    - str: An HTML fragment. On success it holds the HTML-escaped model
	      answer; if an input is missing or generation fails it holds a short
	      (escaped) message instead. Exceptions are never propagated.
	    """
	    import html  # function-scope import keeps this block self-contained

	    # Reject missing inputs up front instead of failing inside the model.
	    if image_file is None:
	        return "<p>Please upload an image.</p>"
	    if not user_query or not str(user_query).strip():
	        return "<p>Please enter a question about the image.</p>"

	    try:
	        # Load and prepare the image; close the file handle once converted.
	        if isinstance(image_file, Image.Image):
	            raw_image = image_file.convert("RGB")
	        else:
	            with Image.open(image_file) as opened_image:
	                raw_image = opened_image.convert("RGB")

	        # One user turn: an image slot followed by the question text.
	        conversation = [
	            {
	                "role": "user",
	                "content": [
	                    {"type": "image"},
	                    {"type": "text", "text": user_query},
	                ],
	            }
	        ]

	        # Render the conversation into the prompt string the model expects.
	        prompt = processor.apply_chat_template(
	            conversation, add_generation_prompt=True, tokenize=False
	        )

	        # Keyword arguments avoid mixing up the image/text order. The chat
	        # template already emits the begin-of-text token, so the tokenizer
	        # must not add special tokens a second time.
	        inputs = processor(
	            images=raw_image,
	            text=prompt,
	            add_special_tokens=False,
	            return_tensors="pt",
	        ).to(model.device)

	        # Generate response from the model
	        outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)

	        # The generated sequence starts with the prompt tokens; decode only
	        # what follows so the answer does not echo the question.
	        prompt_length = inputs["input_ids"].shape[-1]
	        generated_text = processor.decode(
	            outputs[0][prompt_length:], skip_special_tokens=True
	        )

	        # Escape before embedding in HTML, then keep line breaks visible.
	        safe_text = html.escape(generated_text.strip()).replace("\n", "<br>")
	        return f"<p>{safe_text}</p>"

	    except Exception as e:
	        # UI boundary: report the failure to the user rather than crashing.
	        print(f"Error in generating response: {e}")
	        return f"<p>An error occurred: {html.escape(str(e))}</p>"

	# Gradio Interface: an image upload plus a question box, answered as HTML.
	iface = gr.Interface(
	    fn=generate_model_response,
	    inputs=[
	        # "filepath" passes the callback a path string, which PIL can open
	        # directly; the previous "file" value is not an accepted type in
	        # current Gradio releases.
	        gr.Image(type="filepath", label="Upload Image"),
	        gr.Textbox(
	            label="Enter your question",
	            placeholder="How many calories are in this food?",
	        ),
	    ],
	    outputs=gr.HTML(label="Response from Model"),
	)

	# share=True asks Gradio for a public share link in addition to the local URL.
	iface.launch(share=True)