# Nutrition_App / multimodal_queries.py
import base64

import gradio as gr
from PIL import Image
from transformers import AutoModelForCausalLM, AutoTokenizer, Owlv2Processor, Owlv2ForObjectDetection

# Open-vocabulary detector for spotting food items in images.
processor = Owlv2Processor.from_pretrained("google/owlv2-large-patch14-finetuned")
detection_model = Owlv2ForObjectDetection.from_pretrained("google/owlv2-large-patch14-finetuned")

# Language model and tokenizer used by generate_model_response().
# NOTE: "gpt2" is a placeholder assumption; swap in the chat/multimodal model this app targets.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")
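
# --- Optional sketch: using the OWLv2 detector loaded above -------------------
# The detector is loaded but not wired into the query flow; the helper below is a
# minimal, assumed example of how it could flag food items in an uploaded image.
# The label list and confidence threshold are illustrative placeholders, not values
# taken from the original app.
import torch

def detect_food_items(image: Image.Image, labels=("a photo of an apple", "a photo of a banana")):
    """Return (score, label, box) tuples for detections above the confidence threshold."""
    inputs = processor(text=[list(labels)], images=image, return_tensors="pt")
    with torch.no_grad():
        outputs = detection_model(**inputs)
    # target_sizes expects (height, width) for each image in the batch
    target_sizes = torch.tensor([image.size[::-1]])
    results = processor.post_process_object_detection(
        outputs=outputs, target_sizes=target_sizes, threshold=0.2
    )[0]
    return [
        (score.item(), labels[label.item()], box.tolist())
        for score, label, box in zip(results["scores"], results["labels"], results["boxes"])
    ]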
def input_image_setup(uploaded_file):
    """
    Encodes the uploaded image file into a base64 string.

    Parameters:
    - uploaded_file: Path to the uploaded image (as passed by Gradio) or a file-like object.

    Returns:
    - encoded_image (str): Base64-encoded string of the image data.
    """
    if uploaded_file is None:
        raise FileNotFoundError("No file uploaded")

    # Accept either an open file-like object or a filesystem path.
    if hasattr(uploaded_file, "read"):
        bytes_data = uploaded_file.read()
    else:
        with open(uploaded_file, "rb") as image_file:
            bytes_data = image_file.read()

    encoded_image = base64.b64encode(bytes_data).decode("utf-8")
    return encoded_image
def generate_model_response(encoded_image, user_query,
                            assistant_prompt="You are a helpful assistant. Answer the following user query in 1 or 2 sentences: "):
    """
    Sends an image and a query to the model and retrieves the description or answer.

    Parameters:
    - encoded_image (str): Base64-encoded image string.
    - user_query (str): The user's question about the image.
    - assistant_prompt (str): Optional prompt prepended to guide the model's response.

    Returns:
    - str: The model's response for the given image and query.
    """
    # Build the prompt; the image is passed inline as a Markdown data URI, which assumes
    # the underlying model can interpret images embedded in the text prompt.
    input_text = (
        assistant_prompt
        + user_query
        + "\n![Image](data:image/jpeg;base64,"
        + encoded_image
        + ")"
    )

    # Tokenize the prompt
    inputs = tokenizer(input_text, return_tensors="pt")

    # Generate a response; cap new tokens so generation terminates promptly
    outputs = model.generate(**inputs, max_new_tokens=128)

    # Decode and return the model's text response
    response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response_text
def process_image_and_query(uploaded_file, user_query):
    """
    Processes the uploaded image and user query to generate a response from the model.

    Parameters:
    - uploaded_file: The uploaded image file.
    - user_query: The user's question about the image.

    Returns:
    - str: The generated response from the model.
    """
    # Encode the uploaded image
    encoded_image = input_image_setup(uploaded_file)

    # Generate a response using the encoded image and user query
    response = generate_model_response(encoded_image, user_query)
    return response
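
# Example (sketch): calling the pipeline directly, outside the Gradio UI.
# "meal.jpg" is a hypothetical local file used purely for illustration.
#   answer = process_image_and_query("meal.jpg", "Roughly how many calories are on this plate?")
#   print(answer)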
# Create the Gradio interface
iface = gr.Interface(
    fn=process_image_and_query,
    inputs=[
        # type="filepath" hands the uploaded image to the callback as a file path
        gr.Image(type="filepath", label="Upload Image"),
        gr.Textbox(label="User Query", placeholder="Enter your question about the image..."),
    ],
    outputs="text",
)

# Launch the Gradio app
iface.launch()
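
# Note: when running outside Hugging Face Spaces, iface.launch(share=True) creates a
# temporary public link; on Spaces the plain launch() above is sufficient.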