Spaces:

ManishThota
/

InstructVQA

Paused

InstructVQA / app.py

Update app.py

6a70eef verified over 1 year ago

1.29 kB

	import gradio as gr

	# gr.load("models/ManishThota/InstructBlip-VQA").launch()

	from PIL import Image
	import torch
	from transformers import BlipProcessor, BlipForQuestionAnswering

	# Hub checkpoints: the processor comes from the base BLIP VQA repo, while the
	# weights come from the InstructBlip-VQA repo.
	PROCESSOR_CHECKPOINT = "Salesforce/blip-vqa-base"
	MODEL_CHECKPOINT = "ManishThota/InstructBlip-VQA"

	# Loaded once at module import so every prediction reuses the same objects.
	processor = BlipProcessor.from_pretrained(PROCESSOR_CHECKPOINT)
	model = BlipForQuestionAnswering.from_pretrained(MODEL_CHECKPOINT)

	def predict_answer(image, question: str) -> str:
	    """Answer a question about an image using the BLIP question-answering model.

	    Args:
	        image: PIL image to ask about; it is converted to RGB before encoding.
	        question: Question text that is encoded together with the image.

	    Returns:
	        The decoded answer text with special tokens stripped.
	    """
	    # Uploads may arrive in other modes (e.g. RGBA or greyscale); normalise to RGB.
	    rgb_image = image.convert("RGB")

	    # Encode the image/question pair into PyTorch tensors for the model.
	    encoding = processor(rgb_image, question, return_tensors="pt")

	    # A single example is passed in, so only the first generated sequence is decoded.
	    output_ids = model.generate(**encoding)
	    return processor.decode(output_ids[0], skip_special_tokens=True)


	def gradio_predict(image, question):
	    """Gradio callback: validate the upload, then delegate to ``predict_answer``.

	    Args:
	        image: PIL image from the image component, or ``None`` when the user
	            submitted without uploading anything.
	        question: Text entered in the question box.

	    Returns:
	        The answer string produced by ``predict_answer``.

	    Raises:
	        gr.Error: If no image was provided, so the UI shows a readable message
	            instead of an ``AttributeError`` from calling ``.convert`` on ``None``.
	    """
	    if image is None:
	        raise gr.Error("Please upload an image before asking a question.")
	    return predict_answer(image, question)

	# Build the UI components first (image + question in, answer text out) ...
	image_input = gr.Image(type="pil", label="Upload or Drag an Image")
	question_input = gr.Textbox(
	    label="Question",
	    placeholder="e.g. What is this?",
	    scale=4,
	)
	answer_output = gr.TextArea(label="Answer")

	# ... then wire them to the prediction callback.
	iface = gr.Interface(
	    fn=gradio_predict,
	    inputs=[image_input, question_input],
	    outputs=answer_output,
	    title="Visual Question Answering",
	    description="This model answers questions based on the content of an image. Powered by BLIP.",
	)

	# Enable request queuing and start the app with debug=True.
	iface.queue().launch(debug=True)

	# demo.queue().launch(debug=True)