# app.py — moondream2 batch visual-question-answering demo (Gradio / Hugging Face Space)
import spaces
import torch
import re
import gradio as gr
from PIL import Image
import io
from transformers import AutoTokenizer, AutoModelForCausalLM
# Pick the compute target once at import time: half precision on GPU for
# speed/memory, full float32 on CPU (fp16 matmuls are poorly supported there).
_has_cuda = torch.cuda.is_available()
device = "cuda" if _has_cuda else "cpu"
dtype = torch.float16 if _has_cuda else torch.float32

model_id = "vikhyatk/moondream2"
revision = "2024-04-02"  # pinned so remote-code updates can't change behavior

# Load the tokenizer and the moondream2 vision-language model at the pinned
# revision, then move the model to the chosen device/precision.
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
moondream = AutoModelForCausalLM.from_pretrained(
    model_id, trust_remote_code=True, revision=revision
).to(device=device, dtype=dtype)
moondream.eval()  # inference only — disable dropout etc.
def _to_pil(entry):
    """Normalize one gallery entry to a ``PIL.Image``.

    Gradio's Gallery passes entries as ``(image, caption)`` tuples (or bare
    images); with ``type="pil"`` the image is already a ``PIL.Image``, but it
    may also arrive as raw bytes or a file path depending on Gradio version.
    """
    img = entry[0] if isinstance(entry, (tuple, list)) else entry
    if isinstance(img, Image.Image):
        return img
    if isinstance(img, (bytes, bytearray)):
        return Image.open(io.BytesIO(img))
    # Fall back to path / file-like object.
    return Image.open(img)


@spaces.GPU(duration=10)
def answer_questions(images, prompt_text):
    """Answer a batch of comma-separated prompts against a batch of images.

    Parameters
    ----------
    images : list
        Gallery value; see ``_to_pil`` for the accepted entry shapes.
    prompt_text : str
        Prompts separated by commas, e.g. "Describe this image, What is shown?".

    Returns
    -------
    list[str]
        One response string per prompt/image pair.
    """
    prompts = [p.strip() for p in prompt_text.split(',')]  # Splitting and cleaning prompts
    # BUG FIX: the original did Image.open(io.BytesIO(img[0])), which assumes
    # raw bytes — with type="pil" the gallery already yields PIL images and
    # io.BytesIO(PIL.Image) raises TypeError. Normalize every entry instead.
    image_objects = [_to_pil(entry) for entry in images]
    image_embeds = [moondream.encode_image(img) for img in image_objects]
    answers = moondream.batch_answer(
        images=image_embeds,
        prompts=prompts,
        tokenizer=tokenizer,
    )
    # BUG FIX: batch_answer may yield a plain string per item; join only real
    # sequences — "\n".join("abc") would insert a newline between every char.
    return [
        "\n".join(ans) if isinstance(ans, (list, tuple)) else str(ans)
        for ans in answers
    ]
# Assemble the Gradio UI: an image gallery plus a comma-separated prompt box
# feed answer_questions; responses land in a text area. `demo` stays the
# module-level app object that Spaces expects.
with gr.Blocks() as demo:
    gr.Markdown("# πŸŒ” moondream2\nA tiny vision language model. [GitHub](https://github.com/vikhyatk/moondream)")
    with gr.Row():
        gallery_in = gr.Gallery(label="Upload Images", type="pil")
        prompt_in = gr.Textbox(
            label="Input Prompts",
            placeholder="Enter prompts separated by commas. Ex: Describe this image, What is in this image?",
            lines=2,
        )
    submit_btn = gr.Button("Submit")
    responses_out = gr.TextArea(label="Responses", lines=4)
    submit_btn.click(answer_questions, [gallery_in, prompt_in], responses_out)

demo.queue().launch()