|
from transformers import AutoTokenizer, AutoModelForCausalLM |
|
import gradio as gr |
|
import spaces |
|
from PIL import Image |
|
import hashlib |
|
import base64 |
|
|
|
def load_md2():
    """Load the moondream2 vision-language model onto the CPU.

    Uses the pinned 2025-01-09 revision with remote code enabled so the
    custom model class shipped with the checkpoint can be executed.

    Returns:
        The instantiated ``AutoModelForCausalLM`` model, resident on CPU.
    """
    return AutoModelForCausalLM.from_pretrained(
        "vikhyatk/moondream2",
        device_map="cpu",
        trust_remote_code=True,
        revision="2025-01-09",
    )
|
|
|
# Load the model once at import time and share it across all requests.
# NOTE(review): the original had a bare `global md2` statement here; at
# module scope that is a no-op (module-level names are already global),
# so it has been removed. Behavior is unchanged.
md2 = load_md2()
|
|
|
@spaces.GPU()
def moondream2(question, image, history=None):
    """Run a moondream2 query (or caption) on *image* and extend the history.

    Args:
        question: Prompt text. When ``None`` or empty, the model produces a
            caption of the image instead of answering a question.
        image: PIL image to analyze.
        history: Optional list of previous result dicts; it is copied, not
            mutated in place.

    Returns:
        A pair ``(results, results)`` — the same list twice, matching the two
        Gradio outputs (the visible JSON panel and the hidden history state).
    """
    global md2
    model = md2
    model.cuda()  # move the shared model onto the GPU for this call
    try:
        # Fingerprint a fixed-size downscale of the image so identical
        # inputs produce identical hashes regardless of original size.
        bts = image.resize((224, 224), Image.NEAREST).tobytes()
        hsh = hashlib.sha256(bts).hexdigest()
        b64 = base64.b64encode(bts).decode("utf-8")

        has_question = question is not None and question != ""
        res = model.query(image, question) if has_question else model.caption(image)
    finally:
        # Always return the model to CPU, even if inference raises,
        # so a failed request does not leave the model pinned on GPU.
        model.cpu()

    # BUG FIX: the original indexed the *question string* with
    # question["answer"] / question["caption"], which raises TypeError.
    # The intent — answer when a prompt was given, caption otherwise —
    # is implemented directly here.
    ress = list(history) if history is not None else []
    ress.append({
        "answer": res if has_question else None,
        "caption": res if not has_question else None,
        "sha256": hsh,
        "image_b64": b64,
    })
    return ress, ress
|
|
|
def gui():
    """Build the Gradio front-end for the moondream2 demo and launch it."""
    with gr.Blocks() as demo:
        with gr.Row():
            image_in = gr.Image(label="input", type="pil", elem_id="imgs")
        with gr.Row():
            prompt_in = gr.Textbox(label="prompt")
        with gr.Row():
            run_btn = gr.Button("Run")
        with gr.Row():
            output_json = gr.JSON(label="output")
        # Hidden row: carries the running result history between clicks.
        with gr.Row(visible=False):
            history_state = gr.JSON(label="history")
        run_btn.click(
            moondream2,
            inputs=[prompt_in, image_in, history_state],
            outputs=[output_json, history_state],
        )
    demo.launch(share=False)
|
|
|
# Script entry point: build and launch the Gradio UI when run directly.
if __name__ == "__main__":
    gui()