from transformers import AutoModelForCausalLM
import gradio as gr
import spaces
from PIL import Image
import hashlib
import base64

def load_md2():
    # Load moondream2 on the CPU; it is moved onto the GPU only inside the @spaces.GPU handler.
    model = AutoModelForCausalLM.from_pretrained("vikhyatk/moondream2", device_map="cpu", trust_remote_code=True, revision="2025-01-09")
    return model
global md2
md2 = load_md2()

@spaces.GPU()
def moondream2(question, image, history=None):
    global md2
    model = md2
    model.cuda()
    # Hash and base64-encode a downscaled copy of the image so each history entry is self-identifying.
    bts = image.resize((224, 224), Image.NEAREST).tobytes()
    hsh = hashlib.sha256(bts).hexdigest()
    b64 = base64.b64encode(bts).decode("utf-8")
    # A non-empty prompt is answered as a question; an empty prompt falls back to captioning.
    res = model.query(image, question) if question is not None and question != "" else model.caption(image)
    model.cpu()
    # Carry forward any prior history entries, then append this result.
    ress = []
    if history is not None:
        for itm in history:
            ress.append(itm)
    ress.append({
        "answer": res if question is not None and question != "" else None,
        "caption": res if question is None or question == "" else None,
        "sha256": hsh,
        "image_b64": b64,
    })
    return ress, ress

def gui():
    with gr.Blocks() as blk:
        with gr.Row():
            imgs = gr.Image(label="input", type="pil", elem_id="imgs")
        with gr.Row():
            txt = gr.Textbox(label="prompt")
        with gr.Row():
            btn = gr.Button("Run")
        with gr.Row():
            res = gr.JSON(label="output")
        with gr.Row(visible=False):
            history = gr.JSON(label="history")
        btn.click(moondream2, inputs=[txt, imgs, history], outputs=[res, history])
    blk.launch(share=False)

if __name__ == "__main__":
    gui()