File size: 1,530 Bytes
4d31c25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fe2b226
 
4d31c25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr
import spaces
from PIL import Image
import hashlib
import base64

def load_md2():
  """Load the moondream2 vision-language model onto the CPU.

  The model is pinned to a specific revision for reproducibility and
  requires ``trust_remote_code`` because moondream2 ships custom model code.
  """
  return AutoModelForCausalLM.from_pretrained(
    "vikhyatk/moondream2",
    device_map="cpu",
    trust_remote_code=True,
    revision="2025-01-09",
  )

# Module-level singleton: the model is loaded once at import time (on CPU)
# and moved to/from the GPU per request inside moondream2().
# NOTE: the original `global md2` statement was removed — `global` is a
# no-op at module scope; a plain top-level assignment already creates the
# module-global binding.
md2 = load_md2()

@spaces.GPU()
def moondream2(question, image, history=None):
  """Answer a question about *image* (or caption it) with moondream2.

  Args:
    question: Prompt string; if None or empty, a caption is generated
      instead of an answer.
    image: PIL image to analyze.
    history: Optional list of previous result dicts; it is copied, never
      mutated in place.

  Returns:
    A pair ``(results, results)`` — the same updated history list twice,
    matching the two Gradio outputs (display JSON and hidden state).
  """
  global md2
  model = md2
  # Shuttle the CPU-resident model onto the GPU only for the duration
  # of this request (ZeroGPU-style usage).
  model.cuda()
  # Fingerprint a fixed-size thumbnail so the hash/b64 are size-normalized.
  bts = image.resize((224, 224), Image.NEAREST).tobytes()
  hsh = hashlib.sha256(bts).hexdigest()
  b64 = base64.b64encode(bts).decode('utf-8')
  # A non-empty prompt means "answer the question"; otherwise caption.
  asked = question is not None and question != ""
  res = model.query(image, question) if asked else model.caption(image)
  model.cpu()
  # Copy the incoming history so the caller's list is never mutated.
  ress = list(history) if history is not None else []
  # BUG FIX: the original indexed the *string* prompt as question["answer"]
  # / question["caption"], which raised TypeError on every call. The intent
  # was to populate "answer" in query mode and "caption" in caption mode.
  ress.append({
    "answer": res if asked else None,
    "caption": None if asked else res,
    "sha256": hsh,
    "image_b64": b64,
  })
  return ress, ress

def gui():
  """Assemble the Gradio interface for moondream2 and start the server.

  Layout: an image input, a text prompt, a Run button, a visible JSON
  output, and a hidden JSON component used as conversation history state.
  """
  with gr.Blocks() as blk:
    with gr.Row():
      image_input = gr.Image(label="input", type="pil", elem_id="imgs")
    with gr.Row():
      prompt_box = gr.Textbox(label="prompt")
    with gr.Row():
      run_button = gr.Button("Run")
    with gr.Row():
      output_json = gr.JSON(label="output")
    # Hidden row: holds the running history fed back into each call.
    with gr.Row(visible=False):
      history_json = gr.JSON(label="history")
    run_button.click(
      moondream2,
      inputs=[prompt_box, image_input, history_json],
      outputs=[output_json, history_json],
    )
  blk.launch(share=False)

if __name__ == "__main__":
  gui()