rdezwart committed
Commit 83c71a6 · Parent(s): d00bd74

Try to implement threading

Files changed (1)
  1. app.py +26 -8
app.py CHANGED
@@ -1,18 +1,37 @@
+from threading import Thread
+
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import PreTrainedModel
+from transformers import TextIteratorStreamer, AutoModelForCausalLM, AutoTokenizer
 
 # Moondream does not support the HuggingFace pipeline system, so we have to do it manually
 moondream_id = "vikhyatk/moondream2"
 moondream_revision = "2024-04-02"
-moondream_tokenizer = AutoTokenizer.from_pretrained(moondream_id, code_revision=moondream_revision)
-moondream_model = AutoModelForCausalLM.from_pretrained(
-    moondream_id, trust_remote_code=True, code_revision=moondream_revision
+moondream_tokenizer = AutoTokenizer.from_pretrained(moondream_id, revision=moondream_revision)
+moondream: PreTrainedModel = AutoModelForCausalLM.from_pretrained(
+    moondream_id, trust_remote_code=True, revision=moondream_revision, torch_dtype="auto"
 )
+moondream.eval()
 
 
 def answer_question(_img, _prompt):
-    image_embeds = moondream_model.encode_image(_img)
-    return moondream_model.answer_question(image_embeds, _prompt, moondream_tokenizer)
+    image_embeds = moondream.encode_image(_img)
+    streamer = TextIteratorStreamer(moondream_tokenizer, skip_special_tokens=True)
+    thread = Thread(
+        target=moondream.answer_question,
+        kwargs={
+            "image_embeds": image_embeds,
+            "question": _prompt,
+            "tokenizer": moondream_tokenizer,
+            "streamer": streamer,
+        },
+    )
+    thread.start()
+
+    buffer = ""
+    for new_text in streamer:
+        buffer += new_text
+        yield buffer.strip()
 
 
 with gr.Blocks() as app:
@@ -31,7 +50,6 @@ with gr.Blocks() as app:
     output = gr.TextArea(label="Output")
 
     submit.click(answer_question, [img, prompt], output)
-    prompt.submit(answer_question, [img, prompt], output)
 
 if __name__ == "__main__":
-    app.launch()
+    app.queue().launch()
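
For readers unfamiliar with the pattern: transformers' TextIteratorStreamer is an iterator that a blocking generation call feeds from a worker thread, while the consumer loops over it to receive decoded text chunks as they arrive. The sketch below shows that pattern in isolation, outside Moondream and Gradio; it is illustrative only, and the gpt2 checkpoint, the prompt text, and the max_new_tokens value are placeholder assumptions, not part of this commit.

# Minimal sketch of the Thread + TextIteratorStreamer pattern used in answer_question().
# Assumptions (not from the commit): "gpt2", the prompt text, and max_new_tokens
# are placeholders chosen only to keep the example small and runnable.
from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")


def stream_answer(prompt: str):
    inputs = tokenizer(prompt, return_tensors="pt")
    # skip_special_tokens mirrors the streamer configuration in the commit;
    # skip_prompt keeps the echoed prompt out of the streamed output.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    # generate() blocks until generation finishes, so it runs on a worker thread
    # while this function consumes the streamer, just like answer_question() above.
    thread = Thread(
        target=model.generate,
        kwargs={**inputs, "streamer": streamer, "max_new_tokens": 64},
    )
    thread.start()

    buffer = ""
    for new_text in streamer:  # yields decoded chunks as they are produced
        buffer += new_text
        yield buffer


if __name__ == "__main__":
    for partial in stream_answer("The quick brown fox"):
        print(partial)

Because answer_question is now a generator, Gradio re-renders the output component on every yield; the switch from app.launch() to app.queue().launch() enables the request queue that generator-based event handlers needed in Gradio releases of this period.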