rdezwart committed
Commit 83c71a6 · Parent(s): d00bd74

Try to implement threading

Files changed (1)
  1. app.py +26 -8
app.py CHANGED
@@ -1,18 +1,37 @@
+from threading import Thread
+
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import PreTrainedModel
+from transformers import TextIteratorStreamer, AutoModelForCausalLM, AutoTokenizer
 
 # Moondream does not support the HuggingFace pipeline system, so we have to do it manually
 moondream_id = "vikhyatk/moondream2"
 moondream_revision = "2024-04-02"
-moondream_tokenizer = AutoTokenizer.from_pretrained(moondream_id, code_revision=moondream_revision)
-moondream_model = AutoModelForCausalLM.from_pretrained(
-    moondream_id, trust_remote_code=True, code_revision=moondream_revision
+moondream_tokenizer = AutoTokenizer.from_pretrained(moondream_id, revision=moondream_revision)
+moondream: PreTrainedModel = AutoModelForCausalLM.from_pretrained(
+    moondream_id, trust_remote_code=True, revision=moondream_revision, torch_dtype="auto"
 )
+moondream.eval()
 
 
 def answer_question(_img, _prompt):
-    image_embeds = moondream_model.encode_image(_img)
-    return moondream_model.answer_question(image_embeds, _prompt, moondream_tokenizer)
+    image_embeds = moondream.encode_image(_img)
+    streamer = TextIteratorStreamer(moondream_tokenizer, skip_special_tokens=True)
+    thread = Thread(
+        target=moondream.answer_question,
+        kwargs={
+            "image_embeds": image_embeds,
+            "question": _prompt,
+            "tokenizer": moondream_tokenizer,
+            "streamer": streamer,
+        },
+    )
+    thread.start()
+
+    buffer = ""
+    for new_text in streamer:
+        buffer += new_text
+        yield buffer.strip()
 
 
 with gr.Blocks() as app:
@@ -31,7 +50,6 @@ with gr.Blocks() as app:
     output = gr.TextArea(label="Output")
 
     submit.click(answer_question, [img, prompt], output)
-    prompt.submit(answer_question, [img, prompt], output)
 
 if __name__ == "__main__":
-    app.launch()
+    app.queue().launch()
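
For readers unfamiliar with the pattern: transformers' TextIteratorStreamer is an iterator that a blocking generation call feeds from a worker thread, while the consumer loops over it to receive decoded text chunks as they arrive. The sketch below shows that pattern in isolation, outside Moondream and Gradio; it is illustrative only, and the gpt2 checkpoint, the prompt text, and the max_new_tokens value are placeholder assumptions, not part of this commit.

# Minimal sketch of the Thread + TextIteratorStreamer pattern used in answer_question().
# Assumptions (not from the commit): "gpt2", the prompt text, and max_new_tokens
# are placeholders chosen only to keep the example small and runnable.
from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")


def stream_answer(prompt: str):
    inputs = tokenizer(prompt, return_tensors="pt")
    # skip_special_tokens mirrors the streamer configuration in the commit;
    # skip_prompt keeps the echoed prompt out of the streamed output.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    # generate() blocks until generation finishes, so it runs on a worker thread
    # while this function consumes the streamer, just like answer_question() above.
    thread = Thread(
        target=model.generate,
        kwargs={**inputs, "streamer": streamer, "max_new_tokens": 64},
    )
    thread.start()

    buffer = ""
    for new_text in streamer:  # yields decoded chunks as they are produced
        buffer += new_text
        yield buffer


if __name__ == "__main__":
    for partial in stream_answer("The quick brown fox"):
        print(partial)

Because answer_question is now a generator, Gradio re-renders the output component on every yield; the switch from app.launch() to app.queue().launch() enables the request queue that generator-based event handlers needed in Gradio releases of this period.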