Spaces:

ModularityAI
/

AskMoondream

Runtime error

App Files Files Community

hanzla committed on Apr 27, 2024

Commit

b23fbc0

1 Parent(s): 1014254

chat interface

Browse files

Files changed (1) hide show

app.py +34 -24

app.py CHANGED Viewed

@@ -1,21 +1,18 @@
 import spaces
 import torch
 import gradio as gr
 from threading import Thread
-from transformers import AutoTokenizer, AutoModelForCausalLM
-# Install the necessary package for the model
 import subprocess
-subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
-               shell=True)
-# Initialize the tokenizer and model
 model_id = "vikhyatk/moondream2"
 revision = "2024-04-02"
 tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
 moondream = AutoModelForCausalLM.from_pretrained(
-    model_id, revision=revision, trust_remote_code=True,
     torch_dtype=torch.bfloat16, device_map={"": "cuda"},
     attn_implementation="flash_attention_2"
 )
@@ -23,28 +20,41 @@ moondream.eval()
 @spaces.GPU(duration=10)
-def chatbot_response(img, text_input):
-    # Here we assume an encoded image processing if needed
     image_embeds = moondream.encode_image(img)
-    inputs = tokenizer.encode(text_input, return_tensors="pt")
-    outputs = moondream.generate(inputs, max_length=200)
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return response
-# Setting up Gradio Interface
 with gr.Blocks(theme="Monochrome") as demo:
-    gr.Markdown("# AskMoondream Chatbot")
-    with gr.Row():
-        img = gr.Image(type="pil", label="Upload an Image")
-        text_input = gr.Textbox(label="Ask a question or describe an image", placeholder="Type here...")
     with gr.Row():
         submit = gr.Button("Submit")
-        response = gr.TextArea(label="Response", placeholder="Moondream's response will appear here...")
-    # Define what happens when the user interacts with the interface
-    submit.click(chatbot_response, inputs=[img, text_input], outputs=response)
-    text_input.submit(chatbot_response, inputs=[img, text_input], outputs=response)
-# Launch the demo
 demo.queue().launch()

 import spaces
 import torch
+import re
 import gradio as gr
 from threading import Thread
+from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM
 import subprocess
+subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 model_id = "vikhyatk/moondream2"
 revision = "2024-04-02"
 tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
 moondream = AutoModelForCausalLM.from_pretrained(
+    model_id, trust_remote_code=True, revision=revision,
     torch_dtype=torch.bfloat16, device_map={"": "cuda"},
     attn_implementation="flash_attention_2"
 )
 @spaces.GPU(duration=10)
+def answer_question(img, prompt):
     image_embeds = moondream.encode_image(img)
+    streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
+    thread = Thread(
+        target=moondream.answer_question,
+        kwargs={
+            "image_embeds": image_embeds,
+            "question": prompt,
+            "tokenizer": tokenizer,
+            "streamer": streamer,
+        },
+    )
+    thread.start()
+    buffer = ""
+    for new_text in streamer:
+        buffer += new_text
+        yield buffer.strip()
 with gr.Blocks(theme="Monochrome") as demo:
+    gr.Markdown(
+        """
+        # AskMoondream: Moondream 2 Demonstration Space
+        Moondream2 is a 1.86B parameter model initialized with weights from SigLIP and Phi 1.5.
+        Modularity AI presents this open source huggingface space for running fast experimental inferences on Moondream2.
+        """
+    )
     with gr.Row():
+        prompt = gr.Textbox(label="Input", value="Describe this image.", scale=4)
         submit = gr.Button("Submit")
+    with gr.Row():
+        img = gr.Image(type="pil", label="Upload an Image")
+        output = gr.TextArea(label="Response")
+    submit.click(answer_question, [img, prompt], output)
+    prompt.submit(answer_question, [img, prompt], output)
 demo.queue().launch()