# FoodIdentifier / app.py
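"""Gradio demo that answers questions about an uploaded food image using the
Moondream2 vision-language model, streaming the answer as it is generated."""
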
from threading import Thread

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, PreTrainedModel, TextIteratorStreamer

# Moondream does not support the HuggingFace pipeline system, so we have to do it manually
moondream_id = "vikhyatk/moondream2"
moondream_revision = "2024-04-02"
moondream_tokenizer = AutoTokenizer.from_pretrained(moondream_id, revision=moondream_revision)
moondream: PreTrainedModel = AutoModelForCausalLM.from_pretrained(
    moondream_id, trust_remote_code=True, revision=moondream_revision, torch_dtype="auto"
)
moondream.eval()  # inference only; disables dropout and other training-time behaviour


def answer_question(_img, _prompt):
    """Stream Moondream's answer to `_prompt` about the PIL image `_img`."""
    image_embeds = moondream.encode_image(_img)

    # Run generation on a background thread and stream tokens back as they arrive
    streamer = TextIteratorStreamer(moondream_tokenizer, skip_special_tokens=True)
    thread = Thread(
        target=moondream.answer_question,
        kwargs={
            "image_embeds": image_embeds,
            "question": _prompt,
            "tokenizer": moondream_tokenizer,
            "streamer": streamer,
        },
    )
    thread.start()

    # Yield the growing answer so Gradio updates the output box incrementally
    buffer = ""
    for new_text in streamer:
        buffer += new_text
        yield buffer.strip()


if __name__ == "__main__":
    with gr.Blocks() as app:
        gr.Markdown(
            """
            # Food Identifier
            Final project for IAT 481 at Simon Fraser University, Spring 2024.
            """
        )

        with gr.Row():
            prompt = gr.Textbox(label="Input", value="Describe this image.")
            submit = gr.Button("Submit")
        with gr.Row():
            img = gr.Image(label="Image", type="pil")
            output = gr.TextArea(label="Output")

        # answer_question is a generator, so its partial answers stream into the output box
        submit.click(answer_question, [img, prompt], output)

    # queue() is required for streaming (generator) outputs
    app.queue().launch()