Spaces:
Running
Running
File size: 4,360 Bytes
83c71a6 5756dad ada211a 698fd5a ada211a 5756dad ada211a 6dafc63 77b3326 83c71a6 ada211a bf22d27 6dafc63 ada211a 77b3326 ada211a 2f92f19 ada211a 83c71a6 ada211a 83c71a6 ada211a 83c71a6 2f92f19 927201f 22d7c64 ada211a 22d7c64 ada211a da68fcd ada211a 698fd5a da68fcd 0a5c36c ada211a 0721dd9 cbb61c3 0721dd9 cbb61c3 0721dd9 1d43352 cbb61c3 5756dad 60a2be3 ada211a cbb61c3 4d495ce 0721dd9 b32931d e2ae5eb 4d495ce 0721dd9 8a23239 ada211a cbb61c3 4d495ce d92f93c cbb61c3 60a2be3 83c71a6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 |
from threading import Thread
import gradio as gr
import torch
from PIL import Image
from transformers import PreTrainedModel # for type hint
from transformers import TextIteratorStreamer, AutoModelForCausalLM, AutoTokenizer # Moondream
from transformers import YolosImageProcessor, YolosForObjectDetection # YOLOS-small-300
# --- Moondream --- #
# Moondream does not work with the HuggingFace pipeline system, so the
# tokenizer and the model are loaded by hand.
moondream_id = "vikhyatk/moondream2"
moondream_revision = "2024-04-02"  # same revision is passed to both loaders below
moondream_tokenizer = AutoTokenizer.from_pretrained(
    moondream_id,
    revision=moondream_revision,
)
moondream_model: PreTrainedModel = AutoModelForCausalLM.from_pretrained(
    moondream_id,
    revision=moondream_revision,
    trust_remote_code=True,
)
moondream_model.eval()

# --- YOLOS --- #
yolos_id = "hustvl/yolos-small-300"
yolos_processor: YolosImageProcessor = YolosImageProcessor.from_pretrained(
    yolos_id
)
yolos_model: YolosForObjectDetection = YolosForObjectDetection.from_pretrained(
    yolos_id
)
def answer_question(img, prompt):
    """
    Submits an image and prompt to the Moondream model and streams the answer.

    The image is encoded with ``moondream_model.encode_image``; generation
    then runs on a background thread while this generator pulls decoded text
    from a ``TextIteratorStreamer`` as it becomes available.

    :param img: image to ask about; ``None`` when nothing has been provided
    :param prompt: question / instruction text sent alongside the image
    :return: yields the accumulated output buffer, stripped of surrounding
        whitespace, each time new text arrives
    :raises gr.Error: if ``img`` is ``None``
    """
    if img is None:
        # Fail early with a readable message instead of an error raised
        # from inside the model code.
        raise gr.Error("Please provide an image first.")

    image_embeds = moondream_model.encode_image(img)
    streamer = TextIteratorStreamer(moondream_tokenizer, skip_special_tokens=True)

    # Generation runs on its own thread so that this generator can consume
    # the streamer while text is still being produced.
    thread = Thread(
        target=moondream_model.answer_question,
        kwargs={
            "image_embeds": image_embeds,
            "question": prompt,
            "tokenizer": moondream_tokenizer,
            "streamer": streamer,
        },
    )
    thread.start()

    buffer = ""
    for new_text in streamer:
        buffer += new_text
        yield buffer.strip()

    # The stream is exhausted; wait for the worker so it is not left dangling.
    thread.join()
def detect_objects(img: Image.Image):
    """
    Runs YOLOS object detection on an image and crops out every detection.

    Detections scoring below the 0.7 threshold are discarded by the
    post-processing step. Each kept detection is printed and returned as a
    cropped image with a caption.

    :param img: PIL image to analyse; ``None`` when nothing has been provided
    :return: list of ``(cropped_image, "label (score)")`` tuples, one per
        detection
    :raises gr.Error: if ``img`` is ``None``
    """
    if img is None:
        # Fail early with a readable message instead of an error raised
        # from inside the image processor.
        raise gr.Error("Please provide an image first.")

    inputs = yolos_processor(img, return_tensors="pt")

    # Inference only: no gradients are needed, so skip autograd tracking.
    with torch.no_grad():
        outputs = yolos_model(**inputs)

    # PIL reports size as (width, height); it is reversed here before being
    # handed to the post-processor as the target size.
    target_sizes = torch.tensor([tuple(reversed(img.size))])
    results = yolos_processor.post_process_object_detection(
        outputs, threshold=0.7, target_sizes=target_sizes
    )[0]

    id2label = yolos_model.config.id2label
    box_images = []
    for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
        box = [round(i, 2) for i in box.tolist()]
        # Resolve the label name and rounded score once; both the log line
        # and the caption use them.
        name = id2label[label.item()]
        confidence = round(score.item(), 3)
        print(f"Detected {name} with confidence {confidence} at location {box}")
        box_images.append((img.crop(tuple(box)), f"{name} ({confidence})"))
    return box_images
def gallery_selected(evt: gr.SelectData):
    """
    Listener for the gallery selection event.

    Logs the event's index, value and selected flag.

    :param evt: selection event data passed in by Gradio
    :return: the event's index, and a Button whose ``interactive`` flag is
        set from ``evt.selected``
    """
    index, is_selected = evt.index, evt.selected
    print(f"Index: {index}, Value: {evt.value}, Selected: {is_selected}")
    button_update = gr.Button(interactive=is_selected)
    return index, button_update
if __name__ == "__main__":
    # Build the two-tab UI, wire up the event handlers, then launch the app.
    # NOTE(review): container nesting below follows the component order in the
    # file; confirm the layout against the running app.
    with gr.Blocks() as app:
        gr.Markdown(
            "\n"
            "# Food Identifier\n"
            "Final project for IAT 481 at Simon Fraser University, Spring 2024.\n"
        )

        # Tab 1: upload an image and browse the detected-object crops.
        with gr.Tab("Object Detection"):
            with gr.Row():
                with gr.Column():
                    yolos_input = gr.Image(type="pil")
                    yolos_submit = gr.Button("Submit")
                yolos_output = gr.Gallery(
                    label="Detected Objects",
                    object_fit="scale-down",
                    columns=3,
                    scale=1,
                    show_share_button=False,
                    selected_index=None,
                    allow_preview=False,
                )
            with gr.Row():
                yolos_selected = gr.TextArea(label="Selected Image Index")
                # Starts disabled; gallery_selected returns the update that
                # sets its interactive flag.
                # NOTE(review): no click handler is registered for this
                # button in this block.
                proceed_button = gr.Button("To Moondream", interactive=False)

        # Tab 2: ask Moondream a question about an image.
        with gr.Tab("Inference"):
            with gr.Row():
                moon_prompt = gr.Textbox(label="Input", value="Describe this image.")
                moon_submit = gr.Button("Submit")
            with gr.Row():
                moon_img = gr.Image(label="Image", type="pil")
                moon_output = gr.TextArea(label="Output")

        # --- YOLOS --- #
        yolos_submit.click(
            fn=detect_objects,
            inputs=[yolos_input],
            outputs=yolos_output,
        )
        yolos_output.select(
            fn=gallery_selected,
            inputs=None,
            outputs=[yolos_selected, proceed_button],
        )

        # --- Moondream --- #
        moon_submit.click(
            fn=answer_question,
            inputs=[moon_img, moon_prompt],
            outputs=moon_output,
        )

    queued_app = app.queue()
    queued_app.launch()
|