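# "Running on Zero" Space: batch visual question answering with moondream2.
# Every comma-separated prompt is asked of every uploaded image, and the
# answers are shown both as a markdown Q&A transcript and as a dataframe.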
import spaces
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

# Optional setup steps, kept for reference (uncomment along with `import subprocess`):
#subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
#subprocess.run('cp -r moondream/torch clients/python/moondream/torch', shell=True)
#subprocess.run('pip install moondream[gpu]', shell=True)
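# Pinning `revision` keeps the downloaded weights and the remote modeling code
# reproducible across restarts of the Space.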
model_id = "vikhyatk/moondream2"
revision = "2025-01-09"
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
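# trust_remote_code=True is required because moondream2 ships its own modeling
# code inside the model repo rather than in the transformers library.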
moondream = AutoModelForCausalLM.from_pretrained(
    model_id, trust_remote_code=True, revision=revision,
    torch_dtype=torch.bfloat16, device_map={"": "cuda"},
)
moondream.eval()
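
# On ZeroGPU ("Zero") hardware, @spaces.GPU requests a GPU slice for the
# decorated call; `duration` is the maximum number of seconds the allocation
# may be held.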
@spaces.GPU(duration=150)
def answer_questions(image_tuples, prompt_text):
    Q_and_A = ""
    # Comma-separated prompt string -> list of individual prompts.
    prompts = [p.strip() for p in prompt_text.split(',')]
    # Gallery items are (image, caption) tuples; keep only the PIL images.
    images = [img[0] for img in image_tuples if img[0] is not None]
    answers = []
    for prompt in prompts:
        # Ask the same prompt of every image in one batched call.
        answers.append(moondream.batch_answer(
            images=[img.convert("RGB") for img in images],
            prompts=[prompt] * len(images),
            tokenizer=tokenizer,
        ))
    # Build a markdown transcript: one heading per question, one answer per image.
    for i, prompt in enumerate(prompts):
        Q_and_A += f"### Q: {prompt}\n"
        for j in range(len(images)):
            image_name = f"image{j+1}"
            answer_text = answers[i][j]
            Q_and_A += f"**{image_name} A:** \n {answer_text} \n"
    # answers is indexed [prompt][image]; transpose so each dataframe row is an
    # image and each column lines up with its prompt header.
    result = {'headers': prompts, 'data': [list(row) for row in zip(*answers)]}
    return Q_and_A, result
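
# A minimal sketch of calling batch_answer directly (hypothetical file name,
# assuming the pinned moondream2 revision exposes this method as used above):
#   from PIL import Image
#   answers = moondream.batch_answer(
#       images=[Image.open("example.jpg").convert("RGB")],
#       prompts=["Describe this image"],
#       tokenizer=tokenizer,
#   )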
with gr.Blocks() as demo:
    gr.Markdown("# moondream2 unofficial batch processing demo")
    gr.Markdown("1. Select images\n2. Enter one or more prompts separated by commas. Ex: Describe this image, What is in this image?\n\n")
    gr.Markdown("**Each prompt is asked of every image: the images are sent as one batch per prompt.**")
    gr.Markdown("*This demo currently runs on the free CPU Space tier, so results may take longer than if you duplicate the Space onto GPU hardware.*")
    gr.Markdown("A tiny vision language model. [moondream2](https://huggingface.co/vikhyatk/moondream2)")
    with gr.Row():
        img = gr.Gallery(label="Upload Images", type="pil", preview=True, columns=4)
    with gr.Row():
        prompt = gr.Textbox(label="Input Prompts", placeholder="Enter one or more prompts separated by commas. Ex: Describe this image, What is in this image?", lines=8)
    with gr.Row():
        submit = gr.Button("Submit")
    with gr.Row():
        output = gr.Markdown(label="Questions and Answers", line_breaks=True)
    with gr.Row():
        output2 = gr.Dataframe(label="Structured Dataframe", type="array", wrap=True)
    submit.click(answer_questions, inputs=[img, prompt], outputs=[output, output2])

demo.queue().launch()
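# queue() enables Gradio's request queue, so concurrent submissions are
# processed in order instead of competing for the same GPU allocation.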