moondream2-batch-processing

Running on Zero

App Files Community

Csplk committed on Jan 10

Commit

b8e5afc

verified ·

1 Parent(s): c389d1e

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -23

app.py CHANGED Viewed

@@ -6,28 +6,18 @@ from threading import Thread
 from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM
 from PIL import ImageDraw
 from torchvision.transforms.v2 import Resize
 import subprocess
-subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
-mooondream = AutoModelForCausalLM.from_pretrained(
-    "vikhyatk/moondream2",
-    revision="2025-01-09",
-    trust_remote_code=True,
-    device_map={"": "cuda"},
-    #attn_implementation="flash_attention_2"
-)
-'''
 model_id = "vikhyatk/moondream2"
-revision = "2024-08-26"
 tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
 moondream = AutoModelForCausalLM.from_pretrained(
     model_id, trust_remote_code=True, revision=revision,
     torch_dtype=torch.bfloat16, device_map={"": "cuda"},
     attn_implementation="flash_attention_2"
 )
-'''
 moondream.eval()
@@ -35,26 +25,25 @@ moondream.eval()
 def answer_questions(image_tuples, prompt_text):
     result = ""
     Q_and_A = ""
-    prompts = [p.strip() for p in prompt_text.split(',')]
     image_embeds = [img[0] for img in image_tuples if img[0] is not None]
     answers = []
     for prompt in prompts:
-        thread = Thread(target=lambda: answers.append(moondream.batch_answer(
-                images=[img.convert("RGB") for img in image_embeds],
-                prompts=[prompt] * len(image_embeds),
-                tokenizer=tokenizer)))
-        thread.start()
-        thread.join()
     for i, prompt in enumerate(prompts):
         Q_and_A += f"### Q: {prompt}\n"
         for j, image_tuple in enumerate(image_tuples):
             image_name = f"image{j+1}"
-            answer_text = answers[i][j]
             Q_and_A += f"**{image_name} A:** \n {answer_text} \n"
-    result = {'headers': prompts, 'data': answers}
     print("result\n{}\n\nQ_and_A\n{}\n\n".format(result, Q_and_A))
     return Q_and_A, result

 from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM
 from PIL import ImageDraw
 from torchvision.transforms.v2 import Resize
 import subprocess
+subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 model_id = "vikhyatk/moondream2"
+revision = "2025-01-09"
 tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
 moondream = AutoModelForCausalLM.from_pretrained(
     model_id, trust_remote_code=True, revision=revision,
     torch_dtype=torch.bfloat16, device_map={"": "cuda"},
     attn_implementation="flash_attention_2"
 )
 moondream.eval()
 def answer_questions(image_tuples, prompt_text):
     result = ""
     Q_and_A = ""
+    prompts = [p.strip() for p in prompt_text.split(',')]
     image_embeds = [img[0] for img in image_tuples if img[0] is not None]
     answers = []
     for prompt in prompts:
+        answers.append(moondream.batch_answer(
+            images=[img.convert("RGB") for img in image_embeds],
+            prompts=[prompt] * len(image_embeds),
+            tokenizer=tokenizer
+        ))
     for i, prompt in enumerate(prompts):
         Q_and_A += f"### Q: {prompt}\n"
         for j, image_tuple in enumerate(image_tuples):
             image_name = f"image{j+1}"
+            answer_text = answers[i][j]
             Q_and_A += f"**{image_name} A:** \n {answer_text} \n"
+    result = {'headers': prompts, 'data': answers}
     print("result\n{}\n\nQ_and_A\n{}\n\n".format(result, Q_and_A))
     return Q_and_A, result