Csplk committed on
Commit
b8e5afc
·
verified ·
1 Parent(s): c389d1e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -23
app.py CHANGED
@@ -6,28 +6,18 @@ from threading import Thread
6
  from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM
7
  from PIL import ImageDraw
8
  from torchvision.transforms.v2 import Resize
9
-
10
  import subprocess
11
- subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
12
 
13
- mooondream = AutoModelForCausalLM.from_pretrained(
14
- "vikhyatk/moondream2",
15
- revision="2025-01-09",
16
- trust_remote_code=True,
17
- device_map={"": "cuda"},
18
- #attn_implementation="flash_attention_2"
19
- )
20
 
21
- '''
22
  model_id = "vikhyatk/moondream2"
23
- revision = "2024-08-26"
24
  tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
25
  moondream = AutoModelForCausalLM.from_pretrained(
26
  model_id, trust_remote_code=True, revision=revision,
27
  torch_dtype=torch.bfloat16, device_map={"": "cuda"},
28
  attn_implementation="flash_attention_2"
29
  )
30
- '''
31
 
32
  moondream.eval()
33
 
@@ -35,26 +25,25 @@ moondream.eval()
35
  def answer_questions(image_tuples, prompt_text):
36
  result = ""
37
  Q_and_A = ""
38
- prompts = [p.strip() for p in prompt_text.split(',')]
39
  image_embeds = [img[0] for img in image_tuples if img[0] is not None]
40
  answers = []
41
-
42
  for prompt in prompts:
43
- thread = Thread(target=lambda: answers.append(moondream.batch_answer(
44
- images=[img.convert("RGB") for img in image_embeds],
45
- prompts=[prompt] * len(image_embeds),
46
- tokenizer=tokenizer)))
47
- thread.start()
48
- thread.join()
49
-
50
  for i, prompt in enumerate(prompts):
51
  Q_and_A += f"### Q: {prompt}\n"
52
  for j, image_tuple in enumerate(image_tuples):
53
  image_name = f"image{j+1}"
54
- answer_text = answers[i][j]
55
  Q_and_A += f"**{image_name} A:** \n {answer_text} \n"
56
 
57
- result = {'headers': prompts, 'data': answers}
58
  print("result\n{}\n\nQ_and_A\n{}\n\n".format(result, Q_and_A))
59
  return Q_and_A, result
60
 
 
6
  from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM
7
  from PIL import ImageDraw
8
  from torchvision.transforms.v2 import Resize
 
9
  import subprocess
 
10
 
11
+ subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 
 
 
 
 
 
12
 
 
13
  model_id = "vikhyatk/moondream2"
14
+ revision = "2025-01-09"
15
  tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
16
  moondream = AutoModelForCausalLM.from_pretrained(
17
  model_id, trust_remote_code=True, revision=revision,
18
  torch_dtype=torch.bfloat16, device_map={"": "cuda"},
19
  attn_implementation="flash_attention_2"
20
  )
 
21
 
22
  moondream.eval()
23
 
 
25
  def answer_questions(image_tuples, prompt_text):
26
  result = ""
27
  Q_and_A = ""
28
+ prompts = [p.strip() for p in prompt_text.split(',')]
29
  image_embeds = [img[0] for img in image_tuples if img[0] is not None]
30
  answers = []
31
+
32
  for prompt in prompts:
33
+ answers.append(moondream.batch_answer(
34
+ images=[img.convert("RGB") for img in image_embeds],
35
+ prompts=[prompt] * len(image_embeds),
36
+ tokenizer=tokenizer
37
+ ))
38
+
 
39
  for i, prompt in enumerate(prompts):
40
  Q_and_A += f"### Q: {prompt}\n"
41
  for j, image_tuple in enumerate(image_tuples):
42
  image_name = f"image{j+1}"
43
+ answer_text = answers[i][j]
44
  Q_and_A += f"**{image_name} A:** \n {answer_text} \n"
45
 
46
+ result = {'headers': prompts, 'data': answers}
47
  print("result\n{}\n\nQ_and_A\n{}\n\n".format(result, Q_and_A))
48
  return Q_and_A, result
49