programmnix-askui committed on
Commit 0d32e06 · 1 Parent(s): ad97641
Files changed (2)
  1. app.py +9 -25
  2. requirements.txt +1 -2
app.py CHANGED
@@ -8,8 +8,10 @@ from PIL import Image, ImageDraw
 from io import BytesIO
 import re
 
-from deepseek_vl.models import DeepseekVLV2Processor, DeepseekVLV2ForCausalLM
-from deepseek_vl.utils.io import load_pil_images
+from deepseek_vl2.models import DeepseekVLV2Processor, DeepseekVLV2ForCausalLM
+from deepseek_vl2.utils.io import load_pil_images
+
+
 from transformers import AutoModelForCausalLM
 
 
@@ -56,9 +58,8 @@ def rescale_bounding_boxes(bounding_boxes, original_width, original_height, scal
 
 def deepseek():
     print("helloe!!!!")
-
     # specify the path to the model
-    model_path = "deepseek-ai/deepseek-vl2-small"
+    model_path = "deepseek-ai/deepseek-vl2-tiny"
     vl_chat_processor: DeepseekVLV2Processor = DeepseekVLV2Processor.from_pretrained(model_path)
     tokenizer = vl_chat_processor.tokenizer
 
@@ -70,29 +71,11 @@ def deepseek():
         {
             "role": "<|User|>",
             "content": "<image>\n<|ref|>The giraffe at the back.<|/ref|>.",
-            "images": ["./images/visual_grounding.jpeg"],
+            "images": ["./images/visual_grounding_1.jpeg"],
         },
         {"role": "<|Assistant|>", "content": ""},
     ]
 
-    ## multiple images (or in-context learning) conversation example
-    # conversation = [
-    #     {
-    #         "role": "User",
-    #         "content": "<image_placeholder>A dog wearing nothing in the foreground, "
-    #                    "<image_placeholder>a dog wearing a santa hat, "
-    #                    "<image_placeholder>a dog wearing a wizard outfit, and "
-    #                    "<image_placeholder>what's the dog wearing?",
-    #         "images": [
-    #             "images/dog_a.png",
-    #             "images/dog_b.png",
-    #             "images/dog_c.png",
-    #             "images/dog_d.png",
-    #         ],
-    #     },
-    #     {"role": "Assistant", "content": ""}
-    # ]
-
     # load images and prepare for inputs
     pil_images = load_pil_images(conversation)
     prepare_inputs = vl_chat_processor(
@@ -106,7 +89,7 @@ def deepseek():
     inputs_embeds = vl_gpt.prepare_inputs_embeds(**prepare_inputs)
 
     # run the model to get the response
-    outputs = vl_gpt.language_model.generate(
+    outputs = vl_gpt.language.generate(
         inputs_embeds=inputs_embeds,
         attention_mask=prepare_inputs.attention_mask,
         pad_token_id=tokenizer.eos_token_id,
@@ -117,9 +100,10 @@ def deepseek():
         use_cache=True
     )
 
-    answer = tokenizer.decode(outputs[0].cpu().tolist(), skip_special_tokens=True)
+    answer = tokenizer.decode(outputs[0].cpu().tolist(), skip_special_tokens=False)
     print(f"{prepare_inputs['sft_format'][0]}", answer)
 
+
 @spaces.GPU
 def run_example(image, text_input, model_id="OS-Copilot/OS-Atlas-Base-7B"):
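Taken together, the app.py hunks port the grounding demo from DeepSeek-VL to DeepSeek-VL2: the import root changes from deepseek_vl to deepseek_vl2, the checkpoint drops from deepseek-vl2-small to deepseek-vl2-tiny, generation goes through vl_gpt.language (DeepSeek-VL2's name for the submodule that DeepSeek-VL exposed as language_model), and skip_special_tokens=False keeps the <|ref|>/<|det|>-style grounding markers in the decoded answer. Below is a minimal sketch of the resulting inference path; the model-loading step and the processor/generate arguments that fall outside the changed hunks are assumptions taken from the DeepSeek-VL2 README, not part of this commit.

import torch
from transformers import AutoModelForCausalLM
from deepseek_vl2.models import DeepseekVLV2Processor
from deepseek_vl2.utils.io import load_pil_images

model_path = "deepseek-ai/deepseek-vl2-tiny"
vl_chat_processor = DeepseekVLV2Processor.from_pretrained(model_path)
tokenizer = vl_chat_processor.tokenizer

# Assumed loading step (standard for this repo); the diff never shows it.
vl_gpt = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True)
vl_gpt = vl_gpt.to(torch.bfloat16).cuda().eval()

conversation = [
    {
        "role": "<|User|>",
        "content": "<image>\n<|ref|>The giraffe at the back.<|/ref|>.",
        "images": ["./images/visual_grounding_1.jpeg"],
    },
    {"role": "<|Assistant|>", "content": ""},
]

# Load the referenced images and batch everything into model inputs.
pil_images = load_pil_images(conversation)
prepare_inputs = vl_chat_processor(
    conversations=conversation,
    images=pil_images,
    force_batchify=True,
    system_prompt="",  # assumed; the hunk truncates the processor call
).to(vl_gpt.device)
inputs_embeds = vl_gpt.prepare_inputs_embeds(**prepare_inputs)

# DeepSeek-VL2 exposes the LM as .language; DeepSeek-VL called it .language_model.
outputs = vl_gpt.language.generate(
    inputs_embeds=inputs_embeds,
    attention_mask=prepare_inputs.attention_mask,
    pad_token_id=tokenizer.eos_token_id,
    bos_token_id=tokenizer.bos_token_id,
    eos_token_id=tokenizer.eos_token_id,
    max_new_tokens=512,  # assumed; these kwargs sit outside the changed lines
    do_sample=False,
    use_cache=True,
)

# skip_special_tokens=False keeps the grounding tokens in the decoded string.
answer = tokenizer.decode(outputs[0].cpu().tolist(), skip_special_tokens=False)
print(prepare_inputs["sft_format"][0], answer)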
requirements.txt CHANGED
@@ -6,5 +6,4 @@ torchvision
 transformers
 accelerate==0.30.0
 qwen-vl-utils
-deepseek-vl @ git+https://github.com/deepseek-ai/DeepSeek-VL.git
-
+deepseek-vl2 @ git+https://github.com/deepseek-ai/DeepSeek-VL2.git
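The dependency swap mirrors the import change in app.py: the Space now installs the DeepSeek-VL2 repo from GitHub, whose importable module is deepseek_vl2. A quick post-install sanity check (hypothetical, not part of the commit):

import importlib.util

# DeepSeek-VL2 installs as the deepseek_vl2 module; a None spec here means
# `pip install -r requirements.txt` has not picked up the new line yet.
assert importlib.util.find_spec("deepseek_vl2") is not None, "deepseek_vl2 not installed"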