danilohssantana committed on
Commit
dbcff35
·
1 Parent(s): 1af9e28
Files changed (2) hide show
  1. .DS_Store +0 -0
  2. main.py +40 -13
.DS_Store ADDED
Binary file (6.15 kB). View file
 
main.py CHANGED
@@ -18,13 +18,28 @@ class PredictRequest(BaseModel):
18
  image_base64: str
19
  prompt: str
20
 
21
- checkpoint = "Qwen/Qwen2-VL-2B-Instruct"
22
- min_pixels = 256 * 28 * 28
23
- max_pixels = 1280 * 28 * 28
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  processor = AutoProcessor.from_pretrained(
25
- checkpoint, min_pixels=min_pixels, max_pixels=max_pixels
 
 
26
  )
27
- model = Qwen2VLForConditionalGeneration.from_pretrained(
28
  checkpoint,
29
  torch_dtype=torch.bfloat16,
30
  device_map="auto",
@@ -108,15 +123,27 @@ def predict(data: PredictRequest):
108
 
109
 
110
  # Create the input message structure
 
 
 
 
 
 
 
 
 
 
111
  messages = [
112
- {
113
- "role": "user",
114
- "content": [
115
- {"type": "image", "image": f"data:image;base64,{data.image_base64}"},
116
- {"type": "text", "text": data.prompt},
117
- ],
118
- }
119
- ]
 
 
120
 
121
  # Prepare inputs for the model
122
  text = processor.apply_chat_template(
 
18
  image_base64: str
19
  prompt: str
20
 
21
+ # checkpoint = "Qwen/Qwen2-VL-2B-Instruct"
22
+ # min_pixels = 256 * 28 * 28
23
+ # max_pixels = 1280 * 28 * 28
24
+ # processor = AutoProcessor.from_pretrained(
25
+ # checkpoint, min_pixels=min_pixels, max_pixels=max_pixels
26
+ # )
27
+ # model = Qwen2VLForConditionalGeneration.from_pretrained(
28
+ # checkpoint,
29
+ # torch_dtype=torch.bfloat16,
30
+ # device_map="auto",
31
+ # # attn_implementation="flash_attention_2",
32
+ # )
33
+
34
+ checkpoint = "Qwen/Qwen2.5-VL-3B-Instruct"
35
+ min_pixels = 256*28*28
36
+ max_pixels = 1280*28*28
37
  processor = AutoProcessor.from_pretrained(
38
+ checkpoint,
39
+ min_pixels=min_pixels,
40
+ max_pixels=max_pixels
41
  )
42
+ model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
43
  checkpoint,
44
  torch_dtype=torch.bfloat16,
45
  device_map="auto",
 
123
 
124
 
125
  # Create the input message structure
126
+ # messages = [
127
+ # {
128
+ # "role": "user",
129
+ # "content": [
130
+ # {"type": "image", "image": f"data:image;base64,{data.image_base64}"},
131
+ # {"type": "text", "text": data.prompt},
132
+ # ],
133
+ # }
134
+ # ]
135
+
136
  messages = [
137
+ {"role": "system", "content": "You are a helpful assistant with vision abilities."},
138
+ {
139
+ "role": "user",
140
+ "content": [
141
+ {"type": "image", "image": image} for image in data.image_base64
142
+ ]
143
+ + [{"type": "text", "text": data.prompt}],
144
+ },
145
+ ]
146
+
147
 
148
  # Prepare inputs for the model
149
  text = processor.apply_chat_template(