Commit dbcff35 · Parent(s): 1af9e28
new model
.DS_Store ADDED (binary file, 6.15 kB)
main.py CHANGED
@@ -18,13 +18,28 @@ class PredictRequest(BaseModel):
     image_base64: str
     prompt: str
 
-checkpoint = "Qwen/Qwen2-VL-2B-Instruct"
-min_pixels = 256 * 28 * 28
-max_pixels = 1280 * 28 * 28
+# checkpoint = "Qwen/Qwen2-VL-2B-Instruct"
+# min_pixels = 256 * 28 * 28
+# max_pixels = 1280 * 28 * 28
+# processor = AutoProcessor.from_pretrained(
+#     checkpoint, min_pixels=min_pixels, max_pixels=max_pixels
+# )
+# model = Qwen2VLForConditionalGeneration.from_pretrained(
+#     checkpoint,
+#     torch_dtype=torch.bfloat16,
+#     device_map="auto",
+#     # attn_implementation="flash_attention_2",
+# )
+
+checkpoint = "Qwen/Qwen2.5-VL-3B-Instruct"
+min_pixels = 256*28*28
+max_pixels = 1280*28*28
 processor = AutoProcessor.from_pretrained(
-    checkpoint, min_pixels=min_pixels, max_pixels=max_pixels
+    checkpoint,
+    min_pixels=min_pixels,
+    max_pixels=max_pixels
 )
-model = Qwen2VLForConditionalGeneration.from_pretrained(
+model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     checkpoint,
     torch_dtype=torch.bfloat16,
     device_map="auto",
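The hunk above swaps the checkpoint from Qwen2-VL-2B-Instruct to Qwen2.5-VL-3B-Instruct, which also means a new model class: Qwen2VLForConditionalGeneration becomes Qwen2_5_VLForConditionalGeneration. The min_pixels/max_pixels arguments bound how far the processor resizes each image, keeping its pixel count between 256*28*28 and 1280*28*28, i.e. roughly 256 to 1280 of the model's 28x28 vision patches. A minimal load sketch, assuming a transformers version with Qwen2.5-VL support (about v4.49+) and torch installed; illustration only, not code from this commit:

import torch
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration

checkpoint = "Qwen/Qwen2.5-VL-3B-Instruct"
processor = AutoProcessor.from_pretrained(
    checkpoint,
    min_pixels=256 * 28 * 28,    # floor: ~256 vision patches per image
    max_pixels=1280 * 28 * 28,   # ceiling: ~1280 vision patches per image
)
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    checkpoint,
    torch_dtype=torch.bfloat16,  # half-precision weights; needs bf16-capable hardware
    device_map="auto",           # shard across available devices automatically
)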
@@ -108,15 +123,27 @@ def predict(data: PredictRequest):
 
 
     # Create the input message structure
+    # messages = [
+    #     {
+    #         "role": "user",
+    #         "content": [
+    #             {"type": "image", "image": f"data:image;base64,{data.image_base64}"},
+    #             {"type": "text", "text": data.prompt},
+    #         ],
+    #     }
+    # ]
+
     messages = [
-        {
-            "role": "user",
-            "content": [
-                {"type": "image", "image": f"data:image;base64,{data.image_base64}"},
-                {"type": "text", "text": data.prompt},
-            ],
-        }
-    ]
+        {"role": "system", "content": "You are a helpful assistant with vision abilities."},
+        {
+            "role": "user",
+            "content": [
+                {"type": "image", "image": image} for image in data.image_base64
+            ]
+            + [{"type": "text", "text": data.prompt}],
+        },
+    ]
+
 
     # Prepare inputs for the model
     text = processor.apply_chat_template(
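This hunk keeps the old single-image message as a comment and replaces it with a system prompt plus a user turn whose content is built by a list comprehension over data.image_base64, with the text prompt appended last. One caveat: the unchanged context at the top of the diff still declares image_base64: str, and iterating a str yields individual characters, so the comprehension would produce one bogus image entry per base64 character. A sketch of what the multi-image path presumably intends; the List[str] field and the data-URI framing (reused from the replaced single-image code) are assumptions, not part of this commit:

from typing import List
from pydantic import BaseModel

class PredictRequest(BaseModel):
    image_base64: List[str]  # assumption: one base64 payload per image
    prompt: str

def build_messages(data: PredictRequest) -> list:
    # Images first, prompt last, mirroring the committed structure.
    return [
        {"role": "system", "content": "You are a helpful assistant with vision abilities."},
        {
            "role": "user",
            "content": [
                # data-URI framing taken from the replaced single-image code
                {"type": "image", "image": f"data:image;base64,{b64}"}
                for b64 in data.image_base64
            ]
            + [{"type": "text", "text": data.prompt}],
        },
    ]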
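Both hunks end on the same context line, text = processor.apply_chat_template(, so the rest of predict() is untouched by this commit. For orientation, a hedged sketch of the generation flow such a handler typically continues into, following the pattern from the Qwen2.5-VL model card; the qwen_vl_utils helper and all generation parameters here are assumptions, not taken from main.py:

from qwen_vl_utils import process_vision_info  # pip install qwen-vl-utils

def generate_answer(messages) -> str:
    # Render the chat template to a prompt string, leaving generation open.
    text = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    # Collect the image (and video) inputs referenced by the messages.
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    ).to(model.device)
    generated_ids = model.generate(**inputs, max_new_tokens=128)
    # Strip the echoed prompt tokens before decoding.
    trimmed = [
        out[len(inp):] for inp, out in zip(inputs.input_ids, generated_ids)
    ]
    return processor.batch_decode(trimmed, skip_special_tokens=True)[0]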