danilohssantana committed on
Commit
dbcff35
·
1 Parent(s): 1af9e28
Files changed (2) hide show
  1. .DS_Store +0 -0
  2. main.py +40 -13
.DS_Store ADDED
Binary file (6.15 kB). View file
 
main.py CHANGED
@@ -18,13 +18,28 @@ class PredictRequest(BaseModel):
18
  image_base64: str
19
  prompt: str
20
 
21
- checkpoint = "Qwen/Qwen2-VL-2B-Instruct"
22
- min_pixels = 256 * 28 * 28
23
- max_pixels = 1280 * 28 * 28
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  processor = AutoProcessor.from_pretrained(
25
- checkpoint, min_pixels=min_pixels, max_pixels=max_pixels
 
 
26
  )
27
- model = Qwen2VLForConditionalGeneration.from_pretrained(
28
  checkpoint,
29
  torch_dtype=torch.bfloat16,
30
  device_map="auto",
@@ -108,15 +123,27 @@ def predict(data: PredictRequest):
108
 
109
 
110
  # Create the input message structure
 
 
 
 
 
 
 
 
 
 
111
  messages = [
112
- {
113
- "role": "user",
114
- "content": [
115
- {"type": "image", "image": f"data:image;base64,{data.image_base64}"},
116
- {"type": "text", "text": data.prompt},
117
- ],
118
- }
119
- ]
 
 
120
 
121
  # Prepare inputs for the model
122
  text = processor.apply_chat_template(
 
18
  image_base64: str
19
  prompt: str
20
 
21
+ # checkpoint = "Qwen/Qwen2-VL-2B-Instruct"
22
+ # min_pixels = 256 * 28 * 28
23
+ # max_pixels = 1280 * 28 * 28
24
+ # processor = AutoProcessor.from_pretrained(
25
+ # checkpoint, min_pixels=min_pixels, max_pixels=max_pixels
26
+ # )
27
+ # model = Qwen2VLForConditionalGeneration.from_pretrained(
28
+ # checkpoint,
29
+ # torch_dtype=torch.bfloat16,
30
+ # device_map="auto",
31
+ # # attn_implementation="flash_attention_2",
32
+ # )
33
+
34
+ checkpoint = "Qwen/Qwen2.5-VL-3B-Instruct"
35
+ min_pixels = 256*28*28
36
+ max_pixels = 1280*28*28
37
  processor = AutoProcessor.from_pretrained(
38
+ checkpoint,
39
+ min_pixels=min_pixels,
40
+ max_pixels=max_pixels
41
  )
42
+ model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
43
  checkpoint,
44
  torch_dtype=torch.bfloat16,
45
  device_map="auto",
 
123
 
124
 
125
  # Create the input message structure
126
+ # messages = [
127
+ # {
128
+ # "role": "user",
129
+ # "content": [
130
+ # {"type": "image", "image": f"data:image;base64,{data.image_base64}"},
131
+ # {"type": "text", "text": data.prompt},
132
+ # ],
133
+ # }
134
+ # ]
135
+
136
  messages = [
137
+ {"role": "system", "content": "You are a helpful assistant with vision abilities."},
138
+ {
139
+ "role": "user",
140
+ "content": [
141
+ {"type": "image", "image": image} for image in data.image_base64
142
+ ]
143
+ + [{"type": "text", "text": data.prompt}],
144
+ },
145
+ ]
146
+
147
 
148
  # Prepare inputs for the model
149
  text = processor.apply_chat_template(