qwen2.5-VL-api

Running

danilohssantana committed on Feb 19

Commit

98d8559

1 Parent(s): 2044733

fixing parameters

Files changed (2) hide show

.DS_Store CHANGED Viewed

Binary files a/.DS_Store and b/.DS_Store differ

main.py CHANGED Viewed

@@ -118,21 +118,28 @@ def predict(data: PredictRequest):
     Generates a description for an image using the Qwen-2-VL model.
     Args:
-        data (any): The encoded image and the prompt to be used.
-        prompt (str): The text prompt to guide the model's response.
     Returns:
-        str: The generated description of the image.
     """
-    # Create the input message structure
     messages = [
         {
             "role": "user",
             "content": [
-                {"type": "image", "image": f"data:image;base64,{data.image_base64}"},
-                {"type": "text", "text": data.prompt},
-            ],
         }
     ]

     Generates a description for an image using the Qwen-2-VL model.
     Args:
+        data (PredictRequest): The request containing encoded images and a prompt.
     Returns:
+        dict: The generated description of the image(s).
     """
+    # Ensure image_base64 is a list (even if a single image is provided)
+    image_list = (
+        data.image_base64
+        if isinstance(data.image_base64, list)
+        else [data.image_base64]
+    )
+    # Create the input message structure with multiple images
     messages = [
         {
             "role": "user",
             "content": [
+                {"type": "image", "image": f"data:image;base64,{image}"}
+                for image in image_list
+            ]
+            + [{"type": "text", "text": data.prompt}],
         }
     ]