qwen2.5-VL-api

Running

App Files Files Community

danilohssantana committed on Feb 19

Commit

d0e2871

1 Parent(s): 263b331

adding logs and time

Browse files

Files changed (1) hide show

main.py +8 -76

main.py CHANGED Viewed

@@ -1,4 +1,6 @@
 import base64
 from io import BytesIO
 import torch
@@ -53,37 +55,6 @@ def read_root():
     return {"message": "API is live. Use the /predict endpoint."}
-# def encode_image(image_path, max_size=(800, 800), quality=85):
-#     """
-#     Converts an image from a local file path to a Base64-encoded string with optimized size.
-#     Args:
-#         image_path (str): The path to the image file.
-#         max_size (tuple): The maximum width and height of the resized image.
-#         quality (int): The compression quality (1-100, higher means better quality but bigger size).
-#     Returns:
-#         str: Base64-encoded representation of the optimized image.
-#     """
-#     try:
-#         with Image.open(image_path) as img:
-#             # Convert to RGB (avoid issues with PNG transparency)
-#             img = img.convert("RGB")
-#             # Resize while maintaining aspect ratio
-#             img.thumbnail(max_size, Image.LANCZOS)
-#             # Save to buffer with compression
-#             buffer = BytesIO()
-#             img.save(
-#                 buffer, format="JPEG", quality=quality
-#             )  # Save as JPEG to reduce size
-#             return base64.b64encode(buffer.getvalue()).decode("utf-8")
-#     except Exception as e:
-#         print(f"❌ Error encoding image {image_path}: {e}")
-#         return None
 def encode_image(image_data: BytesIO, max_size=(800, 800), quality=85):
     """
     Converts an image from file data to a Base64-encoded string with optimized size.
@@ -124,7 +95,7 @@ def predict(data: PredictRequest):
         dict: The generated description of the image(s).
     """
-    print("Calling /predict endpoint...")
     # Ensure image_base64 is a list (even if a single image is provided)
     image_list = (
@@ -158,7 +129,9 @@ def predict(data: PredictRequest):
         return_tensors="pt",
     ).to(model.device)
-    print("Starting generation...")
     # Generate the output
     generated_ids = model.generate(**inputs, max_new_tokens=2056)
@@ -172,47 +145,6 @@ def predict(data: PredictRequest):
         clean_up_tokenization_spaces=False,
     )
-    return {"response": output_text[0] if output_text else "No description generated."}
-# @app.get("/predict")
-# def predict(image_url: str = Query(...), prompt: str = Query(...)):
-#     image = encode_image(image_url)
-#     messages = [
-#         {
-#             "role": "system",
-#             "content": "You are a helpful assistant with vision abilities.",
-#         },
-#         {
-#             "role": "user",
-#             "content": [
-#                 {"type": "image", "image": f"data:image;base64,{image}"},
-#                 {"type": "text", "text": prompt},
-#             ],
-#         },
-#     ]
-#     text = processor.apply_chat_template(
-#         messages, tokenize=False, add_generation_prompt=True
-#     )
-#     image_inputs, video_inputs = process_vision_info(messages)
-#     inputs = processor(
-#         text=[text],
-#         images=image_inputs,
-#         videos=video_inputs,
-#         padding=True,
-#         return_tensors="pt",
-#     ).to(model.device)
-#     with torch.no_grad():
-#         generated_ids = model.generate(**inputs, max_new_tokens=128)
-#     generated_ids_trimmed = [
-#         out_ids[len(in_ids) :]
-#         for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
-#     ]
-#     output_texts = processor.batch_decode(
-#         generated_ids_trimmed,
-#         skip_special_tokens=True,
-#         clean_up_tokenization_spaces=False,
-#     )
-#     return {"response": output_texts[0]}

 import base64
+import logging
+import time
 from io import BytesIO
 import torch
     return {"message": "API is live. Use the /predict endpoint."}
 def encode_image(image_data: BytesIO, max_size=(800, 800), quality=85):
     """
     Converts an image from file data to a Base64-encoded string with optimized size.
         dict: The generated description of the image(s).
     """
+    logging.warning("Calling /predict endpoint...")
     # Ensure image_base64 is a list (even if a single image is provided)
     image_list = (
         return_tensors="pt",
     ).to(model.device)
+    logging.warning("Starting generation...")
+    start_time = time.time()
     # Generate the output
     generated_ids = model.generate(**inputs, max_new_tokens=2056)
         clean_up_tokenization_spaces=False,
     )
+    logging.warning(f"Generation completed in {time.time() - start_time:.2f}s.")
+    return {"response": output_text[0] if output_text else "No description generated."}