Spaces:
Runtime error
Runtime error
Commit
·
d0e2871
1
Parent(s):
263b331
adding logs and time
Browse files
main.py
CHANGED
|
@@ -1,4 +1,6 @@
|
|
| 1 |
import base64
|
|
|
|
|
|
|
| 2 |
from io import BytesIO
|
| 3 |
|
| 4 |
import torch
|
|
@@ -53,37 +55,6 @@ def read_root():
|
|
| 53 |
return {"message": "API is live. Use the /predict endpoint."}
|
| 54 |
|
| 55 |
|
| 56 |
-
# def encode_image(image_path, max_size=(800, 800), quality=85):
|
| 57 |
-
# """
|
| 58 |
-
# Converts an image from a local file path to a Base64-encoded string with optimized size.
|
| 59 |
-
|
| 60 |
-
# Args:
|
| 61 |
-
# image_path (str): The path to the image file.
|
| 62 |
-
# max_size (tuple): The maximum width and height of the resized image.
|
| 63 |
-
# quality (int): The compression quality (1-100, higher means better quality but bigger size).
|
| 64 |
-
|
| 65 |
-
# Returns:
|
| 66 |
-
# str: Base64-encoded representation of the optimized image.
|
| 67 |
-
# """
|
| 68 |
-
# try:
|
| 69 |
-
# with Image.open(image_path) as img:
|
| 70 |
-
# # Convert to RGB (avoid issues with PNG transparency)
|
| 71 |
-
# img = img.convert("RGB")
|
| 72 |
-
|
| 73 |
-
# # Resize while maintaining aspect ratio
|
| 74 |
-
# img.thumbnail(max_size, Image.LANCZOS)
|
| 75 |
-
|
| 76 |
-
# # Save to buffer with compression
|
| 77 |
-
# buffer = BytesIO()
|
| 78 |
-
# img.save(
|
| 79 |
-
# buffer, format="JPEG", quality=quality
|
| 80 |
-
# ) # Save as JPEG to reduce size
|
| 81 |
-
# return base64.b64encode(buffer.getvalue()).decode("utf-8")
|
| 82 |
-
# except Exception as e:
|
| 83 |
-
# print(f"❌ Error encoding image {image_path}: {e}")
|
| 84 |
-
# return None
|
| 85 |
-
|
| 86 |
-
|
| 87 |
def encode_image(image_data: BytesIO, max_size=(800, 800), quality=85):
|
| 88 |
"""
|
| 89 |
Converts an image from file data to a Base64-encoded string with optimized size.
|
|
@@ -124,7 +95,7 @@ def predict(data: PredictRequest):
|
|
| 124 |
dict: The generated description of the image(s).
|
| 125 |
"""
|
| 126 |
|
| 127 |
-
|
| 128 |
|
| 129 |
# Ensure image_base64 is a list (even if a single image is provided)
|
| 130 |
image_list = (
|
|
@@ -158,7 +129,9 @@ def predict(data: PredictRequest):
|
|
| 158 |
return_tensors="pt",
|
| 159 |
).to(model.device)
|
| 160 |
|
| 161 |
-
|
|
|
|
|
|
|
| 162 |
|
| 163 |
# Generate the output
|
| 164 |
generated_ids = model.generate(**inputs, max_new_tokens=2056)
|
|
@@ -172,47 +145,6 @@ def predict(data: PredictRequest):
|
|
| 172 |
clean_up_tokenization_spaces=False,
|
| 173 |
)
|
| 174 |
|
| 175 |
-
|
| 176 |
|
| 177 |
-
|
| 178 |
-
# @app.get("/predict")
|
| 179 |
-
# def predict(image_url: str = Query(...), prompt: str = Query(...)):
|
| 180 |
-
|
| 181 |
-
# image = encode_image(image_url)
|
| 182 |
-
|
| 183 |
-
# messages = [
|
| 184 |
-
# {
|
| 185 |
-
# "role": "system",
|
| 186 |
-
# "content": "You are a helpful assistant with vision abilities.",
|
| 187 |
-
# },
|
| 188 |
-
# {
|
| 189 |
-
# "role": "user",
|
| 190 |
-
# "content": [
|
| 191 |
-
# {"type": "image", "image": f"data:image;base64,{image}"},
|
| 192 |
-
# {"type": "text", "text": prompt},
|
| 193 |
-
# ],
|
| 194 |
-
# },
|
| 195 |
-
# ]
|
| 196 |
-
# text = processor.apply_chat_template(
|
| 197 |
-
# messages, tokenize=False, add_generation_prompt=True
|
| 198 |
-
# )
|
| 199 |
-
# image_inputs, video_inputs = process_vision_info(messages)
|
| 200 |
-
# inputs = processor(
|
| 201 |
-
# text=[text],
|
| 202 |
-
# images=image_inputs,
|
| 203 |
-
# videos=video_inputs,
|
| 204 |
-
# padding=True,
|
| 205 |
-
# return_tensors="pt",
|
| 206 |
-
# ).to(model.device)
|
| 207 |
-
# with torch.no_grad():
|
| 208 |
-
# generated_ids = model.generate(**inputs, max_new_tokens=128)
|
| 209 |
-
# generated_ids_trimmed = [
|
| 210 |
-
# out_ids[len(in_ids) :]
|
| 211 |
-
# for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
|
| 212 |
-
# ]
|
| 213 |
-
# output_texts = processor.batch_decode(
|
| 214 |
-
# generated_ids_trimmed,
|
| 215 |
-
# skip_special_tokens=True,
|
| 216 |
-
# clean_up_tokenization_spaces=False,
|
| 217 |
-
# )
|
| 218 |
-
# return {"response": output_texts[0]}
|
|
|
|
| 1 |
import base64
|
| 2 |
+
import logging
|
| 3 |
+
import time
|
| 4 |
from io import BytesIO
|
| 5 |
|
| 6 |
import torch
|
|
|
|
| 55 |
return {"message": "API is live. Use the /predict endpoint."}
|
| 56 |
|
| 57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
def encode_image(image_data: BytesIO, max_size=(800, 800), quality=85):
|
| 59 |
"""
|
| 60 |
Converts an image from file data to a Base64-encoded string with optimized size.
|
|
|
|
| 95 |
dict: The generated description of the image(s).
|
| 96 |
"""
|
| 97 |
|
| 98 |
+
logging.warning("Calling /predict endpoint...")
|
| 99 |
|
| 100 |
# Ensure image_base64 is a list (even if a single image is provided)
|
| 101 |
image_list = (
|
|
|
|
| 129 |
return_tensors="pt",
|
| 130 |
).to(model.device)
|
| 131 |
|
| 132 |
+
logging.warning("Starting generation...")
|
| 133 |
+
|
| 134 |
+
start_time = time.time()
|
| 135 |
|
| 136 |
# Generate the output
|
| 137 |
generated_ids = model.generate(**inputs, max_new_tokens=2056)
|
|
|
|
| 145 |
clean_up_tokenization_spaces=False,
|
| 146 |
)
|
| 147 |
|
| 148 |
+
logging.warning(f"Generation completed in {time.time() - start_time:.2f}s.")
|
| 149 |
|
| 150 |
+
return {"response": output_text[0] if output_text else "No description generated."}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|