danilohssantana committed on
Commit
d0e2871
·
1 Parent(s): 263b331

adding logs and time

Browse files
Files changed (1) hide show
  1. main.py +8 -76
main.py CHANGED
@@ -1,4 +1,6 @@
1
  import base64
 
 
2
  from io import BytesIO
3
 
4
  import torch
@@ -53,37 +55,6 @@ def read_root():
53
  return {"message": "API is live. Use the /predict endpoint."}
54
 
55
 
56
- # def encode_image(image_path, max_size=(800, 800), quality=85):
57
- # """
58
- # Converts an image from a local file path to a Base64-encoded string with optimized size.
59
-
60
- # Args:
61
- # image_path (str): The path to the image file.
62
- # max_size (tuple): The maximum width and height of the resized image.
63
- # quality (int): The compression quality (1-100, higher means better quality but bigger size).
64
-
65
- # Returns:
66
- # str: Base64-encoded representation of the optimized image.
67
- # """
68
- # try:
69
- # with Image.open(image_path) as img:
70
- # # Convert to RGB (avoid issues with PNG transparency)
71
- # img = img.convert("RGB")
72
-
73
- # # Resize while maintaining aspect ratio
74
- # img.thumbnail(max_size, Image.LANCZOS)
75
-
76
- # # Save to buffer with compression
77
- # buffer = BytesIO()
78
- # img.save(
79
- # buffer, format="JPEG", quality=quality
80
- # ) # Save as JPEG to reduce size
81
- # return base64.b64encode(buffer.getvalue()).decode("utf-8")
82
- # except Exception as e:
83
- # print(f"❌ Error encoding image {image_path}: {e}")
84
- # return None
85
-
86
-
87
  def encode_image(image_data: BytesIO, max_size=(800, 800), quality=85):
88
  """
89
  Converts an image from file data to a Base64-encoded string with optimized size.
@@ -124,7 +95,7 @@ def predict(data: PredictRequest):
124
  dict: The generated description of the image(s).
125
  """
126
 
127
- print("Calling /predict endpoint...")
128
 
129
  # Ensure image_base64 is a list (even if a single image is provided)
130
  image_list = (
@@ -158,7 +129,9 @@ def predict(data: PredictRequest):
158
  return_tensors="pt",
159
  ).to(model.device)
160
 
161
- print("Starting generation...")
 
 
162
 
163
  # Generate the output
164
  generated_ids = model.generate(**inputs, max_new_tokens=2056)
@@ -172,47 +145,6 @@ def predict(data: PredictRequest):
172
  clean_up_tokenization_spaces=False,
173
  )
174
 
175
- return {"response": output_text[0] if output_text else "No description generated."}
176
 
177
-
178
- # @app.get("/predict")
179
- # def predict(image_url: str = Query(...), prompt: str = Query(...)):
180
-
181
- # image = encode_image(image_url)
182
-
183
- # messages = [
184
- # {
185
- # "role": "system",
186
- # "content": "You are a helpful assistant with vision abilities.",
187
- # },
188
- # {
189
- # "role": "user",
190
- # "content": [
191
- # {"type": "image", "image": f"data:image;base64,{image}"},
192
- # {"type": "text", "text": prompt},
193
- # ],
194
- # },
195
- # ]
196
- # text = processor.apply_chat_template(
197
- # messages, tokenize=False, add_generation_prompt=True
198
- # )
199
- # image_inputs, video_inputs = process_vision_info(messages)
200
- # inputs = processor(
201
- # text=[text],
202
- # images=image_inputs,
203
- # videos=video_inputs,
204
- # padding=True,
205
- # return_tensors="pt",
206
- # ).to(model.device)
207
- # with torch.no_grad():
208
- # generated_ids = model.generate(**inputs, max_new_tokens=128)
209
- # generated_ids_trimmed = [
210
- # out_ids[len(in_ids) :]
211
- # for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
212
- # ]
213
- # output_texts = processor.batch_decode(
214
- # generated_ids_trimmed,
215
- # skip_special_tokens=True,
216
- # clean_up_tokenization_spaces=False,
217
- # )
218
- # return {"response": output_texts[0]}
 
1
  import base64
2
+ import logging
3
+ import time
4
  from io import BytesIO
5
 
6
  import torch
 
55
  return {"message": "API is live. Use the /predict endpoint."}
56
 
57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  def encode_image(image_data: BytesIO, max_size=(800, 800), quality=85):
59
  """
60
  Converts an image from file data to a Base64-encoded string with optimized size.
 
95
  dict: The generated description of the image(s).
96
  """
97
 
98
+ logging.warning("Calling /predict endpoint...")
99
 
100
  # Ensure image_base64 is a list (even if a single image is provided)
101
  image_list = (
 
129
  return_tensors="pt",
130
  ).to(model.device)
131
 
132
+ logging.warning("Starting generation...")
133
+
134
+ start_time = time.time()
135
 
136
  # Generate the output
137
  generated_ids = model.generate(**inputs, max_new_tokens=2056)
 
145
  clean_up_tokenization_spaces=False,
146
  )
147
 
148
+ logging.warning(f"Generation completed in {time.time() - start_time:.2f}s.")
149
 
150
+ return {"response": output_text[0] if output_text else "No description generated."}