0xnewton-superlore committed
Commit · 426785e
Parent(s): d318463
nits throw on bad request
handler.py +51 -6
handler.py
CHANGED
@@ -1,11 +1,15 @@
 import base64
-import io
 import torch
 from typing import Dict, List, Any
+from io import BytesIO
 from transformers import CLIPProcessor, CLIPModel
 from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize
 from PIL import Image
 from torch.nn.functional import cosine_similarity
+from typing import Union
+
+max_text_list_length = 30
+max_image_list_length = 20
 
 class EndpointHandler():
     def __init__(self, path: str="", image_size: int=224) -> None:
@@ -34,7 +38,7 @@ class EndpointHandler():
             data (Dict[str, Any]): A dictionary containing the following key:
                 "inputs" (Dict[str, list]): A dictionary containing the following keys:
                     "image_list" (List[str]): A list of base64-encoded images.
-                    "text_list" (List[str]): A list of text strings.
+                    "text_list" (Union[List[str], str]): A list of text strings.
 
         Returns:
             Dict[str, list]: A dictionary containing the following keys:
@@ -43,10 +47,37 @@
                 "similarity_scores" (List[List[float]]): A list of similarity scores between image and text embeddings.
                     Empty if either "image_list" or "text_list" is empty.
         """
+        if not isinstance(data, dict):
+            raise ValueError("Expected input data to be a dict.")
+
         inputs = data.get("inputs", {})
+
+        if not isinstance(inputs, dict):
+            raise ValueError("Expected 'inputs' to be a dict.")
+
         image_list = inputs.get("image_list", [])  # list of b64 images
-        text_list = inputs.get("text_list", [])  # list of texts
+        text_list = inputs.get("text_list", [])  # list of texts (or just a plain string)
 
+        if not isinstance(image_list, list):
+            raise ValueError("Expected 'image_list' to be a list.")
+        if not isinstance(text_list, list) and not isinstance(text_list, str):
+            raise ValueError("Expected 'text_list' to be a list or string.")
+        if not all(isinstance(image, str) for image in image_list):
+            raise ValueError("Expected 'image_list' to contain only strings.")
+        if isinstance(text_list, list) and not all(isinstance(text, str) for text in text_list):
+            raise ValueError("Expected 'text_list' to contain only strings.")
+
+        # If text_list is a single string, convert it to a list
+        if isinstance(text_list, str):
+            text_list = [text_list]
+
+        if len(image_list) > max_image_list_length:
+            raise ValueError(f"Expected 'image_list' to have a maximum length of {max_image_list_length}.")
+        if len(text_list) > max_text_list_length:
+            raise ValueError(f"Expected 'text_list' to have a maximum length of {max_text_list_length}.")
+        if not all(is_valid_base64_image(image) for image in image_list):
+            raise ValueError("Expected 'image_list' to contain only valid base64-encoded images.")
+
         image_features = self.get_image_embeddings(image_list) if len(image_list) > 0 else None
         text_features = self.get_text_embeddings(text_list) if len(text_list) > 0 else None
 
@@ -68,7 +99,7 @@
         for base64_image in base64_images:
             # Decode the base64-encoded image and convert it to an RGB image
             image_data = base64.b64decode(base64_image)
-            image = Image.open(io.BytesIO(image_data)).convert("RGB")
+            image = Image.open(BytesIO(image_data)).convert("RGB")
             preprocessed_image = self.image_transform(image).unsqueeze(0)
             preprocessed_images.append(preprocessed_image)
 
@@ -83,7 +114,7 @@
 
         return image_features
 
-    def get_text_embeddings(self, text_list: List[str]) -> torch.Tensor:
+    def get_text_embeddings(self, text_list: Union[List[str], str]) -> torch.Tensor:
         with torch.no_grad():
             # Tokenize the input text list
             input_tokens = self.processor(text_list, return_tensors="pt", padding=True, truncation=True)
@@ -93,4 +124,18 @@
             text_features = self.model.get_text_features(**input_tokens)
         return text_features
 
-
+
+def is_valid_base64_image(data: str) -> bool:
+    try:
+        # Decode the base64 string
+        img_data = base64.b64decode(data)
+
+        # Open the image using PIL
+        img = Image.open(BytesIO(img_data))
+
+        # Check that the image format is supported
+        img.verify()
+
+        return True
+    except:
+        return False