Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -106,6 +106,25 @@ def show_mask(mask, ax, random_color=False):
|
|
106 |
|
107 |
|
108 |
def process_image_detection(image, target_label, surprise_rating):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
110 |
|
111 |
# Get original image DPI and size
|
@@ -113,7 +132,7 @@ def process_image_detection(image, target_label, surprise_rating):
|
|
113 |
original_size = image.size
|
114 |
|
115 |
# Calculate relative font size based on image dimensions
|
116 |
-
base_fontsize = min(original_size) / 40
|
117 |
|
118 |
owlv2_processor = Owlv2Processor.from_pretrained("google/owlv2-base-patch16")
|
119 |
owlv2_model = Owlv2ForObjectDetection.from_pretrained("google/owlv2-base-patch16").to(device)
|
@@ -130,7 +149,7 @@ def process_image_detection(image, target_label, surprise_rating):
|
|
130 |
target_sizes = torch.tensor([image.size[::-1]]).to(device)
|
131 |
results = owlv2_processor.post_process_object_detection(outputs, target_sizes=target_sizes)[0]
|
132 |
|
133 |
-
dpi = 300
|
134 |
figsize = (original_size[0] / dpi, original_size[1] / dpi)
|
135 |
fig = plt.figure(figsize=figsize, dpi=dpi)
|
136 |
|
@@ -170,7 +189,7 @@ def process_image_detection(image, target_label, surprise_rating):
|
|
170 |
(box[0], box[1]),
|
171 |
box[2] - box[0],
|
172 |
box[3] - box[1],
|
173 |
-
linewidth=max(2, min(original_size) / 500),
|
174 |
edgecolor='red',
|
175 |
facecolor='none'
|
176 |
)
|
|
|
106 |
|
107 |
|
108 |
def process_image_detection(image, target_label, surprise_rating):
|
109 |
+
# Handle different image input types
|
110 |
+
if isinstance(image, tuple):
|
111 |
+
if len(image) > 0 and image[0] is not None:
|
112 |
+
image = Image.fromarray(image[0])
|
113 |
+
else:
|
114 |
+
raise ValueError("Invalid image tuple provided")
|
115 |
+
elif isinstance(image, np.ndarray):
|
116 |
+
image = Image.fromarray(image)
|
117 |
+
elif isinstance(image, str):
|
118 |
+
image = Image.open(image)
|
119 |
+
|
120 |
+
# Ensure image is in PIL Image format
|
121 |
+
if not isinstance(image, Image.Image):
|
122 |
+
raise ValueError("Input must be a PIL Image, numpy array, or valid image path")
|
123 |
+
|
124 |
+
# Ensure image is in RGB mode
|
125 |
+
if image.mode != 'RGB':
|
126 |
+
image = image.convert('RGB')
|
127 |
+
|
128 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
129 |
|
130 |
# Get original image DPI and size
|
|
|
132 |
original_size = image.size
|
133 |
|
134 |
# Calculate relative font size based on image dimensions
|
135 |
+
base_fontsize = min(original_size) / 40
|
136 |
|
137 |
owlv2_processor = Owlv2Processor.from_pretrained("google/owlv2-base-patch16")
|
138 |
owlv2_model = Owlv2ForObjectDetection.from_pretrained("google/owlv2-base-patch16").to(device)
|
|
|
149 |
target_sizes = torch.tensor([image.size[::-1]]).to(device)
|
150 |
results = owlv2_processor.post_process_object_detection(outputs, target_sizes=target_sizes)[0]
|
151 |
|
152 |
+
dpi = 300
|
153 |
figsize = (original_size[0] / dpi, original_size[1] / dpi)
|
154 |
fig = plt.figure(figsize=figsize, dpi=dpi)
|
155 |
|
|
|
189 |
(box[0], box[1]),
|
190 |
box[2] - box[0],
|
191 |
box[3] - box[1],
|
192 |
+
linewidth=max(2, min(original_size) / 500),
|
193 |
edgecolor='red',
|
194 |
facecolor='none'
|
195 |
)
|