Spaces:
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -106,6 +106,25 @@ def show_mask(mask, ax, random_color=False):
|
|
| 106 |
|
| 107 |
|
| 108 |
def process_image_detection(image, target_label, surprise_rating):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 110 |
|
| 111 |
# Get original image DPI and size
|
|
@@ -113,7 +132,7 @@ def process_image_detection(image, target_label, surprise_rating):
|
|
| 113 |
original_size = image.size
|
| 114 |
|
| 115 |
# Calculate relative font size based on image dimensions
|
| 116 |
-
base_fontsize = min(original_size) / 40
|
| 117 |
|
| 118 |
owlv2_processor = Owlv2Processor.from_pretrained("google/owlv2-base-patch16")
|
| 119 |
owlv2_model = Owlv2ForObjectDetection.from_pretrained("google/owlv2-base-patch16").to(device)
|
|
@@ -130,7 +149,7 @@ def process_image_detection(image, target_label, surprise_rating):
|
|
| 130 |
target_sizes = torch.tensor([image.size[::-1]]).to(device)
|
| 131 |
results = owlv2_processor.post_process_object_detection(outputs, target_sizes=target_sizes)[0]
|
| 132 |
|
| 133 |
-
dpi = 300
|
| 134 |
figsize = (original_size[0] / dpi, original_size[1] / dpi)
|
| 135 |
fig = plt.figure(figsize=figsize, dpi=dpi)
|
| 136 |
|
|
@@ -170,7 +189,7 @@ def process_image_detection(image, target_label, surprise_rating):
|
|
| 170 |
(box[0], box[1]),
|
| 171 |
box[2] - box[0],
|
| 172 |
box[3] - box[1],
|
| 173 |
-
linewidth=max(2, min(original_size) / 500),
|
| 174 |
edgecolor='red',
|
| 175 |
facecolor='none'
|
| 176 |
)
|
|
|
|
| 106 |
|
| 107 |
|
| 108 |
def process_image_detection(image, target_label, surprise_rating):
|
| 109 |
+
# Handle different image input types
|
| 110 |
+
if isinstance(image, tuple):
|
| 111 |
+
if len(image) > 0 and image[0] is not None:
|
| 112 |
+
image = Image.fromarray(image[0])
|
| 113 |
+
else:
|
| 114 |
+
raise ValueError("Invalid image tuple provided")
|
| 115 |
+
elif isinstance(image, np.ndarray):
|
| 116 |
+
image = Image.fromarray(image)
|
| 117 |
+
elif isinstance(image, str):
|
| 118 |
+
image = Image.open(image)
|
| 119 |
+
|
| 120 |
+
# Ensure image is in PIL Image format
|
| 121 |
+
if not isinstance(image, Image.Image):
|
| 122 |
+
raise ValueError("Input must be a PIL Image, numpy array, or valid image path")
|
| 123 |
+
|
| 124 |
+
# Ensure image is in RGB mode
|
| 125 |
+
if image.mode != 'RGB':
|
| 126 |
+
image = image.convert('RGB')
|
| 127 |
+
|
| 128 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 129 |
|
| 130 |
# Get original image DPI and size
|
|
|
|
| 132 |
original_size = image.size
|
| 133 |
|
| 134 |
# Calculate relative font size based on image dimensions
|
| 135 |
+
base_fontsize = min(original_size) / 40
|
| 136 |
|
| 137 |
owlv2_processor = Owlv2Processor.from_pretrained("google/owlv2-base-patch16")
|
| 138 |
owlv2_model = Owlv2ForObjectDetection.from_pretrained("google/owlv2-base-patch16").to(device)
|
|
|
|
| 149 |
target_sizes = torch.tensor([image.size[::-1]]).to(device)
|
| 150 |
results = owlv2_processor.post_process_object_detection(outputs, target_sizes=target_sizes)[0]
|
| 151 |
|
| 152 |
+
dpi = 300
|
| 153 |
figsize = (original_size[0] / dpi, original_size[1] / dpi)
|
| 154 |
fig = plt.figure(figsize=figsize, dpi=dpi)
|
| 155 |
|
|
|
|
| 189 |
(box[0], box[1]),
|
| 190 |
box[2] - box[0],
|
| 191 |
box[3] - box[1],
|
| 192 |
+
linewidth=max(2, min(original_size) / 500),
|
| 193 |
edgecolor='red',
|
| 194 |
facecolor='none'
|
| 195 |
)
|