reab5555 committed on
Commit
76cd7ac
·
verified ·
1 Parent(s): 4c9f62d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -3
app.py CHANGED
@@ -106,6 +106,25 @@ def show_mask(mask, ax, random_color=False):
106
 
107
 
108
  def process_image_detection(image, target_label, surprise_rating):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  device = "cuda" if torch.cuda.is_available() else "cpu"
110
 
111
  # Get original image DPI and size
@@ -113,7 +132,7 @@ def process_image_detection(image, target_label, surprise_rating):
113
  original_size = image.size
114
 
115
  # Calculate relative font size based on image dimensions
116
- base_fontsize = min(original_size) / 40 # Adjust this divisor to change overall font size
117
 
118
  owlv2_processor = Owlv2Processor.from_pretrained("google/owlv2-base-patch16")
119
  owlv2_model = Owlv2ForObjectDetection.from_pretrained("google/owlv2-base-patch16").to(device)
@@ -130,7 +149,7 @@ def process_image_detection(image, target_label, surprise_rating):
130
  target_sizes = torch.tensor([image.size[::-1]]).to(device)
131
  results = owlv2_processor.post_process_object_detection(outputs, target_sizes=target_sizes)[0]
132
 
133
- dpi = 300 # Increased DPI for better text rendering
134
  figsize = (original_size[0] / dpi, original_size[1] / dpi)
135
  fig = plt.figure(figsize=figsize, dpi=dpi)
136
 
@@ -170,7 +189,7 @@ def process_image_detection(image, target_label, surprise_rating):
170
  (box[0], box[1]),
171
  box[2] - box[0],
172
  box[3] - box[1],
173
- linewidth=max(2, min(original_size) / 500), # Scale line width with image size
174
  edgecolor='red',
175
  facecolor='none'
176
  )
 
106
 
107
 
108
  def process_image_detection(image, target_label, surprise_rating):
109
+ # Handle different image input types
110
+ if isinstance(image, tuple):
111
+ if len(image) > 0 and image[0] is not None:
112
+ image = Image.fromarray(image[0])
113
+ else:
114
+ raise ValueError("Invalid image tuple provided")
115
+ elif isinstance(image, np.ndarray):
116
+ image = Image.fromarray(image)
117
+ elif isinstance(image, str):
118
+ image = Image.open(image)
119
+
120
+ # Ensure image is in PIL Image format
121
+ if not isinstance(image, Image.Image):
122
+ raise ValueError("Input must be a PIL Image, numpy array, or valid image path")
123
+
124
+ # Ensure image is in RGB mode
125
+ if image.mode != 'RGB':
126
+ image = image.convert('RGB')
127
+
128
  device = "cuda" if torch.cuda.is_available() else "cpu"
129
 
130
  # Get original image DPI and size
 
132
  original_size = image.size
133
 
134
  # Calculate relative font size based on image dimensions
135
+ base_fontsize = min(original_size) / 40
136
 
137
  owlv2_processor = Owlv2Processor.from_pretrained("google/owlv2-base-patch16")
138
  owlv2_model = Owlv2ForObjectDetection.from_pretrained("google/owlv2-base-patch16").to(device)
 
149
  target_sizes = torch.tensor([image.size[::-1]]).to(device)
150
  results = owlv2_processor.post_process_object_detection(outputs, target_sizes=target_sizes)[0]
151
 
152
+ dpi = 300
153
  figsize = (original_size[0] / dpi, original_size[1] / dpi)
154
  fig = plt.figure(figsize=figsize, dpi=dpi)
155
 
 
189
  (box[0], box[1]),
190
  box[2] - box[0],
191
  box[3] - box[1],
192
+ linewidth=max(2, min(original_size) / 500),
193
  edgecolor='red',
194
  facecolor='none'
195
  )