Spaces:
Running
Running
File size: 4,436 Bytes
1e96bca |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 |
import math
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import clip
import torch
def crop_image(img, position):
    """Warp the quadrilateral described by ``position`` into an upright rectangle.

    Args:
        img: Image handed unchanged to ``cv2.warpPerspective``.
        position: Object whose ``tolist()`` yields at least four ``[x, y]``
            points (presumably a NumPy array of quadrilateral corners --
            confirm against the caller). Only the first four are used.

    Returns:
        The perspective-corrected crop. Its size is the distance between the
        midpoints of opposite edges of the quadrilateral, truncated to int.
    """

    def _gap(ax, ay, bx, by):
        # Euclidean distance between (ax, ay) and (bx, by).
        return math.sqrt((ax - bx) ** 2 + (ay - by) ** 2)

    pts = position.tolist()

    # Exchange sort on x so pts[0:2] are the two left-most points and
    # pts[2:4] the two right-most. Kept as an explicit swap loop because the
    # order in which equal x values end up matters for the pairing below.
    for a in range(4):
        for b in range(a + 1, 4):
            if pts[a][0] > pts[b][0]:
                pts[a], pts[b] = pts[b], pts[a]

    # Inside each pair, put the point with the smaller y first.
    if pts[0][1] > pts[1][1]:
        pts[0], pts[1] = pts[1], pts[0]
    if pts[2][1] > pts[3][1]:
        pts[2], pts[3] = pts[3], pts[2]

    # Corner labels follow the destination they are mapped to below.
    x1, y1 = pts[0][0], pts[0][1]  # -> (0, 0)
    x2, y2 = pts[2][0], pts[2][1]  # -> (width - 1, 0)
    x3, y3 = pts[3][0], pts[3][1]  # -> (width - 1, height - 1)
    x4, y4 = pts[1][0], pts[1][1]  # -> (0, height - 1)

    corners = np.array(
        [[x1, y1], [x2, y2], [x4, y4], [x3, y3]],
        np.float32,
    )

    # Output size: distance between the midpoints of opposite edges.
    img_width = _gap((x1 + x4) / 2, (y1 + y4) / 2, (x2 + x3) / 2, (y2 + y3) / 2)
    img_height = _gap((x1 + x2) / 2, (y1 + y2) / 2, (x4 + x3) / 2, (y4 + y3) / 2)

    corners_trans = np.array(
        [
            [0, 0],
            [img_width - 1, 0],
            [0, img_height - 1],
            [img_width - 1, img_height - 1],
        ],
        np.float32,
    )

    transform = cv2.getPerspectiveTransform(corners, corners_trans)
    return cv2.warpPerspective(img, transform, (int(img_width), int(img_height)))
def calculate_size(box):
    """Return the area of ``box``, given as ``(x0, y0, x1, y1)``."""
    width = box[2] - box[0]
    height = box[3] - box[1]
    return width * height
def calculate_iou(box1, box2):
    """Return the intersection-over-union of two axis-aligned boxes.

    Args:
        box1: Box as ``(x0, y0, x1, y1)``.
        box2: Box as ``(x0, y0, x1, y1)``.

    Returns:
        ``intersection_area / union_area``. When the union area is not
        positive (for example both boxes are zero-sized) there is nothing to
        overlap, so ``0.0`` is returned instead of dividing by zero.
    """
    # Overlap extent along each axis, clamped at zero for disjoint boxes.
    inter_w = max(0, min(box1[2], box2[2]) - max(box1[0], box2[0]))
    inter_h = max(0, min(box1[3], box2[3]) - max(box1[1], box2[1]))
    inter_area = inter_w * inter_h

    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union_area = box1_area + box2_area - inter_area

    # Degenerate boxes give a zero union; avoid ZeroDivisionError.
    if union_area <= 0:
        return 0.0
    return inter_area / union_area
def crop(image, box, i, text_data=None):
    """Crop ``box`` out of an image file and save it as ``./temp/{i}.jpg``.

    Args:
        image: Path or file object accepted by ``PIL.Image.open``.
        box: Region passed to ``Image.crop`` (``(left, upper, right, lower)``).
        i: Identifier used as the output file name.
        text_data: Optional ``(x0, y0, x1, y1)`` rectangle. When truthy, it is
            outlined in red (5 px wide) on the image before cropping.
    """
    import os

    # Saving fails if the output directory is missing, so make sure it exists.
    os.makedirs("./temp", exist_ok=True)

    # The context manager releases the underlying file even when drawing,
    # cropping or saving raises.
    with Image.open(image) as source:
        if text_data:
            ImageDraw.Draw(source).rectangle(
                ((text_data[0], text_data[1]), (text_data[2], text_data[3])),
                outline="red",
                width=5,
            )
        source.crop(box).save(f"./temp/{i}.jpg")
def in_box(box, target):
    """Return True when ``box`` lies strictly inside ``target``.

    Both arguments are ``(x0, y0, x1, y1)``. Touching an edge of ``target``
    does not count as inside.
    """
    # The first two coordinates must be strictly greater than the target's.
    for idx in (0, 1):
        if not box[idx] > target[idx]:
            return False
    # The last two coordinates must be strictly less than the target's.
    for idx in (2, 3):
        if not box[idx] < target[idx]:
            return False
    return True
def crop_for_clip(image, box, i, position):
    """Save the crop ``box`` as ``./temp/{i}.jpg`` if it lies in the requested region.

    Args:
        image: Path or file object accepted by ``PIL.Image.open``.
        box: Candidate region ``(x0, y0, x1, y1)``, also passed to ``Image.crop``.
        i: Identifier used as the output file name.
        position: One of ``"left"``, ``"right"``, ``"top"``, ``"bottom"``,
            ``"top left"``, ``"top right"``, ``"bottom left"``,
            ``"bottom right"``. Any other value selects the whole image.

    Returns:
        True if ``box`` is strictly inside the selected region (the crop was
        written), otherwise False (nothing was written).
    """
    import os

    # The context manager closes the file on every path, including the
    # early "not in region" return where the image is never loaded.
    with Image.open(image) as source:
        w, h = source.size

        # Each keyword halves the image along one axis; build the bound
        # edge by edge instead of enumerating all nine combinations.
        x_min = w / 2 if position in ("right", "top right", "bottom right") else 0
        x_max = w / 2 if position in ("left", "top left", "bottom left") else w
        y_min = h / 2 if position in ("bottom", "bottom left", "bottom right") else 0
        y_max = h / 2 if position in ("top", "top left", "top right") else h
        bound = [x_min, y_min, x_max, y_max]

        if not in_box(box, bound):
            return False

        # Saving fails if the output directory is missing, so make sure it exists.
        os.makedirs("./temp", exist_ok=True)
        source.crop(box).save(f"./temp/{i}.jpg")
        return True
def clip_for_icon(clip_model, clip_preprocess, images, prompt):
    """Return the index of the image in ``images`` that best matches ``prompt``.

    Args:
        clip_model: Model providing ``parameters()``, ``encode_image`` and
            ``encode_text`` (presumably an OpenAI CLIP model -- confirm
            against the caller).
        clip_preprocess: Callable turning a PIL image into a tensor.
        images: Sequence of image paths (or file objects) to rank.
        prompt: Text description to compare against.

    Returns:
        Integer position in ``images`` of the highest-scoring image.
    """
    # Look the device up once instead of once per image.
    device = next(clip_model.parameters()).device

    # Inference only: skip autograd bookkeeping so no graph is retained.
    with torch.no_grad():
        encoded = []
        for image_file in images:
            # Close each file as soon as it has been converted to a tensor.
            with Image.open(image_file) as picture:
                pixels = clip_preprocess(picture).unsqueeze(0).to(device)
            encoded.append(clip_model.encode_image(pixels))
        image_features = torch.cat(encoded)

        text = clip.tokenize([prompt]).to(device)
        text_features = clip_model.encode_text(text)

        # Normalise both sides so the matrix product is cosine similarity.
        image_features /= image_features.norm(dim=-1, keepdim=True)
        text_features /= text_features.norm(dim=-1, keepdim=True)

        # Softmax across images (dim 0), then pick the best-scoring one.
        similarity = (100.0 * image_features @ text_features.T).softmax(dim=0).squeeze(0)
        _, max_pos = torch.max(similarity, dim=0)

    return max_pos.item()
|