cheng committed · f558a24
1 Parent(s): 1ab570a

add more time

Files changed:
- Equirec2Perspec.py  +76 -0
- app.py  +52 -41
Equirec2Perspec.py
ADDED
@@ -0,0 +1,76 @@
+import os
+import sys
+import cv2
+import numpy as np
+
+
+def xyz2lonlat(xyz):
+    atan2 = np.arctan2
+    asin = np.arcsin
+
+    norm = np.linalg.norm(xyz, axis=-1, keepdims=True)
+    xyz_norm = xyz / norm
+    x = xyz_norm[..., 0:1]
+    y = xyz_norm[..., 1:2]
+    z = xyz_norm[..., 2:]
+
+    lon = atan2(x, z)
+    lat = asin(y)
+    lst = [lon, lat]
+
+    out = np.concatenate(lst, axis=-1)
+    return out
+
+
+def lonlat2XY(lonlat, shape):
+    X = (lonlat[..., 0:1] / (2 * np.pi) + 0.5) * (shape[1] - 1)
+    Y = (lonlat[..., 1:] / (np.pi) + 0.5) * (shape[0] - 1)
+    lst = [X, Y]
+    out = np.concatenate(lst, axis=-1)
+
+    return out
+
+
+class Equirectangular:
+    def __init__(self, img):
+        # self._img = cv2.imread(img_name, cv2.IMREAD_COLOR)
+        self._img = img
+        [self._height, self._width, _] = self._img.shape
+        # cp = self._img.copy()
+        # w = self._width
+        # self._img[:, :w/8, :] = cp[:, 7*w/8:, :]
+        # self._img[:, w/8:, :] = cp[:, :7*w/8, :]
+
+    def GetPerspective(self, FOV, THETA, PHI, height, width):
+        #
+        # THETA is left/right angle, PHI is up/down angle, both in degree
+        #
+
+        f = 0.5 * width * 1 / np.tan(0.5 * FOV / 180.0 * np.pi)
+        cx = (width - 1) / 2.0
+        cy = (height - 1) / 2.0
+        K = np.array([
+            [f, 0, cx],
+            [0, f, cy],
+            [0, 0, 1],
+        ], np.float32)
+        K_inv = np.linalg.inv(K)
+
+        x = np.arange(width)
+        y = np.arange(height)
+        x, y = np.meshgrid(x, y)
+        z = np.ones_like(x)
+        xyz = np.concatenate([x[..., None], y[..., None], z[..., None]], axis=-1)
+        xyz = xyz @ K_inv.T
+
+        y_axis = np.array([0.0, 1.0, 0.0], np.float32)
+        x_axis = np.array([1.0, 0.0, 0.0], np.float32)
+        R1, _ = cv2.Rodrigues(y_axis * np.radians(THETA))
+        R2, _ = cv2.Rodrigues(np.dot(R1, x_axis) * np.radians(PHI))
+        R = R2 @ R1
+        xyz = xyz @ R.T
+        lonlat = xyz2lonlat(xyz)
+        XY = lonlat2XY(lonlat, shape=self._img.shape).astype(np.float32)
+        persp = cv2.remap(self._img, XY[..., 0], XY[..., 1], cv2.INTER_CUBIC, borderMode=cv2.BORDER_WRAP)
+
+        return persp
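A minimal usage sketch of the new helper (not part of the commit): it loads an equirectangular panorama with OpenCV and extracts a single perspective view the same way app.py does below. The file names are placeholders.

import cv2
import Equirec2Perspec as E2P

# Placeholder input: an equirectangular panorama (H x W x 3, BGR).
pano = cv2.imread("panorama.jpg", cv2.IMREAD_COLOR)

equ = E2P.Equirectangular(pano)

# FOV=45 degrees, THETA=0 (no left/right rotation), PHI=0 (no tilt),
# output 360 x 540 -- the same values app.py uses for its sub-views.
view = equ.GetPerspective(45, 0, 0, 360, 540)

cv2.imwrite("perspective_view.jpg", view)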
app.py
CHANGED
@@ -8,16 +8,16 @@ from PIL import Image
 import numpy as np
 from pathlib import Path
 import gradio as gr
-
 import warnings
-
 import torch
+import Equirec2Perspec as E2P
+import cv2
+import numpy as np

 os.system("python setup.py build develop --user")
 os.system("pip install packaging==21.3")
 warnings.filterwarnings("ignore")

-
 from groundingdino.models import build_model
 from groundingdino.util.slconfig import SLConfig
 from groundingdino.util.utils import clean_state_dict
@@ -26,7 +26,9 @@ import groundingdino.datasets.transforms as T

 from huggingface_hub import hf_hub_download

-
+picture_height = 360
+picture_width = 540
+picture_fov = 45

 # Use this command for evaluate the GLIP-T model
 config_file = "groundingdino/config/GroundingDINO_SwinT_OGC.py"
@@ -34,8 +36,32 @@ ckpt_repo_id = "ShilongLiu/GroundingDINO"
 ckpt_filenmae = "groundingdino_swint_ogc.pth"


+def detection(image):
+    sub_images = processPanorama(image)
+    processed_images = [np.array(sub_image) for sub_image in sub_images]
+
+    return processed_images
+
+
+def processPanorama(image):
+    equ = E2P.Equirectangular(image)
+    FOV = picture_fov
+    y_axis = 0
+
+    sub_images = []
+    while y_axis <= 0:
+        z_axis = -150
+        while z_axis <= 90:
+            img = equ.GetPerspective(FOV, z_axis, y_axis, picture_height, picture_width)
+            # cv2.imwrite(f'{directory_name}_{z_axis}z.jpg', img)
+            sub_images.append(img)
+            z_axis += FOV
+        y_axis += FOV
+    return sub_images
+
+
 def load_model_hf(model_config_path, repo_id, filename, device='cpu'):
-    args = SLConfig.fromfile(model_config_path)
+    args = SLConfig.fromfile(model_config_path)
     model = build_model(args)
     args.device = device

@@ -44,7 +70,8 @@ def load_model_hf(model_config_path, repo_id, filename, device='cpu'):
     log = model.load_state_dict(clean_state_dict(checkpoint['model']), strict=False)
     print("Model loaded from {} \n => {}".format(cache_file, log))
     _ = model.eval()
-    return model
+    return model
+

 def image_transform_grounding(init_image):
     transform = T.Compose([
@@ -52,18 +79,21 @@ def image_transform_grounding(init_image):
         T.ToTensor(),
         T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
     ])
-    image, _ = transform(init_image, None)
+    image, _ = transform(init_image, None)  # 3, h, w
     return init_image, image

+
 def image_transform_grounding_for_vis(init_image):
     transform = T.Compose([
         T.RandomResize([800], max_size=1333),
     ])
-    image, _ = transform(init_image, None)
+    image, _ = transform(init_image, None)  # 3, h, w
     return image

+
 model = load_model_hf(config_file, ckpt_repo_id, ckpt_filenmae)

+
 def run_grounding(input_image, grounding_caption, box_threshold, text_threshold):
     init_image = input_image.convert("RGB")
     original_size = init_image.size
@@ -72,49 +102,30 @@ def run_grounding(input_image, grounding_caption, box_threshold, text_threshold)
     image_pil: Image = image_transform_grounding_for_vis(init_image)

     # run grounidng
-    boxes, logits, phrases = predict(model, image_tensor, grounding_caption, box_threshold, text_threshold,
+    boxes, logits, phrases = predict(model, image_tensor, grounding_caption, box_threshold, text_threshold,
+                                     device='cpu')
     annotated_frame = annotate(image_source=np.asarray(image_pil), boxes=boxes, logits=logits, phrases=phrases)
     image_with_box = Image.fromarray(cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2RGB))

-
     return image_with_box

-if __name__ == "__main__":
-
-    parser = argparse.ArgumentParser("Grounding DINO demo", add_help=True)
-    parser.add_argument("--debug", action="store_true", help="using debug mode")
-    parser.add_argument("--share", action="store_true", help="share the app")
-    args = parser.parse_args()

-
-
-
-        gr.Markdown("
-        gr.Markdown("Note the model runs on CPU, so it may take a while to run the model.")
+if __name__ == "__main__":
+    detect_app = gr.Blocks()
+    with detect_app:
+        gr.Markdown("# Panorama Traffic Sign Detection Demo")
+        gr.Markdown("Note the model runs on CPU for demo, so it may take a while to run the model.")

         with gr.Row():
             with gr.Column():
-                input_image = gr.Image(source='upload', type="
-
-                run_button = gr.Button(label="Run")
-                with gr.Accordion("Advanced options", open=False):
-                    box_threshold = gr.Slider(
-                        label="Box Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.001
-                    )
-                    text_threshold = gr.Slider(
-                        label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.001
-                    )
+                input_image = gr.Image(source='upload', type="numpy", label="Please upload a panorama picture.")
+                run_button = gr.Button(label="Process & Detect")

             with gr.Column():
-                gallery = gr.
-
-                # label="grounding results"
-                ).style(full_width=True, full_height=True)
-                # gallery = gr.Gallery(label="Generated images", show_label=False).style(
-                #     grid=[1], height="auto", container=True, full_width=True, full_height=True)
-
-                run_button.click(fn=run_grounding, inputs=[
-                    input_image, grounding_caption, box_threshold, text_threshold], outputs=[gallery])
+                gallery = gr.Gallery(label="Detection Results").style(
+                    columns=[3], preview=False, object_fit="none")

-
+                run_button.click(fn=detection, inputs=[
+                    input_image], outputs=[gallery])

+    detect_app.launch(share=False, show_api=False, show_error=True)
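As a quick sanity check (not part of the commit), the sweep in the new processPanorama with the defaults above (picture_fov = 45) visits a single vertical angle (y_axis = 0) and horizontal angles -150, -105, -60, -15, 30 and 75, so each uploaded panorama is cut into six 540 x 360 perspective views before being shown in the gallery:

# Reproduces only the loop bounds from processPanorama (values copied from app.py above).
picture_fov = 45

angles = []
y_axis = 0
while y_axis <= 0:
    z_axis = -150
    while z_axis <= 90:
        angles.append((z_axis, y_axis))
        z_axis += picture_fov
    y_axis += picture_fov

print(angles)
# -> [(-150, 0), (-105, 0), (-60, 0), (-15, 0), (30, 0), (75, 0)]  (6 sub-images per panorama)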