cheng committed · f558a24
1 Parent(s): 1ab570a

add more time

Files changed:
- Equirec2Perspec.py  +76 -0
- app.py  +52 -41
Equirec2Perspec.py
ADDED
@@ -0,0 +1,76 @@
+import os
+import sys
+import cv2
+import numpy as np
+
+
+def xyz2lonlat(xyz):
+    atan2 = np.arctan2
+    asin = np.arcsin
+
+    norm = np.linalg.norm(xyz, axis=-1, keepdims=True)
+    xyz_norm = xyz / norm
+    x = xyz_norm[..., 0:1]
+    y = xyz_norm[..., 1:2]
+    z = xyz_norm[..., 2:]
+
+    lon = atan2(x, z)
+    lat = asin(y)
+    lst = [lon, lat]
+
+    out = np.concatenate(lst, axis=-1)
+    return out
+
+
+def lonlat2XY(lonlat, shape):
+    X = (lonlat[..., 0:1] / (2 * np.pi) + 0.5) * (shape[1] - 1)
+    Y = (lonlat[..., 1:] / (np.pi) + 0.5) * (shape[0] - 1)
+    lst = [X, Y]
+    out = np.concatenate(lst, axis=-1)
+
+    return out
+
+
+class Equirectangular:
+    def __init__(self, img):
+        # self._img = cv2.imread(img_name, cv2.IMREAD_COLOR)
+        self._img = img
+        [self._height, self._width, _] = self._img.shape
+        # cp = self._img.copy()
+        # w = self._width
+        # self._img[:, :w/8, :] = cp[:, 7*w/8:, :]
+        # self._img[:, w/8:, :] = cp[:, :7*w/8, :]
+
+    def GetPerspective(self, FOV, THETA, PHI, height, width):
+        #
+        # THETA is left/right angle, PHI is up/down angle, both in degree
+        #
+
+        f = 0.5 * width * 1 / np.tan(0.5 * FOV / 180.0 * np.pi)
+        cx = (width - 1) / 2.0
+        cy = (height - 1) / 2.0
+        K = np.array([
+            [f, 0, cx],
+            [0, f, cy],
+            [0, 0, 1],
+        ], np.float32)
+        K_inv = np.linalg.inv(K)
+
+        x = np.arange(width)
+        y = np.arange(height)
+        x, y = np.meshgrid(x, y)
+        z = np.ones_like(x)
+        xyz = np.concatenate([x[..., None], y[..., None], z[..., None]], axis=-1)
+        xyz = xyz @ K_inv.T
+
+        y_axis = np.array([0.0, 1.0, 0.0], np.float32)
+        x_axis = np.array([1.0, 0.0, 0.0], np.float32)
+        R1, _ = cv2.Rodrigues(y_axis * np.radians(THETA))
+        R2, _ = cv2.Rodrigues(np.dot(R1, x_axis) * np.radians(PHI))
+        R = R2 @ R1
+        xyz = xyz @ R.T
+        lonlat = xyz2lonlat(xyz)
+        XY = lonlat2XY(lonlat, shape=self._img.shape).astype(np.float32)
+        persp = cv2.remap(self._img, XY[..., 0], XY[..., 1], cv2.INTER_CUBIC, borderMode=cv2.BORDER_WRAP)
+
+        return persp
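A minimal usage sketch of the new helper (not part of the commit): it loads an equirectangular panorama with OpenCV and extracts a single perspective view the same way app.py does below. The file names are placeholders.

import cv2
import Equirec2Perspec as E2P

# Placeholder input: an equirectangular panorama (H x W x 3, BGR).
pano = cv2.imread("panorama.jpg", cv2.IMREAD_COLOR)

equ = E2P.Equirectangular(pano)

# FOV=45 degrees, THETA=0 (no left/right rotation), PHI=0 (no tilt),
# output 360 x 540 -- the same values app.py uses for its sub-views.
view = equ.GetPerspective(45, 0, 0, 360, 540)

cv2.imwrite("perspective_view.jpg", view)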
app.py
CHANGED
@@ -8,16 +8,16 @@ from PIL import Image
 import numpy as np
 from pathlib import Path
 import gradio as gr
-
 import warnings
-
 import torch
+import Equirec2Perspec as E2P
+import cv2
+import numpy as np

 os.system("python setup.py build develop --user")
 os.system("pip install packaging==21.3")
 warnings.filterwarnings("ignore")

-
 from groundingdino.models import build_model
 from groundingdino.util.slconfig import SLConfig
 from groundingdino.util.utils import clean_state_dict
@@ -26,7 +26,9 @@ import groundingdino.datasets.transforms as T

 from huggingface_hub import hf_hub_download

-
+picture_height = 360
+picture_width = 540
+picture_fov = 45

 # Use this command for evaluate the GLIP-T model
 config_file = "groundingdino/config/GroundingDINO_SwinT_OGC.py"
@@ -34,8 +36,32 @@ ckpt_repo_id = "ShilongLiu/GroundingDINO"
 ckpt_filenmae = "groundingdino_swint_ogc.pth"


+def detection(image):
+    sub_images = processPanorama(image)
+    processed_images = [np.array(sub_image) for sub_image in sub_images]
+
+    return processed_images
+
+
+def processPanorama(image):
+    equ = E2P.Equirectangular(image)
+    FOV = picture_fov
+    y_axis = 0
+
+    sub_images = []
+    while y_axis <= 0:
+        z_axis = -150
+        while z_axis <= 90:
+            img = equ.GetPerspective(FOV, z_axis, y_axis, picture_height, picture_width)
+            # cv2.imwrite(f'{directory_name}_{z_axis}z.jpg', img)
+            sub_images.append(img)
+            z_axis += FOV
+        y_axis += FOV
+    return sub_images
+
+
 def load_model_hf(model_config_path, repo_id, filename, device='cpu'):
-    args = SLConfig.fromfile(model_config_path)
+    args = SLConfig.fromfile(model_config_path)
     model = build_model(args)
     args.device = device

@@ -44,7 +70,8 @@ def load_model_hf(model_config_path, repo_id, filename, device='cpu'):
     log = model.load_state_dict(clean_state_dict(checkpoint['model']), strict=False)
     print("Model loaded from {} \n => {}".format(cache_file, log))
     _ = model.eval()
-    return model
+    return model
+

 def image_transform_grounding(init_image):
     transform = T.Compose([
@@ -52,18 +79,21 @@ def image_transform_grounding(init_image):
         T.ToTensor(),
         T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
     ])
-    image, _ = transform(init_image, None)
+    image, _ = transform(init_image, None)  # 3, h, w
     return init_image, image

+
 def image_transform_grounding_for_vis(init_image):
     transform = T.Compose([
         T.RandomResize([800], max_size=1333),
     ])
-    image, _ = transform(init_image, None)
+    image, _ = transform(init_image, None)  # 3, h, w
     return image

+
 model = load_model_hf(config_file, ckpt_repo_id, ckpt_filenmae)

+
 def run_grounding(input_image, grounding_caption, box_threshold, text_threshold):
     init_image = input_image.convert("RGB")
     original_size = init_image.size
@@ -72,49 +102,30 @@ def run_grounding(input_image, grounding_caption, box_threshold, text_threshold)
     image_pil: Image = image_transform_grounding_for_vis(init_image)

     # run grounidng
-    boxes, logits, phrases = predict(model, image_tensor, grounding_caption, box_threshold, text_threshold,
+    boxes, logits, phrases = predict(model, image_tensor, grounding_caption, box_threshold, text_threshold,
+                                     device='cpu')
     annotated_frame = annotate(image_source=np.asarray(image_pil), boxes=boxes, logits=logits, phrases=phrases)
     image_with_box = Image.fromarray(cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2RGB))

-
     return image_with_box

-if __name__ == "__main__":
-
-    parser = argparse.ArgumentParser("Grounding DINO demo", add_help=True)
-    parser.add_argument("--debug", action="store_true", help="using debug mode")
-    parser.add_argument("--share", action="store_true", help="share the app")
-    args = parser.parse_args()

-
-
-
-        gr.Markdown("
-        gr.Markdown("Note the model runs on CPU, so it may take a while to run the model.")
+if __name__ == "__main__":
+    detect_app = gr.Blocks()
+    with detect_app:
+        gr.Markdown("# Panorama Traffic Sign Detection Demo")
+        gr.Markdown("Note the model runs on CPU for demo, so it may take a while to run the model.")

         with gr.Row():
             with gr.Column():
-                input_image = gr.Image(source='upload', type="
-
-                run_button = gr.Button(label="Run")
-                with gr.Accordion("Advanced options", open=False):
-                    box_threshold = gr.Slider(
-                        label="Box Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.001
-                    )
-                    text_threshold = gr.Slider(
-                        label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.001
-                    )
+                input_image = gr.Image(source='upload', type="numpy", label="Please upload a panorama picture.")
+                run_button = gr.Button(label="Process & Detect")

             with gr.Column():
-                gallery = gr.
-
-                # label="grounding results"
-                ).style(full_width=True, full_height=True)
-                # gallery = gr.Gallery(label="Generated images", show_label=False).style(
-                #     grid=[1], height="auto", container=True, full_width=True, full_height=True)
-
-                run_button.click(fn=run_grounding, inputs=[
-                    input_image, grounding_caption, box_threshold, text_threshold], outputs=[gallery])
+                gallery = gr.Gallery(label="Detection Results").style(
+                    columns=[3], preview=False, object_fit="none")

-
+                run_button.click(fn=detection, inputs=[
+                    input_image], outputs=[gallery])

+    detect_app.launch(share=False, show_api=False, show_error=True)
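As a quick sanity check (not part of the commit), the sweep in the new processPanorama with the defaults above (picture_fov = 45) visits a single vertical angle (y_axis = 0) and horizontal angles -150, -105, -60, -15, 30 and 75, so each uploaded panorama is cut into six 540 x 360 perspective views before being shown in the gallery:

# Reproduces only the loop bounds from processPanorama (values copied from app.py above).
picture_fov = 45

angles = []
y_axis = 0
while y_axis <= 0:
    z_axis = -150
    while z_axis <= 90:
        angles.append((z_axis, y_axis))
        z_axis += picture_fov
    y_axis += picture_fov

print(angles)
# -> [(-150, 0), (-105, 0), (-60, 0), (-15, 0), (30, 0), (75, 0)]  (6 sub-images per panorama)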