AlshimaaGamalAlsaied committed on
Commit aad5579 · 1 Parent(s): be1ddbb
Files changed (2)
  1. app.py +167 -61
  2. inferer.py +238 -0
app.py CHANGED
@@ -1,73 +1,179 @@
- import gradio as gr
- #import torch
- import yolov7


- #
- # from huggingface_hub import hf_hub_download
- from huggingface_hub import HfApi


- # Images
- #torch.hub.download_url_to_file('https://github.com/ultralytics/yolov5/raw/master/data/images/zidane.jpg', 'zidane.jpg')
- #torch.hub.download_url_to_file('https://raw.githubusercontent.com/obss/sahi/main/tests/data/small-vehicles1.jpeg', 'small-vehicles1.jpeg')

- def yolov7_inference(
-     image: gr.inputs.Image = None,
-     model_path: gr.inputs.Dropdown = None,
-     image_size: gr.inputs.Slider = 640,
-     conf_threshold: gr.inputs.Slider = 0.25,
-     iou_threshold: gr.inputs.Slider = 0.45,
- ):
-     """
-     YOLOv7 inference function
-     Args:
-         image: Input image
-         model_path: Path to the model
-         image_size: Image size
-         conf_threshold: Confidence threshold
-         iou_threshold: IOU threshold
-     Returns:
-         Rendered image
-     """
-
-     model = yolov7.load(model_path, device="cpu", hf_model=True, trace=False)
-     model.conf = conf_threshold
-     model.iou = iou_threshold
-     results = model([image], size=image_size)
-     return results.render()[0]



- inputs = [
-     gr.inputs.Image(type="pil", label="Input Image"),
-     gr.inputs.Dropdown(
-         choices=[
-             "alshimaa/model_baseline",
-             "alshimaa/model_yolo7",
-             #"kadirnar/yolov7-v0.1",
-         ],
-         default="alshimaa/model_baseline",
-         label="Model",
      )
-     #gr.inputs.Slider(minimum=320, maximum=1280, default=640, step=32, label="Image Size")
-     #gr.inputs.Slider(minimum=0.0, maximum=1.0, default=0.25, step=0.05, label="Confidence Threshold"),
-     #gr.inputs.Slider(minimum=0.0, maximum=1.0, default=0.45, step=0.05, label="IOU Threshold")
- ]
-
- outputs = gr.outputs.Image(type="filepath", label="Output Image")
- title = "Smart Environmental Eye (SEE)"
-
- examples = [['image1.jpg', 'alshimaa/model_yolo7', 640, 0.25, 0.45], ['image2.jpg', 'alshimaa/model_yolo7', 640, 0.25, 0.45], ['image3.jpg', 'alshimaa/model_yolo7', 640, 0.25, 0.45]]
- demo_app = gr.Interface(
-     fn=yolov7_inference,
-     inputs=inputs,
-     outputs=outputs,
-     title=title,
-     examples=examples,
-     cache_examples=True,
-     theme='huggingface',
  )

- demo_app.launch(debug=True, enable_queue=True)

+ # import gradio as gr
+ # #import torch
+ # import yolov7


+ # #
+ # # from huggingface_hub import hf_hub_download
+ # from huggingface_hub import HfApi


+ # # Images
+ # #torch.hub.download_url_to_file('https://github.com/ultralytics/yolov5/raw/master/data/images/zidane.jpg', 'zidane.jpg')
+ # #torch.hub.download_url_to_file('https://raw.githubusercontent.com/obss/sahi/main/tests/data/small-vehicles1.jpeg', 'small-vehicles1.jpeg')

+ # def yolov7_inference(
+ #     image: gr.inputs.Image = None,
+ #     model_path: gr.inputs.Dropdown = None,
+ #     image_size: gr.inputs.Slider = 640,
+ #     conf_threshold: gr.inputs.Slider = 0.25,
+ #     iou_threshold: gr.inputs.Slider = 0.45,
+ # ):
+ #     """
+ #     YOLOv7 inference function
+ #     Args:
+ #         image: Input image
+ #         model_path: Path to the model
+ #         image_size: Image size
+ #         conf_threshold: Confidence threshold
+ #         iou_threshold: IOU threshold
+ #     Returns:
+ #         Rendered image
+ #     """
+
+ #     model = yolov7.load(model_path, device="cpu", hf_model=True, trace=False)
+ #     model.conf = conf_threshold
+ #     model.iou = iou_threshold
+ #     results = model([image], size=image_size)
+ #     return results.render()[0]



+ # inputs = [
+ #     gr.inputs.Image(type="pil", label="Input Image"),
+ #     gr.inputs.Dropdown(
+ #         choices=[
+ #             "alshimaa/model_baseline",
+ #             "alshimaa/model_yolo7",
+ #             #"kadirnar/yolov7-v0.1",
+ #         ],
+ #         default="alshimaa/model_baseline",
+ #         label="Model",
+ #     )
+ #     #gr.inputs.Slider(minimum=320, maximum=1280, default=640, step=32, label="Image Size")
+ #     #gr.inputs.Slider(minimum=0.0, maximum=1.0, default=0.25, step=0.05, label="Confidence Threshold"),
+ #     #gr.inputs.Slider(minimum=0.0, maximum=1.0, default=0.45, step=0.05, label="IOU Threshold")
+ # ]
+
+ # outputs = gr.outputs.Image(type="filepath", label="Output Image")
+ # title = "Smart Environmental Eye (SEE)"
+
+ # examples = [['image1.jpg', 'alshimaa/model_yolo7', 640, 0.25, 0.45], ['image2.jpg', 'alshimaa/model_yolo7', 640, 0.25, 0.45], ['image3.jpg', 'alshimaa/model_yolo7', 640, 0.25, 0.45]]
+ # demo_app = gr.Interface(
+ #     fn=yolov7_inference,
+ #     inputs=inputs,
+ #     outputs=outputs,
+ #     title=title,
+ #     examples=examples,
+ #     cache_examples=True,
+ #     theme='huggingface',
+ # )
+
+ # demo_app.launch(debug=True, enable_queue=True)
+
+
+
+ import subprocess
+ import tempfile
+ import time
+ from pathlib import Path
+
+ import cv2
+ import gradio as gr
+
+ from inferer import Inferer
+
+ pipeline = Inferer("alshimaa/model_baseline", device='cuda')
+
+
+ def fn_image(image, conf_thres, iou_thres):
+     return pipeline(image, conf_thres, iou_thres)
+
+
+ def fn_video(video_file, conf_thres, iou_thres, start_sec, duration):
+     start_timestamp = time.strftime("%H:%M:%S", time.gmtime(start_sec))
+     end_timestamp = time.strftime("%H:%M:%S", time.gmtime(start_sec + duration))
+
+     suffix = Path(video_file).suffix
+
+     clip_temp_file = tempfile.NamedTemporaryFile(suffix=suffix)
+     subprocess.call(
+         f"ffmpeg -y -ss {start_timestamp} -i {video_file} -to {end_timestamp} -c copy {clip_temp_file.name}".split()
      )
+
+     # Reader of clip file
+     cap = cv2.VideoCapture(clip_temp_file.name)
+
+     # This is an intermediary temp file where we'll write the video to
+     # Unfortunately, gradio doesn't play too nice with videos rn so we have to do some hackiness
+     # with ffmpeg at the end of the function here.
+     with tempfile.NamedTemporaryFile(suffix=".mp4") as temp_file:
+         out = cv2.VideoWriter(temp_file.name, cv2.VideoWriter_fourcc(*"MP4V"), 30, (1280, 720))
+
+         num_frames = 0
+         max_frames = duration * 30
+         while cap.isOpened():
+             try:
+                 ret, frame = cap.read()
+                 if not ret:
+                     break
+             except Exception as e:
+                 print(e)
+                 continue
+             print("FRAME DTYPE", type(frame))
+             out.write(pipeline(frame, conf_thres, iou_thres))
+             num_frames += 1
+             print("Processed {} frames".format(num_frames))
+             if num_frames == max_frames:
+                 break
+
+         out.release()
+
+         # Aforementioned hackiness
+         out_file = tempfile.NamedTemporaryFile(suffix="out.mp4", delete=False)
+         subprocess.run(f"ffmpeg -y -loglevel quiet -stats -i {temp_file.name} -c:v libx264 {out_file.name}".split())
+
+         return out_file.name
+
+
+ image_interface = gr.Interface(
+     fn=fn_image,
+     inputs=[
+         "image",
+         gr.Slider(0, 1, value=0.5, label="Confidence Threshold"),
+         gr.Slider(0, 1, value=0.5, label="IOU Threshold"),
+     ],
+     outputs=gr.Image(type="file"),
+     examples=[["image1.jpg", 0.5, 0.5], ["image2.jpg", 0.25, 0.45], ["image3.jpg", 0.25, 0.45]],
+     title="Smart Environmental Eye (SEE)",
+     allow_flagging=False,
+     allow_screenshot=False,
  )

+ video_interface = gr.Interface(
+     fn=fn_video,
+     inputs=[
+         gr.Video(type="file"),
+         gr.Slider(0, 1, value=0.25, label="Confidence Threshold"),
+         gr.Slider(0, 1, value=0.45, label="IOU Threshold"),
+         gr.Slider(0, 10, value=0, label="Start Second", step=1),
+         gr.Slider(0, 10 if pipeline.device.type != 'cpu' else 3, value=4, label="Duration", step=1),
+     ],
+     outputs=gr.Video(type="file", format="mp4"),
+     # examples=[
+     #     ["video.mp4", 0.25, 0.45, 0, 2],
+
+     # ],
+     title="Smart Environmental Eye (SEE)",
+     allow_flagging=False,
+     allow_screenshot=False,
+ )
+
+
+
+ if __name__ == "__main__":
+     gr.TabbedInterface(
+         [image_interface, video_interface],
+         ["Run on Images", "Run on Videos"],
+     ).launch()

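For reference, the fn_video path added above follows a three-step pattern: trim the requested window with an ffmpeg stream copy, run the detector frame by frame through OpenCV's VideoCapture/VideoWriter, and re-encode the result with libx264 so the Gradio video component can play it. Below is a minimal standalone sketch of that flow, assuming ffmpeg is on PATH; the process_clip name and the detect callback are illustrative placeholders, not part of this commit.

import subprocess
import tempfile

import cv2


def process_clip(src_path, detect, start="00:00:00", end="00:00:04", fps=30, size=(1280, 720)):
    # 1) Trim the requested window without re-encoding (stream copy).
    clip = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
    subprocess.run(["ffmpeg", "-y", "-ss", start, "-i", src_path, "-to", end, "-c", "copy", clip.name], check=True)

    # 2) Run detection frame by frame and write an intermediate MP4V file.
    cap = cv2.VideoCapture(clip.name)
    tmp = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
    out = cv2.VideoWriter(tmp.name, cv2.VideoWriter_fourcc(*"mp4v"), fps, size)
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        out.write(cv2.resize(detect(frame), size))
    cap.release()
    out.release()

    # 3) Re-encode with libx264 so browsers (and the Gradio widget) can play the result.
    final = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
    subprocess.run(["ffmpeg", "-y", "-loglevel", "quiet", "-i", tmp.name, "-c:v", "libx264", final.name], check=True)
    return final.name


# Example (hypothetical): mirror what fn_video does with the default thresholds.
# process_clip("video.mp4", lambda frame: pipeline(frame, 0.25, 0.45))
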
inferer.py ADDED
@@ -0,0 +1,238 @@
+ #!/usr/bin/env python3
+ # -*- coding:utf-8 -*-
+ import math
+ import os.path as osp
+
+ import cv2
+ import numpy as np
+ import torch
+ from huggingface_hub import hf_hub_download
+ from PIL import Image, ImageFont
+
+ from yolov6.data.data_augment import letterbox
+ from yolov6.layers.common import DetectBackend
+ from yolov6.utils.events import LOGGER, load_yaml
+ from yolov6.utils.nms import non_max_suppression
+
+
+ class Inferer:
+     def __init__(self, model_id, device="cpu", yaml="coco.yaml", img_size=640, half=False):
+         self.__dict__.update(locals())
+
+         # Init model
+         self.img_size = img_size
+         cuda = device != "cpu" and torch.cuda.is_available()
+         self.device = torch.device("cuda:0" if cuda else "cpu")
+         self.model = DetectBackend(hf_hub_download(model_id, "model.pt"), device=self.device)
+         self.stride = self.model.stride
+         self.class_names = load_yaml(yaml)["names"]
+         self.img_size = self.check_img_size(self.img_size, s=self.stride) # check image size
+
+         # Half precision
+         if half & (self.device.type != "cpu"):
+             self.model.model.half()
+         else:
+             self.model.model.float()
+             half = False
+
+         if self.device.type != "cpu":
+             self.model(
+                 torch.zeros(1, 3, *self.img_size).to(self.device).type_as(next(self.model.model.parameters()))
+             ) # warmup
+
+         # Switch model to deploy status
+         self.model_switch(self.model, self.img_size)
+
+     def model_switch(self, model, img_size):
+         """Model switch to deploy status"""
+         from yolov6.layers.common import RepVGGBlock
+
+         for layer in model.modules():
+             if isinstance(layer, RepVGGBlock):
+                 layer.switch_to_deploy()
+
+         LOGGER.info("Switch model to deploy modality.")
+
+     def __call__(
+         self,
+         path_or_image,
+         conf_thres=0.25,
+         iou_thres=0.45,
+         classes=None,
+         agnostic_nms=False,
+         max_det=1000,
+         hide_labels=False,
+         hide_conf=False,
+     ):
+         """Model Inference and results visualization"""
+
+         img, img_src = self.precess_image(path_or_image, self.img_size, self.stride, self.half)
+         img = img.to(self.device)
+         if len(img.shape) == 3:
+             img = img[None]
+             # expand for batch dim
+         pred_results = self.model(img)
+         det = non_max_suppression(pred_results, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)[0]
+
+         gn = torch.tensor(img_src.shape)[[1, 0, 1, 0]] # normalization gain whwh
+         img_ori = img_src
+
+         # check image and font
+         assert (
+             img_ori.data.contiguous
+         ), "Image needs to be contiguous. Please apply to input images with np.ascontiguousarray(im)."
+         self.font_check()
+
+         if len(det):
+             det[:, :4] = self.rescale(img.shape[2:], det[:, :4], img_src.shape).round()
+
+             for *xyxy, conf, cls in reversed(det):
+                 class_num = int(cls) # integer class
+                 label = (
+                     None
+                     if hide_labels
+                     else (self.class_names[class_num] if hide_conf else f"{self.class_names[class_num]} {conf:.2f}")
+                 )
+
+                 self.plot_box_and_label(
+                     img_ori,
+                     max(round(sum(img_ori.shape) / 2 * 0.003), 2),
+                     xyxy,
+                     label,
+                     color=self.generate_colors(class_num, True),
+                 )
+
+         img_src = np.asarray(img_ori)
+
+         return img_src
+
+     @staticmethod
+     def precess_image(path_or_image, img_size, stride, half):
+         """Process image before image inference."""
+         if isinstance(path_or_image, str):
+             try:
+                 img_src = cv2.imread(path_or_image)
+                 assert img_src is not None, f"Invalid image: {path_or_image}"
+             except Exception as e:
+                 LOGGER.warning(e)
+         elif isinstance(path_or_image, np.ndarray):
+             img_src = path_or_image
+         elif isinstance(path_or_image, Image.Image):
+             img_src = np.array(path_or_image)
+
+         image = letterbox(img_src, img_size, stride=stride)[0]
+
+         # Convert
+         image = image.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB
+         image = torch.from_numpy(np.ascontiguousarray(image))
+         image = image.half() if half else image.float() # uint8 to fp16/32
+         image /= 255 # 0 - 255 to 0.0 - 1.0
+
+         return image, img_src
+
+     @staticmethod
+     def rescale(ori_shape, boxes, target_shape):
+         """Rescale the output to the original image shape"""
+         ratio = min(ori_shape[0] / target_shape[0], ori_shape[1] / target_shape[1])
+         padding = (ori_shape[1] - target_shape[1] * ratio) / 2, (ori_shape[0] - target_shape[0] * ratio) / 2
+
+         boxes[:, [0, 2]] -= padding[0]
+         boxes[:, [1, 3]] -= padding[1]
+         boxes[:, :4] /= ratio
+
+         boxes[:, 0].clamp_(0, target_shape[1]) # x1
+         boxes[:, 1].clamp_(0, target_shape[0]) # y1
+         boxes[:, 2].clamp_(0, target_shape[1]) # x2
+         boxes[:, 3].clamp_(0, target_shape[0]) # y2
+
+         return boxes
+
+     def check_img_size(self, img_size, s=32, floor=0):
+         """Make sure image size is a multiple of stride s in each dimension, and return a new shape list of image."""
+         if isinstance(img_size, int): # integer i.e. img_size=640
+             new_size = max(self.make_divisible(img_size, int(s)), floor)
+         elif isinstance(img_size, list): # list i.e. img_size=[640, 480]
+             new_size = [max(self.make_divisible(x, int(s)), floor) for x in img_size]
+         else:
+             raise Exception(f"Unsupported type of img_size: {type(img_size)}")
+
+         if new_size != img_size:
+             print(f"WARNING: --img-size {img_size} must be multiple of max stride {s}, updating to {new_size}")
+         return new_size if isinstance(img_size, list) else [new_size] * 2
+
+     def make_divisible(self, x, divisor):
+         # Upward revision the value x to make it evenly divisible by the divisor.
+         return math.ceil(x / divisor) * divisor
+
+     @staticmethod
+     def plot_box_and_label(image, lw, box, label="", color=(128, 128, 128), txt_color=(255, 255, 255)):
+         # Add one xyxy box to image with label
+         p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
+         cv2.rectangle(image, p1, p2, color, thickness=lw, lineType=cv2.LINE_AA)
+         if label:
+             tf = max(lw - 1, 1) # font thickness
+             w, h = cv2.getTextSize(label, 0, fontScale=lw / 3, thickness=tf)[0] # text width, height
+             outside = p1[1] - h - 3 >= 0 # label fits outside box
+             p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3
+             cv2.rectangle(image, p1, p2, color, -1, cv2.LINE_AA) # filled
+             cv2.putText(
+                 image,
+                 label,
+                 (p1[0], p1[1] - 2 if outside else p1[1] + h + 2),
+                 0,
+                 lw / 3,
+                 txt_color,
+                 thickness=tf,
+                 lineType=cv2.LINE_AA,
+             )
+
+     @staticmethod
+     def font_check(font="./yolov6/utils/Arial.ttf", size=10):
+         # Return a PIL TrueType Font, downloading to CONFIG_DIR if necessary
+         assert osp.exists(font), f"font path not exists: {font}"
+         try:
+             return ImageFont.truetype(str(font) if font.exists() else font.name, size)
+         except Exception as e: # download if missing
+             return ImageFont.truetype(str(font), size)
+
+     @staticmethod
+     def box_convert(x):
+         # Convert boxes with shape [n, 4] from [x1, y1, x2, y2] to [x, y, w, h] where x1y1=top-left, x2y2=bottom-right
+         y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
+         y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center
+         y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center
+         y[:, 2] = x[:, 2] - x[:, 0] # width
+         y[:, 3] = x[:, 3] - x[:, 1] # height
+         return y
+
+     @staticmethod
+     def generate_colors(i, bgr=False):
+         hex = (
+             "FF3838",
+             "FF9D97",
+             "FF701F",
+             "FFB21D",
+             "CFD231",
+             "48F90A",
+             "92CC17",
+             "3DDB86",
+             "1A9334",
+             "00D4BB",
+             "2C99A8",
+             "00C2FF",
+             "344593",
+             "6473FF",
+             "0018EC",
+             "8438FF",
+             "520085",
+             "CB38FF",
+             "FF95C8",
+             "FF37C7",
+         )
+         palette = []
+         for iter in hex:
+             h = "#" + iter
+             palette.append(tuple(int(h[1 + i : 1 + i + 2], 16) for i in (0, 2, 4)))
+         num = len(palette)
+         color = palette[int(i) % num]
+         return (color[2], color[1], color[0]) if bgr else color
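
The Inferer class added here wraps the YOLOv6 pipeline end to end: it pulls model.pt from the Hub with hf_hub_download, letterboxes the input, runs DetectBackend plus non_max_suppression, rescales the boxes back to the source image, and draws the labels. A minimal usage sketch, assuming the yolov6 package, coco.yaml, and the image1.jpg example referenced in app.py are available next to this file; the output filename is arbitrary.

import cv2

from inferer import Inferer

# Falls back to CPU automatically when CUDA is unavailable (see __init__ above).
pipeline = Inferer("alshimaa/model_baseline", device="cuda")

# __call__ accepts a file path, a NumPy BGR array, or a PIL image and returns an annotated array.
annotated = pipeline("image1.jpg", conf_thres=0.25, iou_thres=0.45)
cv2.imwrite("image1_annotated.jpg", annotated)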