# import gradio as gr
# #import torch
# import yolov7
#
#
#
# from huggingface_hub import hf_hub_download
# from huggingface_hub import HfApi
# # Images
# #torch.hub.download_url_to_file('https://github.com/ultralytics/yolov5/raw/master/data/images/zidane.jpg', 'zidane.jpg')
# #torch.hub.download_url_to_file('https://raw.githubusercontent.com/obss/sahi/main/tests/data/small-vehicles1.jpeg', 'small-vehicles1.jpeg')
# def yolov7_inference(
#     image: gr.inputs.Image = None,
#     model_path: gr.inputs.Dropdown = None,
#     image_size: gr.inputs.Slider = 640,
#     conf_threshold: gr.inputs.Slider = 0.25,
#     iou_threshold: gr.inputs.Slider = 0.45,
# ):
#     """
#     YOLOv7 inference function
#     Args:
#         image: Input image
#         model_path: Path to the model
#         image_size: Image size
#         conf_threshold: Confidence threshold
#         iou_threshold: IOU threshold
#     Returns:
#         Rendered image
#     """
#     model = yolov7.load(model_path, device="cpu", hf_model=True, trace=False)
#     model.conf = conf_threshold
#     model.iou = iou_threshold
#     results = model([image], size=image_size)
#     return results.render()[0]
# inputs = [
#     gr.inputs.Image(type="pil", label="Input Image"),
#     gr.inputs.Dropdown(
#         choices=[
#             "alshimaa/model_baseline",
#             "alshimaa/model_yolo7",
#             #"kadirnar/yolov7-v0.1",
#         ],
#         default="alshimaa/model_baseline",
#         label="Model",
#     ),
#     #gr.inputs.Slider(minimum=320, maximum=1280, default=640, step=32, label="Image Size"),
#     #gr.inputs.Slider(minimum=0.0, maximum=1.0, default=0.25, step=0.05, label="Confidence Threshold"),
#     #gr.inputs.Slider(minimum=0.0, maximum=1.0, default=0.45, step=0.05, label="IOU Threshold"),
# ]
# outputs = gr.outputs.Image(type="filepath", label="Output Image")
# title = "Smart Environmental Eye (SEE)"
# examples = [
#     ['image1.jpg', 'alshimaa/model_yolo7', 640, 0.25, 0.45],
#     ['image2.jpg', 'alshimaa/model_yolo7', 640, 0.25, 0.45],
#     ['image3.jpg', 'alshimaa/model_yolo7', 640, 0.25, 0.45],
# ]
# demo_app = gr.Interface(
#     fn=yolov7_inference,
#     inputs=inputs,
#     outputs=outputs,
#     title=title,
#     examples=examples,
#     cache_examples=True,
#     theme='huggingface',
# )
# demo_app.launch(debug=True, enable_queue=True)

import subprocess
import tempfile
import time
from pathlib import Path

import cv2
import gradio as gr

from inferer import Inferer

pipeline = Inferer("alshimaa/model_baseline", device='cuda')


def fn_image(image, conf_thres, iou_thres):
    return pipeline(image, conf_thres, iou_thres)


def fn_video(video_file, conf_thres, iou_thres, start_sec, duration):
    start_timestamp = time.strftime("%H:%M:%S", time.gmtime(start_sec))

    suffix = Path(video_file).suffix
    clip_temp_file = tempfile.NamedTemporaryFile(suffix=suffix)
    # Cut the requested clip out of the uploaded video. With -ss placed before
    # -i, ffmpeg resets timestamps to zero after seeking, so the clip length is
    # given with -t (a duration in seconds), not -to (an absolute end time).
    subprocess.call(
        f"ffmpeg -y -ss {start_timestamp} -i {video_file} -t {duration} -c copy {clip_temp_file.name}".split()
    )

    # Reader of clip file
    cap = cv2.VideoCapture(clip_temp_file.name)

    # This is an intermediary temp file where we'll write the video to.
    # Unfortunately, gradio doesn't play too nice with videos rn so we have to do some hackiness
    # with ffmpeg at the end of the function here.
with tempfile.NamedTemporaryFile(suffix=".mp4") as temp_file: out = cv2.VideoWriter(temp_file.name, cv2.VideoWriter_fourcc(*"MP4V"), 30, (1280, 720)) num_frames = 0 max_frames = duration * 30 while cap.isOpened(): try: ret, frame = cap.read() if not ret: break except Exception as e: print(e) continue print("FRAME DTYPE", type(frame)) out.write(pipeline(frame, conf_thres, iou_thres)) num_frames += 1 print("Processed {} frames".format(num_frames)) if num_frames == max_frames: break out.release() # Aforementioned hackiness out_file = tempfile.NamedTemporaryFile(suffix="out.mp4", delete=False) subprocess.run(f"ffmpeg -y -loglevel quiet -stats -i {temp_file.name} -c:v libx264 {out_file.name}".split()) return out_file.name image_interface = gr.Interface( fn=fn_image, inputs=[ "image", gr.Slider(0, 1, value=0.5, label="Confidence Threshold"), gr.Slider(0, 1, value=0.5, label="IOU Threshold"), ], outputs=gr.Image(type="file"), examples=[["image1.jpg", 0.5, 0.5], ["image2.jpg", 0.25, 0.45], ["image3.jpg", 0.25, 0.45]], title="Smart Environmental Eye (SEE)", allow_flagging=False, allow_screenshot=False, ) video_interface = gr.Interface( fn=fn_video, inputs=[ gr.Video(type="file"), gr.Slider(0, 1, value=0.25, label="Confidence Threshold"), gr.Slider(0, 1, value=0.45, label="IOU Threshold"), gr.Slider(0, 10, value=0, label="Start Second", step=1), gr.Slider(0, 10 if pipeline.device.type != 'cpu' else 3, value=4, label="Duration", step=1), ], outputs=gr.Video(type="file", format="mp4"), # examples=[ # ["video.mp4", 0.25, 0.45, 0, 2], # ], title="Smart Environmental Eye (SEE)", allow_flagging=False, allow_screenshot=False, ) if __name__ == "__main__": gr.TabbedInterface( [image_interface, video_interface], ["Run on Images", "Run on Videos"], ).launch()