annading committed
Commit d7ae9fa · 1 Parent(s): 50b188c

fixed ffmpeg issue

Files changed (3)
  1. .gitignore +2 -1
  2. app.py +2 -2
  3. owl_core.py +11 -4
.gitignore CHANGED
@@ -1,2 +1,3 @@
 *.pyc
-*/__pycache__/**
+*/__pycache__/**
+*.mp4
app.py CHANGED
@@ -29,7 +29,7 @@ def run_owl(input_vid,
         text_prompt,
         confidence_threshold,
         fps_processed=fps_processed,
-        scaling_factor=scaling_factor)
+        scaling_factor=1/scaling_factor)
 
     global CSV_PATH
     CSV_PATH = csv_path
@@ -54,7 +54,7 @@ with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column():
             input = gr.Video(label="Input Video", interactive=True)
-            text_prompt = gr.Textbox(label="What do you want to detect? (Multiple species should be separated by commas")
+            text_prompt = gr.Textbox(label="What do you want to detect? (Multiple species should be separated by commas)")
             with gr.Accordion("Advanced Options", open=False):
                 conf_threshold = gr.Slider(
                     label="Confidence Threshold",
owl_core.py CHANGED
@@ -11,11 +11,12 @@ from PIL import Image
 from utils import plot_predictions, mp4_to_png, vid_stitcher
 from transformers import Owlv2Processor, Owlv2ForObjectDetection
 
+
 def preprocess_text(text_prompt: str, num_prompts: int = 1):
     """
     Takes a string of text prompts and returns a list of lists of text prompts for each image.
     i.e. text_prompt = "a, b, c" -> [["a", "b", "c"], ["a", "b", "c"]]
-    """
+    """
     text_prompt = [s.strip() for s in text_prompt.split(",")]
     text_queries = [text_prompt] * num_prompts
     # print("text_queries:", text_queries)
 
@@ -33,7 +34,7 @@ def owl_batch_prediction(
     with torch.no_grad():
         outputs = model(**inputs)
 
-    # Target image sizes (height, width) to rescale box predictions [batch_size, 2]
+    # Target image sizes (height, width) to rescale box predictions [batch_size, 2]
     target_sizes = torch.Tensor([img.size[::-1] for img in images]).to(device)
     # Convert outputs (bounding boxes and class logits) to COCO API, resizes to original image size and filter by threshold
     results = processor.post_process_object_detection(outputs=outputs, target_sizes=target_sizes, threshold=threshold)
@@ -58,6 +59,7 @@ def owl_full_video(
     # create new dirs and paths for results
     filename = os.path.splitext(os.path.basename(vid_path))[0]
     results_dir = f'../temp/{filename}_{datetime.now().strftime("%H%M%S")}'
+    # results_dir = f'temp/{filename}_{datetime.now().strftime("%H%M%S")}'
     frames_dir = os.path.join(results_dir, "frames")
 
     # if the frames directory does not exist, create it and get the frames from the video
@@ -86,7 +88,7 @@ def owl_full_video(
     for i in tqdm(range(0, len(frame_paths), batch_size), desc="Running batches"):
         frame_nums = [i*fps_processed for i in range(batch_size)]
         batch_paths = frame_paths[i:i+batch_size] # paths for this batch
-        images = [Image.open(image_path) for image_path in batch_paths]
+        images = [Image.open(image_path) for image_path in batch_paths]
 
         # run owl on this batch of frames
         text_queries = preprocess_text(text_prompt, len(batch_paths))
@@ -128,4 +130,9 @@ def owl_full_video(
     # stitch the frames into a video
     save_path = vid_stitcher(frames_dir, output_path=os.path.join(results_dir, "output.mp4"))
 
-    return csv_path, save_path
+    return csv_path, save_path
+
+
+# # DEBUGGING
+# if __name__ == "__main__":
+#     owl_full_video('baboon_15s.mp4', 'baboon', 0.3, fps_processed=1, scaling_factor=4)
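
For reference, a minimal single-image sketch of the OWLv2 flow that owl_batch_prediction batches over. The checkpoint name, frame path, and prompt are assumptions; the processor and post_process_object_detection calls mirror the hunks above.

import torch
from PIL import Image
from transformers import Owlv2Processor, Owlv2ForObjectDetection

device = "cuda" if torch.cuda.is_available() else "cpu"
# checkpoint is an assumption; the repo may pin a different OWLv2 variant
processor = Owlv2Processor.from_pretrained("google/owlv2-base-patch16-ensemble")
model = Owlv2ForObjectDetection.from_pretrained("google/owlv2-base-patch16-ensemble").to(device)

image = Image.open("frame_0001.png")  # hypothetical frame produced by mp4_to_png
inputs = processor(text=[["baboon"]], images=[image], return_tensors="pt").to(device)

with torch.no_grad():
    outputs = model(**inputs)

# target size (height, width) so boxes come back in original-frame coordinates
target_sizes = torch.Tensor([image.size[::-1]]).to(device)
results = processor.post_process_object_detection(outputs=outputs, target_sizes=target_sizes, threshold=0.3)
print(results[0]["boxes"], results[0]["scores"], results[0]["labels"])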