fixed ffmpeg issue
- .gitignore +2 -1
- app.py +2 -2
- owl_core.py +11 -4
.gitignore CHANGED
@@ -1,2 +1,3 @@
 *.pyc
-*/__pycache__/**
+*/__pycache__/**
+*.mp4
app.py CHANGED
@@ -29,7 +29,7 @@ def run_owl(input_vid,
                     text_prompt,
                     confidence_threshold,
                     fps_processed=fps_processed,
-                    scaling_factor=scaling_factor)
+                    scaling_factor=1/scaling_factor)
 
     global CSV_PATH
     CSV_PATH = csv_path
@@ -54,7 +54,7 @@ with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column():
             input = gr.Video(label="Input Video", interactive=True)
-            text_prompt = gr.Textbox(label="What do you want to detect? (Multiple species should be separated by commas")
+            text_prompt = gr.Textbox(label="What do you want to detect? (Multiple species should be separated by commas)")
             with gr.Accordion("Advanced Options", open=False):
                 conf_threshold = gr.Slider(
                     label="Confidence Threshold",
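
Note on the first hunk: run_owl now passes the reciprocal of the UI value. This reads as the slider expressing a downscale divisor (e.g. 4 meaning "quarter-size frames") while the downstream code multiplies frame dimensions by scaling_factor, so the value must be inverted before the call. A minimal sketch of that assumed semantics (rescale_frame is a hypothetical helper, not part of this repo):

import PIL.Image

def rescale_frame(img: PIL.Image.Image, scaling_factor: float) -> PIL.Image.Image:
    # Multiply both dimensions by scaling_factor; values < 1 shrink the frame.
    w, h = img.size
    return img.resize((int(w * scaling_factor), int(h * scaling_factor)))

# UI slider value of 4 -> pass 1/4, producing 320x180 from a 1280x720 frame.
frame = PIL.Image.new("RGB", (1280, 720))
small = rescale_frame(frame, 1 / 4)
assert small.size == (320, 180)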
owl_core.py CHANGED
@@ -11,11 +11,12 @@ from PIL import Image
 from utils import plot_predictions, mp4_to_png, vid_stitcher
 from transformers import Owlv2Processor, Owlv2ForObjectDetection
 
+
 def preprocess_text(text_prompt: str, num_prompts: int = 1):
     """
     Takes a string of text prompts and returns a list of lists of text prompts for each image.
     i.e. text_prompt = "a, b, c" -> [["a", "b", "c"], ["a", "b", "c"]]
-    """
+    """
     text_prompt = [s.strip() for s in text_prompt.split(",")]
     text_queries = [text_prompt] * num_prompts
     # print("text_queries:", text_queries)
@@ -33,7 +34,7 @@ def owl_batch_prediction(
     with torch.no_grad():
         outputs = model(**inputs)
 
-
+    # Target image sizes (height, width) to rescale box predictions [batch_size, 2]
     target_sizes = torch.Tensor([img.size[::-1] for img in images]).to(device)
     # Convert outputs (bounding boxes and class logits) to COCO API, resizes to original image size and filter by threshold
     results = processor.post_process_object_detection(outputs=outputs, target_sizes=target_sizes, threshold=threshold)
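
For context, the post-processing in owl_batch_prediction follows the standard transformers OWLv2 flow. A self-contained sketch of the same pattern (the checkpoint name is an assumption; any Owlv2 checkpoint behaves the same way):

import torch
from PIL import Image
from transformers import Owlv2Processor, Owlv2ForObjectDetection

device = "cuda" if torch.cuda.is_available() else "cpu"
ckpt = "google/owlv2-base-patch16-ensemble"  # assumed checkpoint
processor = Owlv2Processor.from_pretrained(ckpt)
model = Owlv2ForObjectDetection.from_pretrained(ckpt).to(device)

images = [Image.new("RGB", (1280, 720))]  # stand-in for a batch of video frames
text_queries = [["baboon"]]               # one list of prompts per image
inputs = processor(text=text_queries, images=images, return_tensors="pt").to(device)

with torch.no_grad():
    outputs = model(**inputs)

# Target sizes are (height, width) so boxes are rescaled to the original frames
target_sizes = torch.Tensor([img.size[::-1] for img in images]).to(device)
results = processor.post_process_object_detection(
    outputs=outputs, target_sizes=target_sizes, threshold=0.3)
print(results[0]["boxes"], results[0]["scores"], results[0]["labels"])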
|
59 |
# create new dirs and paths for results
|
60 |
filename = os.path.splitext(os.path.basename(vid_path))[0]
|
61 |
results_dir = f'../temp/{filename}_{datetime.now().strftime("%H%M%S")}'
|
62 |
+
# results_dir = f'temp/{filename}_{datetime.now().strftime("%H%M%S")}'
|
63 |
frames_dir = os.path.join(results_dir, "frames")
|
64 |
|
65 |
# if the frames directory does not exist, create it and get the frames from the video
|
|
|
88 |
for i in tqdm(range(0, len(frame_paths), batch_size), desc="Running batches"):
|
89 |
frame_nums = [i*fps_processed for i in range(batch_size)]
|
90 |
batch_paths = frame_paths[i:i+batch_size] # paths for this batch
|
91 |
+
images = [Image.open(image_path) for image_path in batch_paths]
|
92 |
|
93 |
# run owl on this batch of frames
|
94 |
text_queries = preprocess_text(text_prompt, len(batch_paths))
|
|
|
130 |
# stitch the frames into a video
|
131 |
save_path = vid_stitcher(frames_dir, output_path=os.path.join(results_dir, "output.mp4"))
|
132 |
|
133 |
+
return csv_path, save_path
|
134 |
+
|
135 |
+
|
136 |
+
# # DEBUGGING
|
137 |
+
# if __name__ == "__main__":
|
138 |
+
# owl_full_video('baboon_15s.mp4', 'baboon', 0.3, fps_processed=1, scaling_factor=4)
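
The docstring restored in the first hunk fully specifies preprocess_text. A quick sanity check of that behavior, assuming owl_core is importable and the function returns text_queries:

from owl_core import preprocess_text

# "a, b" with num_prompts=2 -> the same prompt list repeated once per image
assert preprocess_text("baboon, vervet monkey", num_prompts=2) == \
    [["baboon", "vervet monkey"], ["baboon", "vervet monkey"]]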
|