reab5555 committed on
Commit
c9f1714
·
verified ·
1 Parent(s): 5dde850

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -37
app.py CHANGED
@@ -11,10 +11,19 @@ import tempfile
11
  import shutil
12
 
13
  # Check if CUDA is available, otherwise use CPU
14
- device = 'cuda' if torch.cuda.is_available() else 'cpu'
15
 
16
  processor = Owlv2Processor.from_pretrained("google/owlv2-base-patch16")
17
- model = Owlv2ForObjectDetection.from_pretrained("google/owlv2-base-patch16").to(device)
 
 
 
 
 
 
 
 
 
18
 
19
  def process_video(video_path, target, progress=gr.Progress()):
20
  if video_path is None:
@@ -37,6 +46,19 @@ def process_video(video_path, target, progress=gr.Progress()):
37
  temp_dir = tempfile.mkdtemp()
38
  frame_paths = []
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  for i, time in enumerate(progress.tqdm(np.arange(0, video_duration, frame_duration))):
41
  frame_number = int(time * original_fps)
42
  cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
@@ -48,41 +70,52 @@ def process_video(video_path, target, progress=gr.Progress()):
48
  img_resized = cv2.resize(img, (640, 360))
49
  pil_img = Image.fromarray(cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB))
50
 
51
- # Process single image
52
- inputs = processor(text=[target], images=pil_img, return_tensors="pt", padding=True).to(device)
53
- outputs = model(**inputs)
54
-
55
- target_sizes = torch.Tensor([pil_img.size[::-1]])
56
- results = processor.post_process_object_detection(outputs, target_sizes=target_sizes)
57
-
58
- draw = ImageDraw.Draw(pil_img)
59
- max_score = 0
60
-
61
- try:
62
- font = ImageFont.truetype("arial.ttf", 20)
63
- except IOError:
64
- font = ImageFont.load_default()
65
-
66
- boxes, scores, labels = results[0]["boxes"], results[0]["scores"], results[0]["labels"]
67
-
68
- for box, score, label in zip(boxes, scores, labels):
69
- if score.item() >= 0.5:
70
- box = [round(i, 2) for i in box.tolist()]
71
- object_label = target
72
- confidence = round(score.item(), 3)
73
- annotation = f"{object_label}: {confidence}"
74
-
75
- draw.rectangle(box, outline="red", width=2)
76
- text_position = (box[0], box[1] - 20)
77
- draw.text(text_position, annotation, fill="white", font=font)
78
-
79
- max_score = max(max_score, confidence)
80
-
81
- # Save frame to disk
82
- frame_path = os.path.join(temp_dir, f"frame_{i:04d}.png")
83
- pil_img.save(frame_path)
84
- frame_paths.append(frame_path)
85
- frame_scores.append(max_score)
 
 
 
 
 
 
 
 
 
 
 
86
 
87
  # Clear GPU cache every 10 frames
88
  if i % 10 == 0:
 
11
  import shutil
12
 
13
  # Check if CUDA is available, otherwise use CPU
14
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
15
 
16
  processor = Owlv2Processor.from_pretrained("google/owlv2-base-patch16")
17
+ model = Owlv2ForObjectDetection.from_pretrained("google/owlv2-base-patch16")
18
+
19
+ # Try to move model to GPU and use half precision
20
+ try:
21
+ model = model.to(device).half()
22
+ except RuntimeError:
23
+ print("GPU out of memory, using CPU instead")
24
+ device = torch.device("cpu")
25
+ model = model.to(device)
26
+
27
 
28
  def process_video(video_path, target, progress=gr.Progress()):
29
  if video_path is None:
 
46
  temp_dir = tempfile.mkdtemp()
47
  frame_paths = []
48
 
49
+ # Try to use GPU with half precision, fall back to CPU if out of memory
50
+ try:
51
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
52
+ model.to(device).half() # Convert model to half precision
53
+ except RuntimeError:
54
+ print("GPU out of memory, falling back to CPU")
55
+ device = torch.device("cpu")
56
+ model.to(device)
57
+
58
+ batch_size = 4 # Process 4 frames at a time
59
+ batch_frames = []
60
+ batch_indices = []
61
+
62
  for i, time in enumerate(progress.tqdm(np.arange(0, video_duration, frame_duration))):
63
  frame_number = int(time * original_fps)
64
  cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
 
70
  img_resized = cv2.resize(img, (640, 360))
71
  pil_img = Image.fromarray(cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB))
72
 
73
+ batch_frames.append(pil_img)
74
+ batch_indices.append(i)
75
+
76
+ if len(batch_frames) == batch_size or i == int(video_duration / frame_duration) - 1:
77
+ # Process batch
78
+ inputs = processor(text=[target] * len(batch_frames), images=batch_frames, return_tensors="pt", padding=True).to(device)
79
+
80
+ with torch.no_grad():
81
+ outputs = model(**inputs)
82
+
83
+ target_sizes = torch.Tensor([pil_img.size[::-1] for _ in batch_frames]).to(device)
84
+ results = processor.post_process_object_detection(outputs, target_sizes=target_sizes)
85
+
86
+ for idx, (pil_img, result) in enumerate(zip(batch_frames, results)):
87
+ draw = ImageDraw.Draw(pil_img)
88
+ max_score = 0
89
+
90
+ try:
91
+ font = ImageFont.truetype("arial.ttf", 20)
92
+ except IOError:
93
+ font = ImageFont.load_default()
94
+
95
+ boxes, scores, labels = result["boxes"], result["scores"], result["labels"]
96
+
97
+ for box, score, label in zip(boxes, scores, labels):
98
+ if score.item() >= 0.5:
99
+ box = [round(i, 2) for i in box.tolist()]
100
+ object_label = target
101
+ confidence = round(score.item(), 3)
102
+ annotation = f"{object_label}: {confidence}"
103
+
104
+ draw.rectangle(box, outline="red", width=2)
105
+ text_position = (box[0], box[1] - 20)
106
+ draw.text(text_position, annotation, fill="white", font=font)
107
+
108
+ max_score = max(max_score, confidence)
109
+
110
+ # Save frame to disk
111
+ frame_path = os.path.join(temp_dir, f"frame_{batch_indices[idx]:04d}.png")
112
+ pil_img.save(frame_path)
113
+ frame_paths.append(frame_path)
114
+ frame_scores.append(max_score)
115
+
116
+ # Clear batch
117
+ batch_frames = []
118
+ batch_indices = []
119
 
120
  # Clear GPU cache every 10 frames
121
  if i % 10 == 0: