aiqcamp committed on
Commit
f289e91
·
verified ·
1 Parent(s): c6df80d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -18
app.py CHANGED
@@ -78,6 +78,9 @@ def visualize(pred_mask, image_path, work_dir):
78
 
79
  @spaces.GPU
80
  def image_vision(image_input_path, prompt):
 
 
 
81
  image_path = image_input_path
82
  text_prompts = f"<image>{prompt}"
83
  image = Image.open(image_path).convert('RGB')
@@ -92,9 +95,16 @@ def image_vision(image_input_path, prompt):
92
  print(return_dict)
93
  answer = return_dict["prediction"] # the text format answer
94
 
 
 
 
 
 
 
 
95
  seg_image = return_dict["prediction_masks"]
96
 
97
- if '[SEG]' in answer and Visualizer is not None:
98
  pred_masks = seg_image[0]
99
  temp_dir = tempfile.mkdtemp()
100
  pred_mask = pred_masks
@@ -106,19 +116,16 @@ def image_vision(image_input_path, prompt):
106
 
107
  @spaces.GPU(duration=80)
108
  def video_vision(video_input_path, prompt, video_interval):
 
 
 
109
  # Open the original video
110
  cap = cv2.VideoCapture(video_input_path)
111
-
112
- # Get original video properties
113
  original_fps = cap.get(cv2.CAP_PROP_FPS)
114
-
115
  frame_skip_factor = video_interval
116
-
117
- # Calculate new FPS
118
  new_fps = original_fps / frame_skip_factor
119
 
120
  vid_frames, image_paths = read_video(video_input_path, video_interval)
121
- # create a question (<image> is a placeholder for the video frames)
122
  question = f"<image>{prompt}"
123
  result = model.predict_forward(
124
  video=vid_frames,
@@ -128,7 +135,13 @@ def video_vision(video_input_path, prompt, video_interval):
128
  prediction = result['prediction']
129
  print(prediction)
130
 
131
- if '[SEG]' in prediction and Visualizer is not None:
 
 
 
 
 
 
132
  _seg_idx = 0
133
  pred_masks = result['prediction_masks'][_seg_idx]
134
  seg_frames = []
@@ -140,29 +153,22 @@ def video_vision(video_input_path, prompt, video_interval):
140
  seg_frames.append(seg_frame)
141
 
142
  output_video = "output_video.mp4"
143
-
144
- # Read the first image to get the size (resolution)
145
  frame = cv2.imread(seg_frames[0])
146
  height, width, layers = frame.shape
147
-
148
- # Define the video codec and create VideoWriter object
149
- fourcc = cv2.VideoWriter_fourcc(*'mp4v') # Codec for MP4
150
  video = cv2.VideoWriter(output_video, fourcc, new_fps, (width, height))
151
 
152
- # Iterate over the image paths and write to the video
153
  for img_path in seg_frames:
154
  frame = cv2.imread(img_path)
155
  video.write(frame)
156
 
157
- # Release the video writer
158
  video.release()
159
-
160
  print(f"Video created successfully at {output_video}")
161
 
162
- return result['prediction'], output_video
163
 
164
  else:
165
- return result['prediction'], None
166
 
167
 
168
 
 
78
 
79
  @spaces.GPU
80
  def image_vision(image_input_path, prompt):
81
+ # 입력된 프롬프트가 한글인지 확인
82
+ is_korean = any(ord('κ°€') <= ord(char) <= ord('힣') for char in prompt)
83
+
84
  image_path = image_input_path
85
  text_prompts = f"<image>{prompt}"
86
  image = Image.open(image_path).convert('RGB')
 
95
  print(return_dict)
96
  answer = return_dict["prediction"] # the text format answer
97
 
98
+ # 한글 프롬프트인 경우 응답을 한글로 변환
99
+ if is_korean:
100
+ # 기본 응답 패턴을 한글로 변환
101
+ answer = answer.replace("Yes", "λ„€")
102
+ answer = answer.replace("No", "μ•„λ‹ˆμ˜€")
103
+ answer = answer.replace("[SEG]", "[λΆ„ν• ]")
104
+
105
  seg_image = return_dict["prediction_masks"]
106
 
107
+ if ('[SEG]' in answer or '[λΆ„ν• ]' in answer) and Visualizer is not None:
108
  pred_masks = seg_image[0]
109
  temp_dir = tempfile.mkdtemp()
110
  pred_mask = pred_masks
 
116
 
117
  @spaces.GPU(duration=80)
118
  def video_vision(video_input_path, prompt, video_interval):
119
+ # 입력된 프롬프트가 한글인지 확인
120
+ is_korean = any(ord('κ°€') <= ord(char) <= ord('힣') for char in prompt)
121
+
122
  # Open the original video
123
  cap = cv2.VideoCapture(video_input_path)
 
 
124
  original_fps = cap.get(cv2.CAP_PROP_FPS)
 
125
  frame_skip_factor = video_interval
 
 
126
  new_fps = original_fps / frame_skip_factor
127
 
128
  vid_frames, image_paths = read_video(video_input_path, video_interval)
 
129
  question = f"<image>{prompt}"
130
  result = model.predict_forward(
131
  video=vid_frames,
 
135
  prediction = result['prediction']
136
  print(prediction)
137
 
138
+ # 한글 프롬프트인 경우 응답을 한글로 변환
139
+ if is_korean:
140
+ prediction = prediction.replace("Yes", "λ„€")
141
+ prediction = prediction.replace("No", "μ•„λ‹ˆμ˜€")
142
+ prediction = prediction.replace("[SEG]", "[λΆ„ν• ]")
143
+
144
+ if ('[SEG]' in prediction or '[λΆ„ν• ]' in prediction) and Visualizer is not None:
145
  _seg_idx = 0
146
  pred_masks = result['prediction_masks'][_seg_idx]
147
  seg_frames = []
 
153
  seg_frames.append(seg_frame)
154
 
155
  output_video = "output_video.mp4"
 
 
156
  frame = cv2.imread(seg_frames[0])
157
  height, width, layers = frame.shape
158
+ fourcc = cv2.VideoWriter_fourcc(*'mp4v')
 
 
159
  video = cv2.VideoWriter(output_video, fourcc, new_fps, (width, height))
160
 
 
161
  for img_path in seg_frames:
162
  frame = cv2.imread(img_path)
163
  video.write(frame)
164
 
 
165
  video.release()
 
166
  print(f"Video created successfully at {output_video}")
167
 
168
+ return prediction, output_video
169
 
170
  else:
171
+ return prediction, None
172
 
173
 
174