Update Gpt4oDemo.py
Browse files- Gpt4oDemo.py +19 -7
Gpt4oDemo.py
CHANGED
@@ -102,32 +102,44 @@ base64Frames = []
|
|
102 |
# '''
|
103 |
transcript=""
|
104 |
|
105 |
-
def process_video(video_path, seconds_per_frame=2):
|
106 |
-
|
107 |
base_video_path, _ = os.path.splitext(video_path)
|
108 |
|
109 |
video = cv2.VideoCapture(video_path)
|
110 |
total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
|
111 |
fps = video.get(cv2.CAP_PROP_FPS)
|
112 |
frames_to_skip = int(fps * seconds_per_frame)
|
113 |
-
curr_frame=0
|
114 |
|
|
|
|
|
|
|
|
|
|
|
|
|
115 |
while curr_frame < total_frames - 1:
|
116 |
video.set(cv2.CAP_PROP_POS_FRAMES, curr_frame)
|
117 |
success, frame = video.read()
|
118 |
if not success:
|
119 |
break
|
120 |
-
|
|
|
|
|
|
|
|
|
121 |
base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
|
122 |
curr_frame += frames_to_skip
|
|
|
123 |
video.release()
|
124 |
|
125 |
-
|
|
|
126 |
clip = VideoFileClip(video_path)
|
127 |
clip.audio.write_audiofile(audio_path, bitrate="32k")
|
128 |
clip.audio.close()
|
129 |
clip.close()
|
130 |
-
#
|
131 |
print(f"Extracted {len(base64Frames)} frames")
|
132 |
print(f"Extracted audio to {audio_path}")
|
133 |
return base64Frames, audio_path
|
@@ -189,7 +201,7 @@ def handle_video(video=None):
|
|
189 |
# Load example video
|
190 |
video = "./TEST.mp4"
|
191 |
|
192 |
-
base64Frames, audio_path = process_video(
|
193 |
chat_history.append({
|
194 |
"role": "user",
|
195 |
"content": [
|
|
|
102 |
# '''
|
103 |
transcript=""
|
104 |
|
105 |
+
def process_video(video_path, seconds_per_frame=2, target_width=320, target_height=180):
    """Sample frames from a video and extract its audio track.

    Frames are taken roughly every ``seconds_per_frame`` seconds, resized to
    ``target_width`` x ``target_height``, JPEG-encoded, and returned as
    base64 strings. The audio track is written next to the source video as
    a 32 kbps MP3.

    Args:
        video_path: Path to the input video file.
        seconds_per_frame: Sampling interval between extracted frames, in seconds.
        target_width: Width (px) each sampled frame is resized to.
        target_height: Height (px) each sampled frame is resized to.

    Returns:
        Tuple ``(base64Frames, audio_path)``: the list of base64-encoded
        JPEG frames and the path of the extracted MP3 file.
    """
    base64Frames = []
    base_video_path, _ = os.path.splitext(video_path)

    video = cv2.VideoCapture(video_path)
    try:
        total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = video.get(cv2.CAP_PROP_FPS)
        # Clamp to >= 1: if FPS metadata is 0/invalid, int(fps * s) would be 0
        # and the loop below would re-read the same frame forever.
        frames_to_skip = max(1, int(fps * seconds_per_frame))
        curr_frame = 0

        # Report the original dimensions so the downscale factor is visible.
        original_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        original_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        print(f"Original width: {original_width}, Original height: {original_height}")

        # Loop through the video and extract frames at the specified sampling rate.
        while curr_frame < total_frames - 1:
            video.set(cv2.CAP_PROP_POS_FRAMES, curr_frame)
            success, frame = video.read()
            if not success:
                break

            # Downscale before encoding to keep the base64 payload small.
            resized_frame = cv2.resize(frame, (target_width, target_height))
            _, buffer = cv2.imencode(".jpg", resized_frame)
            base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
            curr_frame += frames_to_skip
    finally:
        # Release the capture handle even if decoding fails mid-loop.
        video.release()

    # Derive the audio path from the source video (was a hard-coded
    # "./Task 1.mp3", which left base_video_path unused and made two
    # different videos clobber each other's extracted audio).
    audio_path = f"{base_video_path}.mp3"
    clip = VideoFileClip(video_path)
    # NOTE(review): assumes the video has an audio track; clip.audio is
    # None for silent videos — confirm inputs always carry audio.
    clip.audio.write_audiofile(audio_path, bitrate="32k")
    clip.audio.close()
    clip.close()

    #transcribe_video(audio_path)
    print(f"Extracted {len(base64Frames)} frames")
    print(f"Extracted audio to {audio_path}")
    return base64Frames, audio_path
|
|
|
201 |
# Load example video
|
202 |
video = "./TEST.mp4"
|
203 |
|
204 |
+
base64Frames, audio_path = process_video(VIDEO_PATH, seconds_per_frame=1, target_width=320, target_height=180)
|
205 |
chat_history.append({
|
206 |
"role": "user",
|
207 |
"content": [
|