adiv07 committed on
Commit
b143466
·
verified ·
1 Parent(s): 9229b62

Update Gpt4oDemo.py

Browse files
Files changed (1) hide show
  1. Gpt4oDemo.py +19 -7
Gpt4oDemo.py CHANGED
@@ -102,32 +102,44 @@ base64Frames = []
102
  # '''
103
  transcript=""
104
 
105
- def process_video(video_path, seconds_per_frame=2):
106
- global base64Frames, audio_path
107
  base_video_path, _ = os.path.splitext(video_path)
108
 
109
  video = cv2.VideoCapture(video_path)
110
  total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
111
  fps = video.get(cv2.CAP_PROP_FPS)
112
  frames_to_skip = int(fps * seconds_per_frame)
113
- curr_frame=0
114
 
 
 
 
 
 
 
115
  while curr_frame < total_frames - 1:
116
  video.set(cv2.CAP_PROP_POS_FRAMES, curr_frame)
117
  success, frame = video.read()
118
  if not success:
119
  break
120
- _, buffer = cv2.imencode(".jpg", frame)
 
 
 
 
121
  base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
122
  curr_frame += frames_to_skip
 
123
  video.release()
124
 
125
- audio_path = "./TEST.mp3"
 
126
  clip = VideoFileClip(video_path)
127
  clip.audio.write_audiofile(audio_path, bitrate="32k")
128
  clip.audio.close()
129
  clip.close()
130
- # transcribe_video(audio_path)
131
  print(f"Extracted {len(base64Frames)} frames")
132
  print(f"Extracted audio to {audio_path}")
133
  return base64Frames, audio_path
@@ -189,7 +201,7 @@ def handle_video(video=None):
189
  # Load example video
190
  video = "./TEST.mp4"
191
 
192
- base64Frames, audio_path = process_video(video_path=video, seconds_per_frame=100)
193
  chat_history.append({
194
  "role": "user",
195
  "content": [
 
102
  # '''
103
  transcript=""
104
 
105
+ def process_video(video_path, seconds_per_frame=2, target_width=320, target_height=180):
106
+ base64Frames = []
107
  base_video_path, _ = os.path.splitext(video_path)
108
 
109
  video = cv2.VideoCapture(video_path)
110
  total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
111
  fps = video.get(cv2.CAP_PROP_FPS)
112
  frames_to_skip = int(fps * seconds_per_frame)
113
+ curr_frame = 0
114
 
115
+ # Retrieve and print the original width and height
116
+ original_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
117
+ original_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
118
+ print(f"Original width: {original_width}, Original height: {original_height}")
119
+
120
+ # Loop through the video and extract frames at specified sampling rate
121
  while curr_frame < total_frames - 1:
122
  video.set(cv2.CAP_PROP_POS_FRAMES, curr_frame)
123
  success, frame = video.read()
124
  if not success:
125
  break
126
+
127
+ # Resize the frame
128
+ resized_frame = cv2.resize(frame, (target_width, target_height))
129
+
130
+ _, buffer = cv2.imencode(".jpg", resized_frame)
131
  base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
132
  curr_frame += frames_to_skip
133
+
134
  video.release()
135
 
136
+ # Extract audio from video
137
+ audio_path = f"./Task 1.mp3"
138
  clip = VideoFileClip(video_path)
139
  clip.audio.write_audiofile(audio_path, bitrate="32k")
140
  clip.audio.close()
141
  clip.close()
142
+ #transcribe_video(audio_path)
143
  print(f"Extracted {len(base64Frames)} frames")
144
  print(f"Extracted audio to {audio_path}")
145
  return base64Frames, audio_path
 
201
  # Load example video
202
  video = "./TEST.mp4"
203
 
204
+ base64Frames, audio_path = process_video(VIDEO_PATH, seconds_per_frame=1, target_width=320, target_height=180)
205
  chat_history.append({
206
  "role": "user",
207
  "content": [