gpt-4o-video-summarizer-demo

Sleeping

App Files Files Community

MingGatsby committed on Sep 1, 2024

Commit

e7d4e4b

verified ·

1 Parent(s): d8fe757

Create app.py

Browse files

Files changed (1) hide show

app.py +91 -0

app.py ADDED Viewed

	@@ -0,0 +1,91 @@

+import gradio as gr
+from moviepy.editor import VideoFileClip
+import cv2
+import base64
+from openai import OpenAI
+import os
+# Reference: https://cookbook.openai.com/examples/gpt4o/introduction_to_gpt4o
+def process_video(video_path, seconds_per_frame=2):
+    base64Frames = []
+    base_video_path, _ = os.path.splitext(video_path)
+    video = cv2.VideoCapture(video_path)
+    total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
+    fps = video.get(cv2.CAP_PROP_FPS)
+    frames_to_skip = int(fps * seconds_per_frame)
+    curr_frame = 0
+    while curr_frame < total_frames - 1:
+        video.set(cv2.CAP_PROP_POS_FRAMES, curr_frame)
+        success, frame = video.read()
+        if not success:
+            break
+        _, buffer = cv2.imencode(".jpg", frame)
+        base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
+        curr_frame += frames_to_skip
+    video.release()
+    audio_path = f"{base_video_path}.mp3"
+    clip = VideoFileClip(video_path)
+    clip.audio.write_audiofile(audio_path, bitrate="32k")
+    clip.audio.close()
+    clip.close()
+    return base64Frames, audio_path
+def summarize_video(api_key, file_path):
+    client = OpenAI(api_key=api_key)
+    # 抽取幀和音頻（每0.5秒一幀）
+    base64Frames, audio_path = process_video(file_path, seconds_per_frame=0.5)
+    # 使用Whisper進行音頻轉錄
+    transcription = client.audio.transcriptions.create(
+        model="whisper-1", file=open(audio_path, "rb")
+    )
+    # 使用GPT-4o生成摘要
+    response = client.chat.completions.create(
+        model="gpt-4o",
+        messages=[
+            {
+                "role": "system",
+                "content": """您是一名優秀的摘要專家，請根據提供的影片和其轉錄內容生成Markdown格式的摘要。""",
+            },
+            {
+                "role": "user",
+                "content": [
+                    "以下是從影片中提取的幀畫面",
+                    *map(
+                        lambda x: {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": f"data:image/jpg;base64,{x}",
+                                "detail": "low",
+                            },
+                        },
+                        base64Frames,
+                    ),
+                    {
+                        "type": "text",
+                        "text": f"這是影片的轉錄內容: {transcription.text}",
+                    },
+                ],
+            },
+        ],
+        temperature=0,
+    )
+    return response.choices[0].message.content
+demo = gr.Interface(
+    fn=summarize_video,
+    inputs=[gr.Textbox(label="OpenAI API Key"), gr.File(label="上傳影片 (mp4)")],
+    outputs="markdown",
+    title="影片摘要生成器",
+    description="上傳影片，將生成影片的摘要。",
+)
+if __name__ == "__main__":
+    demo.launch()