MingGatsby committed on
Commit
d8fe757
·
verified ·
1 Parent(s): 849f759

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -110
app.py DELETED
@@ -1,110 +0,0 @@
1
- import gradio as gr
2
- from moviepy.editor import VideoFileClip
3
- import cv2
4
- import base64
5
- from openai import OpenAI
6
- import os
7
-
8
# Sample frames (as base64-encoded JPEGs) from a video and extract its audio.
def process_video(video_path, seconds_per_frame=2):
    """Sample one frame every *seconds_per_frame* seconds from *video_path*
    and extract its audio track to an MP3 next to the video file.

    Args:
        video_path: Path to the input video file.
        seconds_per_frame: Sampling interval in seconds between frames.

    Returns:
        Tuple ``(base64Frames, audio_path)`` — a list of base64-encoded JPEG
        frames and the path of the extracted MP3 file.

    Raises:
        IOError: If the video file cannot be opened.
        ValueError: If the video has no audio track.
    """
    print(f"Debug: 開始處理影片 {video_path}")  # debug trace
    base64Frames = []  # accumulated base64-encoded JPEG frames
    base_video_path, _ = os.path.splitext(video_path)  # video path minus extension
    video = cv2.VideoCapture(video_path)
    if not video.isOpened():
        # Fail fast with a clear error instead of silently reading 0 frames.
        raise IOError(f"Cannot open video file: {video_path}")
    try:
        total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = video.get(cv2.CAP_PROP_FPS)
        # BUGFIX: int(fps * seconds_per_frame) can be 0 (e.g. fps < 2 with the
        # 0.5 s interval the caller uses), which left curr_frame unchanged and
        # made the loop below spin forever. Clamp to at least 1 frame.
        frames_to_skip = max(1, int(fps * seconds_per_frame))
        curr_frame = 0  # index of the next frame to read

        print(f"Debug: 總幀數 {total_frames}, 幀率 {fps}, 每 {seconds_per_frame} 秒處理一幀")  # debug trace

        # Walk the video, sampling one frame per interval.
        while curr_frame < total_frames - 1:
            video.set(cv2.CAP_PROP_POS_FRAMES, curr_frame)  # seek to the sample frame
            success, frame = video.read()
            if not success:
                print(f"Debug: 讀取幀失敗,當前幀 {curr_frame}")  # debug trace
                break
            _, buffer = cv2.imencode(".jpg", frame)  # encode frame as JPEG bytes
            base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
            curr_frame += frames_to_skip
    finally:
        video.release()  # always release the capture, even on error

    print(f"Debug: 完成幀的提取,共提取了 {len(base64Frames)} 幀")  # debug trace

    # Extract the audio track to MP3.
    audio_path = f"{base_video_path}.mp3"  # output path for the audio
    print(f"Debug: 開始提取音訊到 {audio_path}")  # debug trace
    clip = VideoFileClip(video_path)
    try:
        if clip.audio is None:
            # A silent video would otherwise crash with an opaque
            # AttributeError on clip.audio.write_audiofile.
            raise ValueError(f"Video has no audio track: {video_path}")
        clip.audio.write_audiofile(audio_path, bitrate="32k")  # low bitrate keeps file small
        clip.audio.close()
    finally:
        clip.close()  # always release the moviepy clip
    print(f"Debug: 音訊提取完成")  # debug trace

    return base64Frames, audio_path
45
-
46
# Generate a Markdown summary for an uploaded video.
def summarize_video(file_path):
    """Summarize a video: sample frames, transcribe the audio with Whisper,
    then ask GPT-4o to write a Markdown summary from frames + transcript.

    Args:
        file_path: Path to the uploaded video file.

    Returns:
        The Markdown summary text produced by GPT-4o.
    """
    api_key = os.getenv("OPENAI_API_KEY")
    # SECURITY FIX: the original printed the raw API key to the logs.
    # Only report whether the key is configured, never its value.
    print(f"Debug: OPENAI_API_KEY is {'set' if api_key else 'NOT set'}")
    client = OpenAI(api_key=api_key)

    # Sample one frame every 0.5 seconds and extract the audio track.
    print(f"Debug: 開始提取影片幀和音訊")  # debug trace
    base64Frames, audio_path = process_video(file_path, seconds_per_frame=0.5)

    # Transcribe the audio with Whisper. Use a context manager so the file
    # handle is closed even if the API call fails (the original leaked it).
    print(f"Debug: 開始進行音訊轉文字")  # debug trace
    with open(audio_path, "rb") as audio_file:
        transcription = client.audio.transcriptions.create(
            model="whisper-1", file=audio_file
        )
    print(f"Debug: 音訊轉文字完成,轉錄結果長度 {len(transcription.text)} 字元")  # debug trace

    # Ask GPT-4o for a Markdown summary of the frames + transcript.
    print(f"Debug: 開始生成GPT-4o摘要")  # debug trace
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "system",
                "content": """你是一位優秀的摘要撰寫者。請根據提供的影片及其文字轉錄內容撰寫一份 Markdown 格式的摘要。""",
            },
            {
                "role": "user",
                "content": [
                    "這些是從影片中獲取的幀圖片",
                    # One low-detail image_url entry per sampled frame.
                    *map(
                        lambda x: {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpg;base64,{x}",
                                "detail": "low",
                            },
                        },
                        base64Frames,
                    ),
                    {
                        "type": "text",
                        "text": f"這是影片的文字轉錄內容: {transcription.text}",
                    },
                ],
            },
        ],
        temperature=0,  # deterministic summaries
    )
    print(f"Debug: GPT-4o摘要生成完成")  # debug trace

    return response.choices[0].message.content
98
-
99
# Gradio UI: a single video-file input mapped to a Markdown summary output.
_video_input = gr.File(label="上傳影片 (mp4)")

demo = gr.Interface(
    fn=summarize_video,
    inputs=[_video_input],
    outputs="markdown",
    title="影片摘要生成器",
    description="上傳影片並將會生成摘要。",
)
107
-
108
if __name__ == "__main__":
    # Launch the web UI only when run as a script, not on import.
    print("Debug: 啟動Gradio介面")
    demo.launch()