Spaces:

weiyi01191
/

DeepOperateAI-Video

Running

App Files Files Community

weiyi01191 committed on Jun 11

Commit

e68ca85

verified ·

1 Parent(s): 1f33751

Update app.py

Browse files

Files changed (1) hide show

app.py +107 -16

app.py CHANGED Viewed

@@ -200,7 +200,17 @@ def get_subtitles(video_path):
     try:
         extract_audio(video_path, audio_path)
-        result = whisper_model.transcribe(audio_path, language="en")
         # 创建VTT文件
         with open(subtitle_path, "w", encoding="utf-8") as vtt_file:
@@ -285,11 +295,11 @@ def generate_prediction(video_path, instruction, gen_subtitles=True, stream=Fals
         answers = model.generate(
             prepared_images,
             prompt,
-            max_new_tokens=min(args.max_new_tokens if args else 256, 256),  # 限制最大token数
             do_sample=True,
             lengths=[length],
             num_beams=1,  # 保持beam=1减少计算
-            temperature=0.8,  # 添加温度参数
             top_p=0.9,     # 添加top_p参数
             repetition_penalty=1.1  # 避免重复
         )
@@ -390,12 +400,31 @@ def optimize_gpu_memory():
     """GPU内存优化"""
     print("🔍 开始GPU内存优化...")
-    # 设置环境变量优化内存分配
-    os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:256,garbage_collection_threshold:0.6'
-    os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
     if torch.cuda.is_available():
-        print(f"🔍 GPU: {torch.cuda.get_device_name(0)}")
         print(f"💾 总显存: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
         # 强制清理所有GPU缓存
@@ -403,10 +432,6 @@ def optimize_gpu_memory():
         torch.cuda.ipc_collect()
         gc.collect()
-        # 设置内存增长策略
-        torch.backends.cudnn.benchmark = False
-        torch.backends.cudnn.deterministic = True
         print(f"💾 清理后可用显存: {(torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_allocated(0)) / 1024**3:.1f} GB")
 def get_arguments():
@@ -460,7 +485,9 @@ def load_minigpt4_model():
             print(f"💾 模型加载后显存使用: {torch.cuda.memory_allocated(0) / 1024**3:.1f} GB")
         print("🚀 开始初始化Whisper模型...")
-        whisper_model = whisper.load_model("base").to(f"cuda:{whisper_gpu_id}" if torch.cuda.is_available() else "cpu")
         if torch.cuda.is_available():
             print(f"💾 全部加载后显存使用: {torch.cuda.memory_allocated(0) / 1024**3:.1f} GB")
@@ -512,12 +539,16 @@ def analyze_video_with_minigpt4(video_file, instruction):
         # 使用MiniGPT4-Video进行真实分析
         if not instruction or instruction.strip() == "":
-            instruction = "请详细分析这个视频的内容，包括场景、人物、动作、对话等，并描述所有可见和可听的元素。"
         # 调用MiniGPT4-Video的生成函数
         prediction = generate_prediction(
             video_path=temp_video_path,
-            instruction=instruction,
             gen_subtitles=True,  # 生成字幕
             stream=False
         )
@@ -580,7 +611,7 @@ def create_app():
             gr.Video(label="上传视频文件"),
             gr.Textbox(
                 label="分析指令",
-                value="请详细分析这个视频的内容，包括场景、人物、动作、对话等，并描述所有可见和可听的元素。",
                 placeholder="输入您希望AI如何分析这个视频...",
                 lines=3
             )
@@ -650,5 +681,65 @@ def main():
         show_error=True
     )
 if __name__ == "__main__":
     main()

     try:
         extract_audio(video_path, audio_path)
+        # 🔧 优化中文语音识别
+        result = whisper_model.transcribe(
+            audio_path,
+            language="zh",  # 明确指定中文
+            task="transcribe",  # 明确指定转录任务
+            temperature=0.0,  # 降低随机性
+            best_of=5,  # 使用最佳结果
+            beam_size=5,  # 增加beam搜索
+            patience=2.0,  # 增加耐心参数
+            initial_prompt="以下是一段中文视频的语音内容："  # 中文提示
+        )
         # 创建VTT文件
         with open(subtitle_path, "w", encoding="utf-8") as vtt_file:
         answers = model.generate(
             prepared_images,
             prompt,
+            max_new_tokens=512,  # 增加token数以获得更详细的分析
             do_sample=True,
             lengths=[length],
             num_beams=1,  # 保持beam=1减少计算
+            temperature=0.7,  # 稍微降低温度获得更稳定输出
             top_p=0.9,     # 添加top_p参数
             repetition_penalty=1.1  # 避免重复
         )
     """GPU内存优化"""
     print("🔍 开始GPU内存优化...")
     if torch.cuda.is_available():
+        gpu_name = torch.cuda.get_device_name(0)
+        print(f"🔍 GPU: {gpu_name}")
+        # 🔧 H200特定优化
+        if "H200" in gpu_name:
+            print("🚀 检测到H200显卡，应用特定优化...")
+            # H200优化设置
+            os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128,garbage_collection_threshold:0.8,expandable_segments:True'
+            os.environ['CUDA_LAUNCH_BLOCKING'] = '0'  # H200上设置为0
+            os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'  # H200 cuBLAS优化
+            os.environ['NCCL_AVOID_RECORD_STREAMS'] = '1'  # 避免H200内存问题
+            # 设置混合精度优化
+            torch.backends.cudnn.allow_tf32 = True  # 启用TF32提升H200性能
+            torch.backends.cuda.matmul.allow_tf32 = True
+            torch.backends.cudnn.benchmark = True  # H200上启用benchmark
+        else:
+            # 标准设置（A100等）
+            os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:256,garbage_collection_threshold:0.6'
+            os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
+            torch.backends.cudnn.benchmark = False
+            torch.backends.cudnn.deterministic = True
         print(f"💾 总显存: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
         # 强制清理所有GPU缓存
         torch.cuda.ipc_collect()
         gc.collect()
         print(f"💾 清理后可用显存: {(torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_allocated(0)) / 1024**3:.1f} GB")
 def get_arguments():
             print(f"💾 模型加载后显存使用: {torch.cuda.memory_allocated(0) / 1024**3:.1f} GB")
         print("🚀 开始初始化Whisper模型...")
+        # 🔧 使用更强的Whisper模型以提升中文识别
+        whisper_model = whisper.load_model("medium").to(f"cuda:{whisper_gpu_id}" if torch.cuda.is_available() else "cpu")
+        print("✅ Whisper模型加载完成 (medium版本，优化中文识别)")
         if torch.cuda.is_available():
             print(f"💾 全部加载后显存使用: {torch.cuda.memory_allocated(0) / 1024**3:.1f} GB")
         # 使用MiniGPT4-Video进行真实分析
         if not instruction or instruction.strip() == "":
+            instruction = "请详细分析这个视频的内容，包括场景、人物、动作、对话等。请用中文输出，并详细记录视频中谁说了什么话。"
+        # 🧠 使用智能规则感知指令
+        intelligent_instruction = create_intelligent_instruction(instruction)
+        print(f"🧠 使用智能规则感知指令进行分析...")
         # 调用MiniGPT4-Video的生成函数
         prediction = generate_prediction(
             video_path=temp_video_path,
+            instruction=intelligent_instruction,  # 使用智能指令
             gen_subtitles=True,  # 生成字幕
             stream=False
         )
             gr.Video(label="上传视频文件"),
             gr.Textbox(
                 label="分析指令",
+                value="请详细分析这个视频的内容，包括场景、人物、动作、对话等。请用中文输出，并详细记录视频中谁说了什么话。",
                 placeholder="输入您希望AI如何分析这个视频...",
                 lines=3
             )
         show_error=True
     )
+def create_intelligent_instruction(original_instruction):
+    """Build a rule-aware analysis prompt wrapped around the caller's instruction.
+
+    The returned prompt tells the model to act as an ad-content reviewer,
+    describe the video and its spoken dialogue in Chinese, and explicitly
+    flag anything matching the prohibited-content summary (tiers P3, P2, P1)
+    that is embedded in the prompt.
+
+    Args:
+        original_instruction: Instruction text that is appended verbatim at
+            the end of the prompt.
+
+    Returns:
+        The assembled prompt string. It begins and ends with a newline
+        because of how the template literal is laid out.
+    """
+    # Summary of the core prohibited-ad rules; it tells the model what to look for.
+    rules_summary = """
+请特别注意以下巨量引擎禁投内容（如发现请在描述中明确指出）：
+🚨 **高危违规内容 (P3)**:
+- 医疗器械、药品、保健品、医美服务
+- 烟草制品、电子烟相关产品
+- 虚拟货币、区块链、NFT、数字藏品
+- 违法出版物、政治敏感内容
+- 贷款、信贷、金融投资、股票
+- 赌博、博彩、棋牌游戏
+⚠️ **中危违规内容 (P2)**:
+- 房地产买卖、租赁、中介服务
+- 工具软件、刷机、破解软件
+- 教育培训、学历提升、考试代办
+- 翡翠、玉石、文玩、珠宝盲盒
+- 黄金回收、贵金属投资
+💭 **低危违规内容 (P1)**:
+- 化妆品中的特殊功效产品
+- 汽车修复、代办服务
+- 游戏账号交易、代练
+- 特殊食品、减肥产品
+"""
+    # Final prompt layout: reviewer persona, video-description checklist,
+    # dialogue checklist, the rule summary above, output requirements, and
+    # finally the original instruction.
+    intelligent_instruction = f"""
+你是专业的巨量引擎广告内容审核专家。请用中文详细分析这个视频，包括：
+📹 **视频内容详细描述**:
+- 场景环境：描述视频拍摄场所、背景环境
+- 人物信息：谁在视频中出现，年龄、性别、穿着特征
+- 关键动作：详细描述人物的具体动作和行为
+- 产品展示：如有产品展示，请详细描述产品外观、材质、用途
+- 文字信息：视频中出现的任何文字、标识、品牌名称
+🎙️ **语音对话内容**:
+- 详细记录视频中的所有对话内容
+- 明确标注"谁说了什么话"
+- 记录任何产品介绍、价格信息、功效宣传
+- 注意推销话术、营销用语
+🔍 **潜在违规风险分析**:
+{rules_summary}
+🎯 **分析要求**:
+1. 用中文输出所有内容
+2. 对于任何可能涉及上述违规内容的元素，请明确指出
+3. 重点关注翡翠、玉石、珠宝等文玩制品
+4. 注意医疗、金融、房产、教育等敏感行业
+5. 记录所有营销宣传语句
+原始指令：{original_instruction}
+"""
+    return intelligent_instruction
 if __name__ == "__main__":
     main()