#!/usr/bin/env python3
"""
🎥 Video Content Safety Analysis - MiniGPT4-Video + 巨量引擎规则集成版
基于MiniGPT4-Video的真实视频内容分析 + 巨量引擎299条禁投规则检测
"""
import os
import gradio as gr
import torch
import gc
import whisper
import argparse
import yaml
import random
import numpy as np
import torch.backends.cudnn as cudnn
from minigpt4.common.eval_utils import init_model
from minigpt4.conversation.conversation import CONV_VISION
import tempfile
import shutil
import cv2
import webvtt
import moviepy.editor as mp
from torchvision import transforms
from datetime import timedelta
from moviepy.editor import VideoFileClip

# 导入巨量引擎禁投规则引擎
from prohibited_rules import ProhibitedRulesEngine

# Point Hugging Face Hub traffic at the hf-mirror.com mirror (useful in China).
# NOTE(review): this assignment runs after all the imports above. If one of
# them already imported huggingface_hub, the endpoint may have been read
# before this line executes - TODO confirm, and move it above the imports if so.
os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'

# Optional ZeroGPU support: use the real `spaces` package when it can be
# imported, otherwise install a no-op stand-in so that `@spaces.GPU`
# decorators in this file keep working.
try:
    import spaces
    GPU_AVAILABLE = True
    print("✅ ZeroGPU spaces 可用")
except ImportError:
    print("⚠️ ZeroGPU spaces 不可用，使用CPU模式")
    GPU_AVAILABLE = False

    class spaces:
        """Minimal no-op replacement for the `spaces` package."""

        @staticmethod
        def GPU(func=None, duration=60, **_ignored):
            """Return the decorated function unchanged.

            Works both as a bare decorator (``@spaces.GPU``) and as a
            decorator factory (``@spaces.GPU(duration=120)``). ``duration``
            and any extra keyword arguments are accepted and ignored.
            """
            # Bare usage: the decorated function arrives as the first argument.
            if callable(func):
                return func

            def decorator(inner):
                return inner
            return decorator

# Module-level state; load_minigpt4_model() assigns these on first use.
model = None  # MiniGPT4-Video model returned by init_model()
vis_processor = None  # frame preprocessor returned by init_model()
whisper_model = None  # Whisper model used by get_subtitles()
args = None  # argparse namespace produced by get_arguments()
seed = 42  # default seed; overwritten with config['run']['seed'] on load

# Instantiate the prohibited-content rules engine once, at import time.
rules_engine = ProhibitedRulesEngine()
print("✅ 巨量引擎299条禁投规则引擎初始化完成")

# ======================== MiniGPT4-Video 核心函数 ========================

def format_timestamp(seconds):
    """Convert a duration in seconds to a WebVTT ``HH:MM:SS.mmm`` timestamp."""
    delta = timedelta(seconds=seconds)
    whole_seconds = int(delta.total_seconds())
    millis = delta.microseconds // 1000
    hh, leftover = divmod(whole_seconds, 3600)
    mm, ss = divmod(leftover, 60)
    return "{:02}:{:02}:{:02}.{:03}".format(hh, mm, ss, millis)

def extract_video_info(video_path, max_images_length):
    """Compute the frame sampling step for a video and report its frame rate.

    Args:
        video_path: Path of the video file to open.
        max_images_length: Maximum number of frames the caller wants to sample.

    Returns:
        A ``(sampling_interval, fps)`` tuple. ``sampling_interval`` is the
        estimated total frame count divided by ``max_images_length``,
        truncated to an int and raised to 1 when it would otherwise be 0.
    """
    clip = VideoFileClip(video_path)
    try:
        # Read everything needed while the clip is still open.
        fps = clip.fps
        total_num_frames = int(clip.duration * fps)
    finally:
        # Always release the reader, even if the metadata is unusable.
        clip.close()
    sampling_interval = int(total_num_frames / max_images_length) or 1
    return sampling_interval, fps

def time_to_milliseconds(time_str):
    """Convert a colon-separated timestamp to integer milliseconds.

    Accepts ``HH:MM:SS.mmm``, ``MM:SS.mmm`` and plain ``SS.mmm``.

    The result is rounded rather than truncated: float arithmetic turns
    ``1.001 * 1000`` into ``1000.9999999999999``, which ``int()`` would
    report as 1000 instead of 1001.

    Raises:
        ValueError: If a field is not numeric or there are more than three
            colon-separated fields.
    """
    fields = [float(part) for part in time_str.strip().split(':')]
    if len(fields) > 3:
        raise ValueError(f"invalid timestamp: {time_str!r}")

    total_seconds = 0.0
    for value in fields:
        # Each field to the left is worth 60x the one after it.
        total_seconds = total_seconds * 60 + value
    return round(total_seconds * 1000)

def extract_subtitles(subtitle_path):
    """Load a WebVTT file as a list of ``(start_ms, end_ms, text)`` tuples.

    Newlines inside a caption are replaced with spaces. This is best-effort:
    a missing path or an unreadable file yields an empty list, so the caller
    continues without subtitles.
    """
    if not subtitle_path or not os.path.exists(subtitle_path):
        return []

    try:
        return [
            (
                time_to_milliseconds(caption.start),
                time_to_milliseconds(caption.end),
                caption.text.strip().replace('\n', ' '),
            )
            for caption in webvtt.read(subtitle_path)
        ]
    except Exception as e:
        # `Exception` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate; the failure is reported instead of hidden.
        print(f"字幕解析错误: {e}")
        return []

def find_subtitle(subtitles, frame_count, fps):
    """Binary-search ``subtitles`` for the caption covering a given frame.

    ``subtitles`` holds ``(start_ms, end_ms, text)`` tuples; the search
    assumes they are ordered by time. Returns the caption text, or ``None``
    when no caption spans the frame's timestamp.
    """
    if not subtitles:
        return None

    timestamp_ms = (frame_count / fps) * 1000
    lo = 0
    hi = len(subtitles) - 1

    while lo <= hi:
        pivot = (lo + hi) // 2
        begin, finish, text = subtitles[pivot]
        if timestamp_ms < begin:
            hi = pivot - 1
        elif timestamp_ms <= finish:
            return text
        else:
            lo = pivot + 1

    return None

def match_frames_and_subtitles(video_path, subtitles, sampling_interval, max_sub_len, fps, max_frames):
    """Sample frames from a video and build the matching prompt placeholder.

    Every ``sampling_interval``-th frame is converted from OpenCV's BGR order
    to an RGB PIL image, passed through the global ``vis_processor`` and
    collected. Each sampled frame adds ``<Img><ImageHere>`` to the
    placeholder. Caption text seen since the previous sampled frame is
    appended after a ``<Cap>`` tag, each distinct caption only once, until
    the running word count reaches ``max_sub_len``.

    Args:
        video_path: Path of the video to read.
        subtitles: List of ``(start_ms, end_ms, text)`` tuples; may be empty.
        sampling_interval: Keep one frame out of this many.
        max_sub_len: Word budget for caption text in the placeholder.
        fps: Frame rate used to turn a frame index into a timestamp.
        max_frames: Stop once this many frames have been collected.

    Returns:
        ``(images, img_placeholder)`` where ``images`` is the stacked tensor
        of processed frames, or ``(None, None)`` if no frame could be read.
    """
    global vis_processor

    to_pil = transforms.ToPILImage()
    images = []
    frame_count = 0
    img_placeholder = ""
    subtitle_text_in_interval = ""
    seen_subtitles = set()
    number_of_words = 0

    cap = cv2.VideoCapture(video_path)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            if subtitles:
                frame_subtitle = find_subtitle(subtitles, frame_count, fps)
                # Queue each distinct caption only once.
                if frame_subtitle and frame_subtitle not in seen_subtitles:
                    subtitle_text_in_interval += frame_subtitle + " "
                    seen_subtitles.add(frame_subtitle)

            if frame_count % sampling_interval == 0:
                # Reverse the channel axis: OpenCV yields BGR, PIL expects RGB.
                pil_frame = to_pil(frame[:, :, ::-1])
                images.append(vis_processor(pil_frame))
                img_placeholder += '<Img><ImageHere>'

                if subtitle_text_in_interval != "" and number_of_words < max_sub_len:
                    img_placeholder += f'<Cap>{subtitle_text_in_interval}'
                    number_of_words += len(subtitle_text_in_interval.split(' '))
                    subtitle_text_in_interval = ""

            frame_count += 1
            if len(images) >= max_frames:
                break
    finally:
        # Release the capture even if frame processing raises. This function
        # never opens a HighGUI window, so cv2.destroyAllWindows() is not needed.
        cap.release()

    if not images:
        return None, None

    return torch.stack(images), img_placeholder

def extract_audio(video_path, audio_path):
    """Write the audio track of ``video_path`` to ``audio_path`` as MP3.

    Raises:
        ValueError: If the video has no audio track.
    """
    video_clip = mp.VideoFileClip(video_path)
    try:
        audio_clip = video_clip.audio
        if audio_clip is None:
            # Fail with a readable message instead of an AttributeError
            # on None further down.
            raise ValueError(f"视频不包含音轨: {video_path}")
        audio_clip.write_audiofile(audio_path, codec="libmp3lame", bitrate="320k", verbose=False, logger=None)
    finally:
        # Close the clip whether or not the export succeeded.
        video_clip.close()

def get_subtitles(video_path):
    """Transcribe a video's audio with Whisper and cache the result as WebVTT.

    Returns the path of the ``.vtt`` file, or ``None`` when the Whisper model
    is not loaded or transcription fails. An existing ``.vtt`` for the same
    video id is reused without re-transcribing.

    NOTE(review): the cache key is only the file name without its extension,
    so two different videos with the same name share one subtitle file.
    NOTE(review): transcription is pinned to ``language="en"``; confirm this
    is intended for non-English videos.
    """
    global whisper_model

    if whisper_model is None:
        return None

    audio_dir = "workspace/inference_subtitles/mp3"
    subtitle_dir = "workspace/inference_subtitles"
    os.makedirs(subtitle_dir, exist_ok=True)
    os.makedirs(audio_dir, exist_ok=True)

    # Strip only the final extension so "a.b.mp4" and "a.c.mp4" get distinct
    # ids (cutting at the first dot mapped both to "a").
    video_id = os.path.splitext(os.path.basename(video_path))[0]
    audio_path = f"{audio_dir}/{video_id}.mp3"
    subtitle_path = f"{subtitle_dir}/{video_id}.vtt"

    # Reuse a previously generated subtitle file.
    if os.path.exists(subtitle_path):
        return subtitle_path

    # Write to a side file first so an interrupted run never leaves a
    # truncated .vtt that would later be mistaken for a valid cache entry.
    partial_path = f"{subtitle_path}.part"
    try:
        extract_audio(video_path, audio_path)
        result = whisper_model.transcribe(audio_path, language="en")

        with open(partial_path, "w", encoding="utf-8") as vtt_file:
            vtt_file.write("WEBVTT\n\n")
            for segment in result['segments']:
                start = format_timestamp(segment['start'])
                end = format_timestamp(segment['end'])
                text = segment['text']
                vtt_file.write(f"{start} --> {end}\n{text}\n\n")

        os.replace(partial_path, subtitle_path)
        return subtitle_path
    except Exception as e:
        print(f"字幕生成错误: {e}")
        try:
            if os.path.exists(partial_path):
                os.remove(partial_path)
        except OSError:
            # Cleanup is best-effort; the original error was already reported.
            pass
        return None

def prepare_input(video_path, subtitle_path, instruction):
    """Build the frame tensor and text prompt for one video.

    Frame and caption budgets depend on the checkpoint name: 90 frames and
    800 words when ``args.ckpt`` contains "mistral", otherwise 45 and 400.

    Returns:
        ``(frames_features, prompt)``. The prompt is the image/caption
        placeholder followed by a newline and ``instruction``, or just
        ``instruction`` when no placeholder was produced.
    """
    global args

    uses_mistral = args and "mistral" in args.ckpt
    max_frames, max_sub_len = (90, 800) if uses_mistral else (45, 400)

    step, fps = extract_video_info(video_path, max_frames)
    parsed_subtitles = extract_subtitles(subtitle_path)
    frames_features, placeholder = match_frames_and_subtitles(
        video_path, parsed_subtitles, step, max_sub_len, fps, max_frames
    )

    prompt = placeholder + "\n" + instruction if placeholder else instruction
    return frames_features, prompt

def model_generate(*model_args, **kwargs):
    """Forward all arguments to ``model.llama_model.generate`` under the model's autocast context."""
    global model

    with model.maybe_autocast():
        return model.llama_model.generate(*model_args, **kwargs)

def generate_prediction(video_path, instruction, gen_subtitles=True, stream=False):
    """Run MiniGPT4-Video on a video and return the generated text.

    Args:
        video_path: Path of the video to analyse.
        instruction: Instruction appended after the frame placeholders.
        gen_subtitles: When true, subtitles are generated via get_subtitles().
        stream: Accepted but not used by this function.

    Returns:
        The first generated answer, or a human-readable error string when the
        video yields no frames or generation raises.
    """
    global model, args, seed

    subtitle_path = get_subtitles(video_path) if gen_subtitles else None
    frames, full_instruction = prepare_input(video_path, subtitle_path, instruction)

    if frames is None:
        return "视频无法打开，请检查视频路径"

    frame_total = len(frames)
    batch = frames.unsqueeze(0)

    # Build a single-turn conversation with an empty system prompt.
    conv = CONV_VISION.copy()
    conv.system = ""
    user_role, assistant_role = conv.roles[0], conv.roles[1]
    conv.append_message(user_role, full_instruction)
    conv.append_message(assistant_role, None)
    prompt = [conv.get_prompt()]

    # Re-seed before every generation.
    setup_seeds(seed)

    try:
        generation_options = {
            "max_new_tokens": args.max_new_tokens if args else 512,
            "do_sample": True,
            "lengths": [frame_total],
            "num_beams": 1,
        }
        answers = model.generate(batch, prompt, **generation_options)
        return answers[0]
    except Exception as e:
        return f"生成预测时出错: {str(e)}"

# ======================== 巨量引擎规则检测函数 ========================

def format_violations_report(violations_result):
    """Render the rules-engine result as a Markdown-style text report.

    With no violations a fixed "all clear" text is returned. Otherwise the
    report has a per-level count summary followed by one entry per violation,
    ordered P3, then P2, then P1; entries of the same level keep their
    original relative order.
    """
    if not violations_result["has_violations"]:
        return """
🛡️ **巨量引擎规则检测结果**: ✅ 无违规内容
- 已检测规则: 299条巨量引擎禁投规则
- 检测维度: 低危(P1) + 中危(P2) + 高危(P3)
- 检测结果: 内容符合平台规范
        """

    severity_rank = {"P3": 3, "P2": 2, "P1": 1}
    severity_icon = {"P3": "🚨", "P2": "⚠️", "P1": "💭"}

    header = f"""
🚨 **巨量引擎规则检测结果**: ⚠️ 发现 {violations_result["total_violations"]} 项违规

📊 **违规统计**:
- 🔴 高危违规(P3): {violations_result["high_risk"]["count"]} 项
- 🟡 中危违规(P2): {violations_result["medium_risk"]["count"]} 项  
- 🟠 低危违规(P1): {violations_result["low_risk"]["count"]} 项

📋 **详细违规列表**:
    """

    def render_entry(violation):
        # One report entry for a single violation.
        risk_icon = severity_icon[violation["risk_level"]]
        return f"""
{risk_icon} **{violation["risk_level"]} - {violation["category"]}**
   规则: {violation["description"]}
   匹配词: "{violation["matched_keyword"]}"
   规则ID: {violation["rule_id"]}
        """

    # Highest severity first; the sort is stable within a level.
    ordered = sorted(
        violations_result["all_violations"],
        key=lambda item: -severity_rank[item["risk_level"]],
    )
    return header + "".join([render_entry(violation) for violation in ordered])

def get_overall_risk_level(violations_result):
    """Summarise the rules-engine result as a single risk label.

    Precedence: no violations, then any high-risk hit, then more than two
    medium-risk hits, then any medium-risk hit, otherwise low risk.
    """
    if not violations_result["has_violations"]:
        return "✅ P3 (安全) - 内容健康，符合平台规范"

    high_count = violations_result["high_risk"]["count"]
    if high_count > 0:
        return f"🚨 P0 (极高危) - 发现 {high_count} 项高危违规，禁止投放"

    medium_count = violations_result["medium_risk"]["count"]
    if medium_count > 2:
        return f"⚠️ P1 (高危) - 发现 {medium_count} 项中危违规，需严格审核"
    if medium_count > 0:
        return f"⚠️ P1 (中危) - 发现 {medium_count} 项中危违规，需要审核"

    return f"⚡ P2 (低危) - 发现 {violations_result['low_risk']['count']} 项低危违规，建议关注"

# ======================== 应用主要函数 ========================

def setup_seeds(seed):
    """Seed Python, NumPy and PyTorch RNGs and switch cuDNN to deterministic mode."""
    seeders = (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed)
    for apply_seed in seeders:
        apply_seed(seed)

    cudnn.deterministic = True
    cudnn.benchmark = False

def optimize_gpu_memory():
    """Set CUDA allocator environment variables and, if a GPU is present, clear caches.

    Always sets ``PYTORCH_CUDA_ALLOC_CONF`` and ``CUDA_LAUNCH_BLOCKING``.
    When CUDA is available it also prints device information, empties the
    CUDA cache, runs garbage collection and disables cuDNN autotuning.
    """
    print("🔍 开始GPU内存优化...")

    # Allocator / launch settings exported through the environment.
    os.environ.update({
        'PYTORCH_CUDA_ALLOC_CONF': 'max_split_size_mb:256,garbage_collection_threshold:0.6',
        'CUDA_LAUNCH_BLOCKING': '1',
    })

    if not torch.cuda.is_available():
        return

    gib = 1024**3
    print(f"🔍 GPU: {torch.cuda.get_device_name(0)}")
    print(f"💾 总显存: {torch.cuda.get_device_properties(0).total_memory / gib:.1f} GB")

    # Drop cached blocks and collect unreachable Python objects.
    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()
    gc.collect()

    # Deterministic cuDNN, no autotuner.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    # Total device memory minus what this process has allocated.
    remaining = torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_allocated(0)
    print(f"💾 清理后可用显存: {remaining / gib:.1f} GB")

def get_arguments():
    """Parse the MiniGPT4-Video command-line options from ``sys.argv``.

    Unrecognised arguments are reported and ignored rather than aborting the
    process. This function is called lazily from the model loader, where an
    argparse ``SystemExit`` would not be caught by ``except Exception``.

    Returns:
        An ``argparse.Namespace`` with ``cfg_path``, ``ckpt``,
        ``max_new_tokens``, ``lora_r``, ``lora_alpha`` and ``options``.
    """
    parser = argparse.ArgumentParser(description="MiniGPT4-Video参数")
    parser.add_argument("--cfg-path", help="配置文件路径",
                        default="test_configs/minigpt4_optimized_config.yaml")
    parser.add_argument("--ckpt", type=str,
                        default='checkpoints/video_llama_checkpoint_last.pth',
                        help="模型检查点路径")
    parser.add_argument("--max_new_tokens", type=int, default=512,
                        help="最大生成token数")
    parser.add_argument("--lora_r", type=int, default=96, help="LoRA rank")
    parser.add_argument("--lora_alpha", type=int, default=24, help="LoRA alpha")
    parser.add_argument("--options", nargs="+", help="覆盖配置选项")

    known_args, unknown_args = parser.parse_known_args()
    if unknown_args:
        print(f"⚠️ 忽略未识别的命令行参数: {unknown_args}")
    return known_args

def load_minigpt4_model():
    """Load MiniGPT4-Video and Whisper once and cache them in module globals.

    Later calls return the cached objects. The globals are assigned only
    after the main model has loaded, so a failed attempt leaves no partial
    state behind. Whisper is optional: if it fails to load, ``whisper_model``
    stays ``None`` and subtitle generation is skipped.

    Returns:
        ``(model, vis_processor, whisper_model)``, or ``(None, None, None)``
        when the main model cannot be loaded.
    """
    global model, vis_processor, whisper_model, args, seed

    if model is not None:
        return model, vis_processor, whisper_model

    try:
        print("🔄 正在加载MiniGPT4-Video模型...")

        parsed_args = get_arguments()

        config_path = parsed_args.cfg_path
        if not os.path.exists(config_path):
            config_path = "test_configs/llama2_test_config.yaml"  # fallback config
            # init_model() is handed this namespace, so it must see the path
            # that was actually opened rather than the missing one.
            parsed_args.cfg_path = config_path

        # NOTE: FullLoader is kept for compatibility; the config is a local
        # project file, not external input.
        with open(config_path) as file:
            config = yaml.load(file, Loader=yaml.FullLoader)

        loaded_seed = config['run']['seed']
        setup_seeds(loaded_seed)

        optimize_gpu_memory()

        print("🚀 开始初始化MiniGPT4-Video模型...")
        loaded_model, loaded_processor, whisper_gpu_id, _minigpt4_gpu_id, _answer_module_gpu_id = init_model(parsed_args)

        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            print(f"💾 模型加载后显存使用: {torch.cuda.memory_allocated(0) / 1024**3:.1f} GB")

        print("🚀 开始初始化Whisper模型...")
        try:
            whisper_device = f"cuda:{whisper_gpu_id}" if torch.cuda.is_available() else "cpu"
            loaded_whisper = whisper.load_model("base").to(whisper_device)
        except Exception as whisper_error:
            # Video analysis still works without speech transcription.
            print(f"⚠️ Whisper模型加载失败，将跳过字幕生成: {whisper_error}")
            loaded_whisper = None

        if torch.cuda.is_available():
            print(f"💾 全部加载后显存使用: {torch.cuda.memory_allocated(0) / 1024**3:.1f} GB")

        # Commit everything at once so the globals never describe a
        # half-finished load.
        args = parsed_args
        seed = loaded_seed
        model, vis_processor, whisper_model = loaded_model, loaded_processor, loaded_whisper

        print("✅ 所有模型加载完成!")
        return model, vis_processor, whisper_model

    except Exception as e:
        print(f"❌ 模型加载失败: {e}")
        print("🔄 回退到模拟模式...")
        return None, None, None

@spaces.GPU(duration=120)
def analyze_video_with_minigpt4(video_file, instruction):
    """Describe a video with MiniGPT4-Video and check the text against the rules engine.

    Args:
        video_file: Path of the uploaded video, or ``None`` if nothing was uploaded.
        instruction: Analysis instruction; a default is used when it is blank.

    Returns:
        ``(report_text, risk_label)``. Upload, model-loading and runtime
        failures are reported through the same pair and never raised.
    """
    if video_file is None:
        return "❌ 请上传视频文件", "无法评估"

    try:
        model_loaded, _, _ = load_minigpt4_model()

        if model_loaded is None:
            # Model unavailable: return a placeholder report.
            return f"""
🎬 **视频内容分析结果 (模拟模式)**

📋 **基本信息**:
- 视频文件: {video_file}
- 分析指令: {instruction}

⚠️ **注意**: 当前运行在模拟模式，真实模型加载失败
请检查模型文件和配置是否正确

🛡️ **巨量引擎规则检测**: 仅在真实模式下可用
            """, "⚠️ 模拟模式"

        print(f"🔄 开始分析视频: {video_file}")
        print(f"📝 分析指令: {instruction}")

        # A missing file cannot be copied or analysed, so say so directly
        # instead of failing inside shutil.copy2 with a generic error.
        if not os.path.exists(video_file):
            return f"❌ 视频文件不存在: {video_file}", "⚠️ 错误"

        if not instruction or instruction.strip() == "":
            instruction = "请详细分析这个视频的内容，包括场景、人物、动作、对话等，并描述所有可见和可听的元素。"

        prediction = generate_prediction(
            video_path=video_file,
            instruction=instruction,
            gen_subtitles=True,
            stream=False
        )

        # Run the generated description (and the instruction) through the rules engine.
        print("🔍 开始巨量引擎299条规则检测...")
        violations_result = rules_engine.check_all_content(prediction, instruction)

        enhanced_result = f"""
🎬 **MiniGPT4-Video 视频内容分析 + 巨量引擎规则检测报告**

📋 **基本信息**:
- 视频文件: {os.path.basename(video_file)}
- 分析设备: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'CPU模式'}
- 分析指令: {instruction}

🔍 **视频内容描述**:
{prediction}

{format_violations_report(violations_result)}

📊 **技术信息**:
- 内容理解: MiniGPT4-Video + Whisper  
- 规则引擎: 巨量引擎299条禁投规则
- 检测等级: P1(低危) + P2(中危) + P3(高危)
- 分析模式: 多模态理解 (视觉+语音+文本)

💡 **说明**: 
基于MiniGPT4-Video的深度内容理解，结合巨量引擎完整禁投规则库进行专业违规检测。
        """

        safety_score = get_overall_risk_level(violations_result)

        return enhanced_result, safety_score

    except Exception as e:
        error_msg = f"""
❌ **分析过程中出错**

错误信息: {str(e)}

🔄 **可能的解决方案**:
1. 检查视频文件格式 (建议MP4)
2. 确认模型文件是否正确加载
3. 检查GPU内存是否充足
4. 验证配置文件路径

💡 **提示**: 如果问题持续，请检查模型和依赖项安装
        """
        return error_msg, "⚠️ 错误"

def create_app():
    """Build the Gradio interface: a video plus an instruction in, a report plus a risk label out."""
    default_instruction = "请详细分析这个视频的内容，包括场景、人物、动作、对话等，并描述所有可见和可听的元素。"

    input_components = [
        gr.Video(label="上传视频文件"),
        gr.Textbox(
            label="分析指令",
            value=default_instruction,
            placeholder="输入您希望AI如何分析这个视频...",
            lines=3,
        ),
    ]
    output_components = [
        gr.Textbox(label="MiniGPT4-Video 内容分析 + 巨量引擎规则检测", lines=20),
        gr.Textbox(label="巨量引擎风险评级"),
    ]

    # Instruction-only examples; the video slot is left empty.
    example_instructions = (
        "分析这个视频是否包含禁投内容",
        "检测视频中是否有巨量引擎禁止的产品或服务",
        "评估视频内容的投放风险等级",
        "详细描述视频内容并进行合规检查",
    )
    example_rows = [[None, text] for text in example_instructions]

    page_description = """
        ## 🎬 基于MiniGPT4-Video + 巨量引擎299条禁投规则的专业视频安全检测系统
        
        ⚡ **ZeroGPU加速** | 🎬 **MiniGPT4-Video** | 🎙️ **Whisper语音** | 🛡️ **巨量引擎299条规则**
        
        **🔥 核心功能:**
        - 🎞️ **深度视频理解**: MiniGPT4-Video多模态分析
        - 🎙️ **语音转文字**: Whisper自动生成字幕
        - 🛡️ **专业违规检测**: 巨量引擎完整禁投规则库
        - 📊 **智能风险评级**: P0-P3四级风险等级
        
        **🎯 检测维度:**
        - **高危(P3)**: 违法出版物、烟草、医疗等严重违规 
        - **中危(P2)**: 赌博周边、房地产、金融等中等风险
        - **低危(P1)**: 化妆品、汽车、游戏等轻微风险
        
        **📋 规则覆盖:**
        涵盖化妆品类、汽车类、游戏类、赌博类、房地产类、工具软件类、教育培训类、
        金融类、医疗类、烟草类等全部299条巨量引擎禁投规则
        """

    return gr.Interface(
        fn=analyze_video_with_minigpt4,
        inputs=input_components,
        outputs=output_components,
        title="🎥 智能视频内容安全分析 - MiniGPT4-Video + 巨量引擎",
        description=page_description,
        examples=example_rows,
        cache_examples=False,
    )

def main():
    """Print startup information, create the working directories and launch the Gradio app."""
    for banner in (
        "🚀 启动MiniGPT4-Video + 巨量引擎视频安全分析应用",
        "🎬 MiniGPT4-Video: 深度视频内容理解",
        "🛡️ 巨量引擎: 299条禁投规则检测",
    ):
        print(banner)

    if not torch.cuda.is_available():
        print("⚠️ 使用CPU模式")
    else:
        print(f"✅ GPU可用: {torch.cuda.get_device_name(0)}")

    # Working directories for temporary files, subtitles and extracted audio.
    required_dirs = (
        "workspace/tmp",
        "workspace/inference_subtitles",
        "workspace/inference_subtitles/mp3",
    )
    for directory in required_dirs:
        os.makedirs(directory, exist_ok=True)

    print("📁 工作目录准备完成")
    print("🚀 正在启动Gradio应用...")

    launch_options = {
        "share": True,
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
    }
    create_app().launch(**launch_options)

# Start the application only when this file is run as a script, not on import.
if __name__ == "__main__":
    main()