import gradio as gr
import io
import numpy as np
import torch
from decord import cpu, VideoReader, bridge
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import BitsAndBytesConfig


# Hugging Face Hub identifier of the checkpoint loaded by load_model().
MODEL_PATH = "THUDM/cogvlm2-llama3-caption"

# Device that input tensors are moved to before generation.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# Use bfloat16 only when a CUDA device reports a major compute capability
# of 8 or higher; every other configuration falls back to float16.
if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8:
    TORCH_TYPE = torch.bfloat16
else:
    TORCH_TYPE = torch.float16


# Candidate delay causes for each manufacturing step, keyed by the step label
# shown in the UI dropdown. The list for the selected step is embedded in the
# analysis prompt.
DELAY_REASONS = {
    "Step 1": [
        "Delay in Bead Insertion",
        "Lack of raw material",
    ],
    "Step 2": [
        "Inner Liner Adjustment by Technician",
        "Person rebuilding defective Tire Sections",
    ],
    "Step 3": [
        "Manual Adjustment in Ply1 apply",
        "Technician repairing defective Tire Sections",
    ],
    "Step 4": [
        "Delay in Bead set",
        "Lack of raw material",
    ],
    "Step 5": [
        "Delay in Turnup",
        "Lack of raw material",
    ],
    "Step 6": [
        "Person Repairing sidewall",
        "Person rebuilding defective Tire Sections",
    ],
    "Step 7": [
        "Delay in sidewall stitching",
        "Lack of raw material",
    ],
    "Step 8": [
        "No person available to load Carcass",
        "No person available to collect tire",
    ],
}

def load_video(video_data, strategy='chat'):
    """Decode a video and return a batch of frames sampled about once per second.

    Args:
        video_data: Either a filesystem path (``str``) or the raw bytes of an
            encoded video; bytes are wrapped in an in-memory stream.
        strategy: Accepted for interface compatibility; this implementation
            never reads it.

    Returns:
        The frame batch returned by decord for the sampled indices, with its
        last axis moved to the front via ``permute(3, 0, 1, 2)``.
        NOTE(review): presumably (frames, H, W, C) -> (C, frames, H, W);
        confirm against the decord torch bridge.

    Raises:
        ValueError: If the video has no frames (``max()`` of an empty list).
    """
    # Side effect: switches decord's output bridge to torch for the process.
    bridge.set_bridge('torch')
    num_frames = 24  # upper bound on how many frames are sampled

    source = video_data if isinstance(video_data, str) else io.BytesIO(video_data)
    reader = VideoReader(source, ctx=cpu(0))

    frame_count = len(reader)
    # Keep the first value of each per-frame timestamp entry
    # (presumably the frame's start time in seconds -- TODO confirm).
    start_times = [
        entry[0] for entry in reader.get_frame_timestamp(np.arange(frame_count))
    ]
    seconds_covered = round(max(start_times)) + 1

    def nearest_frame_index(second):
        """Return the index of the first frame whose timestamp is closest to `second`."""
        closest = min(start_times, key=lambda stamp: abs(stamp - second))
        return start_times.index(closest)

    # One frame per whole second, stopping once num_frames have been picked.
    sampled_seconds = range(min(seconds_covered, num_frames))
    frame_ids = [nearest_frame_index(second) for second in sampled_seconds]

    frames = reader.get_batch(frame_ids)
    return frames.permute(3, 0, 1, 2)

def load_model():
    """Load the tokenizer and the 4-bit quantized model from MODEL_PATH.

    Returns:
        tuple: ``(model, tokenizer)`` where ``model`` is the result of calling
        ``.eval()`` on the loaded model.
    """
    # 4-bit NF4 quantization with double quantization; compute runs in TORCH_TYPE.
    nf4_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=TORCH_TYPE,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
    )

    # NOTE(review): trust_remote_code=True lets the checkpoint repository supply
    # its own Python code -- only acceptable for a trusted MODEL_PATH.
    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)

    model_options = dict(
        torch_dtype=TORCH_TYPE,
        trust_remote_code=True,
        quantization_config=nf4_config,
        device_map="auto",
    )
    loaded = AutoModelForCausalLM.from_pretrained(MODEL_PATH, **model_options)

    return loaded.eval(), tokenizer

def predict(prompt, video_data, temperature, model, tokenizer):
    """Run the model on a video plus text prompt and return the decoded reply.

    Args:
        prompt: Text query passed to the model's conversation builder.
        video_data: Video path or encoded bytes, forwarded to ``load_video``.
        temperature: Forwarded to ``model.generate`` as ``temperature``.
        model: Model exposing ``build_conversation_input_ids`` and ``generate``.
        tokenizer: Tokenizer used to build the inputs and decode the output.

    Returns:
        str: The generated continuation only (prompt tokens are sliced off),
        decoded with special tokens skipped.
    """
    frames = load_video(video_data, strategy='chat')

    built = model.build_conversation_input_ids(
        tokenizer=tokenizer,
        query=prompt,
        images=[frames],
        history=[],
        template_version='chat',
    )

    # Text tensors get a leading batch dimension and are moved to DEVICE.
    model_inputs = {
        key: built[key].unsqueeze(0).to(DEVICE)
        for key in ('input_ids', 'token_type_ids', 'attention_mask')
    }
    # The video tensor is moved to DEVICE, cast to TORCH_TYPE and nested in
    # a list-of-lists.
    model_inputs['images'] = [[built['images'][0].to(DEVICE).to(TORCH_TYPE)]]

    # NOTE(review): do_sample is False, so top_k / top_p / temperature are
    # presumably ignored by generation -- confirm against the transformers
    # generation docs before relying on `temperature`.
    generation_options = dict(
        max_new_tokens=2048,
        pad_token_id=128002,
        top_k=1,
        do_sample=False,
        top_p=0.1,
        temperature=temperature,
    )

    with torch.no_grad():
        generated = model.generate(**model_inputs, **generation_options)
        # Drop the echoed prompt so only newly generated tokens are decoded.
        prompt_length = model_inputs['input_ids'].shape[1]
        completion = generated[:, prompt_length:]
        reply = tokenizer.decode(completion[0], skip_special_tokens=True)

    return reply

def get_analysis_prompt(step_number, possible_reasons):
    """
    Constructs the prompt for analyzing delay reasons in one manufacturing step.

    The step may be given either as a bare number (``2``) or as a label such as
    ``"Step 2"`` (the form used as keys of the step/reason table and passed in
    by the UI). A leading "Step" is stripped before substitution so the
    template's own "Step ..." wording is not duplicated ("Step Step 2").

    Args:
        step_number (int | str): The manufacturing step being analyzed.
        possible_reasons (list[str]): Possible delay reasons for this step;
            joined with ", " inside the prompt.
    Returns:
        str: The analysis prompt tailored to the given step and reasons.
    """
    # Normalize "Step 2" / "step2" / 2 to the bare identifier "2".
    step_label = str(step_number).strip()
    prefix, rest = step_label[:4], step_label[4:]
    if prefix.lower() == "step" and rest[:1] and (rest[0].isspace() or rest[0].isdigit()):
        step_label = rest.strip()

    return f"""
    You are a highly advanced AI system specializing in the analysis of tire manufacturing processes to identify and diagnose production delays. You are tasked with analyzing video footage from Step {step_label}, where a delay has been detected. Your goal is to determine the most accurate cause of the delay based on the visual evidence.

    ### Task Context:
    - Manufacturing Step: {step_label}
    - Delay Detected: Yes
    - Possible Reasons for Delay: {', '.join(possible_reasons)}

    ### Required Analysis:
    Carefully examine the video footage frame by frame, focusing on the following aspects:

    #### Technician Presence and Role:
    - **Technician Availability:**
        - Determine if a technician is visibly present during the step.
        - If no technician is present, classify absence as a possible delay cause.
    - **Technician Actions:**
        - If a technician is present, observe their actions:
            - Are they collecting or loading a carcass? Ensure the task is executed efficiently.
            - Are they repairing the inner liner or sidewall? This indicates an issue with material application or alignment.
            - Are they manually adjusting components or reworking parts? This suggests equipment malfunction or material misalignment.

    #### Material and Process Observations:
    - Identify signs of material defects such as:
        - **Misaligned layers**: Visualize if any tire layer is improperly positioned.
        - **Damaged materials**: Check for tears, wrinkles, or missing parts.
        - **Incomplete processes**: Confirm whether all steps were executed correctly (e.g., liner application, bead insertion).
    - Look for excessive manual handling, which might indicate inadequate machine performance.

    #### Equipment and Machine Performance:
    - Evaluate machine operation for:
        - Pauses, stutters, or complete stoppages.
        - Improper alignment during automatic processes.
        - Speed inconsistencies compared to the standard time.

    #### Task-Specific Indicators:
    - **Carcass Handling**: Ensure technicians are promptly collecting and loading carcasses when required.
    - **Inner Liner Repair**: Note if technicians are involved in patching or reapplying the inner liner.
    - **Sidewall Repair**: Identify if technicians are working to fix damaged or misaligned sidewalls.

    ### Output Requirements:
    Your analysis must be detailed and structured in the following format:
    1. **Selected Reason**: [State the most likely reason for the delay from the provided options.]
    2. **Visual Evidence**: [Describe specific frames, activities, or anomalies that support your conclusion.]
    3. **Reasoning**: [Provide a thorough explanation linking visual observations to the selected reason.]
    4. **Alternative Analysis**: [Explain why other reasons are less likely, citing specific evidence or its absence.]
    5. **Recommendations**: [Suggest corrective actions to address the identified delay cause, such as equipment maintenance, technician training, or material quality checks.]

    ### Key Considerations:
    - **Observe Frame-by-Frame**: Carefully analyze each frame to capture subtleties, such as technician actions, material defects, or machine behavior.
    - **Focus on Visual Evidence**: Base your analysis entirely on observable details from the footage. Avoid unverified assumptions.
    - **Evaluate Standard Times**: Compare observed task durations with the standard time for this step. Identify where delays occurred and why.

    ### Note:
    - Prioritize identifying technician involvement in carcass handling, inner liner, or sidewall repair, as these are critical delay causes.
    - Highlight any deviation from expected machine or process performance.
    """


# Load the model and tokenizer once, at import time, so that every call to
# inference() reuses these module-level instances instead of reloading them.
model, tokenizer = load_model()

def inference(video, step_number):
    """Analyze a video and return the model's explanation of the delay.

    This is the UI callback: it never raises, and instead returns a
    human-readable message for missing input or for any failure.

    Args:
        video: Path (or data) of the uploaded video; falsy when none was given.
        step_number: Step label chosen in the dropdown (a key of
            ``DELAY_REASONS``); falsy when nothing is selected.

    Returns:
        str: The model response, or a message describing what is missing or
        what went wrong.
    """
    try:
        if not video:
            return "Please upload a video first."
        # Without this guard an unselected dropdown (None) raised KeyError and
        # surfaced as the unhelpful "An error occurred during analysis: None".
        if not step_number:
            return "Please select a manufacturing step."

        possible_reasons = DELAY_REASONS.get(step_number)
        if possible_reasons is None:
            return f"Unknown manufacturing step: {step_number}"

        prompt = get_analysis_prompt(step_number, possible_reasons)
        temperature = 0.8
        return predict(prompt, video, temperature, model, tokenizer)
    except Exception as e:
        # Top-level UI boundary: report the failure in the output box rather
        # than letting the exception propagate to Gradio.
        return f"An error occurred during analysis: {str(e)}"

def create_interface():
    """Build and return the Gradio Blocks app for delay analysis.

    Layout: a left column with the video upload, the step dropdown and the
    analyze button, and a right column with the result textbox, followed by
    clickable examples. The button runs ``inference`` on the video and step
    and writes the returned text to the result box.

    Returns:
        The constructed ``gr.Blocks`` instance (not yet launched).
    """
    # Example rows: [video file, step label]. The files are referenced by
    # relative name only.
    sample_clips = [
        ["7838_step2_2_eval.mp4", "Step 2"],
        ["7838_step6_2_eval.mp4", "Step 6"],
        ["7838_step8_1_eval.mp4", "Step 8"],
        ["7993_step6_3_eval.mp4", "Step 6"],
        ["7993_step8_3_eval.mp4", "Step 8"],
    ]
    step_choices = list(DELAY_REASONS.keys())

    with gr.Blocks() as demo:
        gr.Markdown("""
        # Manufacturing Delay Analysis System
        Upload a video of the manufacturing step and select the step number. 
        The system will analyze the video and determine the most likely cause of delay.
        """)

        with gr.Row():
            with gr.Column():
                video_input = gr.Video(
                    label="Upload Manufacturing Video",
                    sources=["upload"],
                )
                step_input = gr.Dropdown(
                    choices=step_choices,
                    label="Manufacturing Step",
                )
                run_button = gr.Button("Analyze Delay", variant="primary")

            with gr.Column():
                result_box = gr.Textbox(label="Analysis Result", lines=10)

        # Examples only populate the inputs; results are not cached.
        gr.Examples(
            examples=sample_clips,
            inputs=[video_input, step_input],
            cache_examples=False,
        )

        run_button.click(
            fn=inference,
            inputs=[video_input, step_input],
            outputs=[result_box],
        )

    return demo

if __name__ == "__main__":
    # Build and serve the UI only when this file is executed as a script.
    demo = create_interface()
    # Enable request queuing, then start the server. share=True is forwarded
    # to Gradio's launch(); per the Gradio docs this requests a public share
    # link in addition to the local URL.
    demo.queue().launch(share=True)