# app.py（主入口简化版）
import gradio as gr
from src.model_loader import load_model
from src.video_utils import process_video_for_internvl3

# === Model initialization ===
# Runs once at import time. The two returned objects are kept as module-level
# globals because evaluate_ar passes both of them to model.chat on every call.
tokenizer, model = load_model()

# === Inference endpoint ===
def evaluate_ar(video):
    """Run one single-turn InternVL3 chat over an uploaded AR video.

    Args:
        video: Value delivered by the ``gr.Video`` input component. It is
            ``None`` when the form is submitted without a video.

    Returns:
        The text response obtained from ``model.chat``, or a short message
        asking for a video when none was supplied.
    """
    # Bail out before touching the preprocessing helper or the model: with no
    # upload there is nothing to sample, and the helper would otherwise fail
    # with an error that means nothing to the person using the UI.
    if video is None:
        return "Please upload a video before submitting."

    pixel_values, num_patches_list, prompt = process_video_for_internvl3(video)
    generation_config = dict(max_new_tokens=512)
    # With return_history=True the call is unpacked as (response, history).
    # The history is dropped because each request starts from history=None.
    output, _ = model.chat(
        tokenizer,
        pixel_values,
        prompt,
        generation_config=generation_config,
        num_patches_list=num_patches_list,
        history=None,
        return_history=True
    )
    return output

# === Gradio UI ===
# Build the interface object first (one video input, plain-text output wired
# to evaluate_ar), then launch it as a separate step.
demo = gr.Interface(
    title="InternVL3 AR Evaluation (Single-turn)",
    description="Upload a short AR video clip. The model will sample frames and assess occlusion/rendering quality.",
    inputs=gr.Video(label="Upload your AR video"),
    outputs="text",
    fn=evaluate_ar,
)
demo.launch()