# app.py (simplified main entry point)
import gradio as gr

from src.model_loader import load_model
from src.video_utils import process_video_for_internvl3

# === Model initialization ===
# Loaded once at module import so every request reuses the same objects.
tokenizer, model = load_model()


# === Inference endpoint ===
def evaluate_ar(video):
    """Run one single-turn model query on an uploaded video and return the reply.

    Args:
        video: The value Gradio passes from the ``gr.Video`` input. It is
            forwarded unchanged to ``process_video_for_internvl3``.
            NOTE(review): presumably a path to the uploaded file -- confirm
            against the installed Gradio version.

    Returns:
        The first element of the pair returned by ``model.chat`` (the model's
        reply), which Gradio renders in the ``"text"`` output.

    Raises:
        gr.Error: If no video was provided, so the UI shows a readable
            message rather than a traceback from the preprocessing helper.
    """
    # Guard against a submission with no upload before touching the helper.
    if video is None:
        raise gr.Error("Please upload a video before submitting.")

    pixel_values, num_patches_list, prompt = process_video_for_internvl3(video)
    generation_config = dict(max_new_tokens=512)

    # history=None starts a fresh conversation on every call (single-turn).
    # The returned history is discarded because nothing is carried over.
    output, _ = model.chat(
        tokenizer,
        pixel_values,
        prompt,
        generation_config=generation_config,
        num_patches_list=num_patches_list,
        history=None,
        return_history=True,
    )
    return output


# === Gradio interface ===
demo = gr.Interface(
    fn=evaluate_ar,
    inputs=gr.Video(label="Upload your AR video"),
    outputs="text",
    title="InternVL3 AR Evaluation (Single-turn)",
    description="Upload a short AR video clip. The model will sample frames and assess occlusion/rendering quality.",
)

# Launch only when executed as a script, so importing this module (e.g. from
# tests or tooling) does not start a web server.
if __name__ == "__main__":
    demo.launch()