"""Gradio visual-question-answering demo backed by the Triton-InternVL2-2B model.

Loads the model onto the GPU at import time (standard for Gradio/Spaces apps)
and exposes a simple image + question interface.
"""

import torch
import gradio as gr
from transformers import AutoModel, AutoTokenizer, pipeline

# Checkpoint repo; trust_remote_code is required because it ships custom model code.
path = "radna/Triton-InternVL2-2B"

# Load in bfloat16 with reduced CPU memory pressure, then move to GPU for inference.
# NOTE(review): this crashes at import time on a CUDA-less host — intentional for
# GPU deployments, but worth confirming against the target environment.
model = (
    AutoModel.from_pretrained(
        path, torch_dtype=torch.bfloat16, low_cpu_mem_usage=True, trust_remote_code=True
    )
    .eval()
    .cuda()
)
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
inference = pipeline(task="visual-question-answering", model=model, tokenizer=tokenizer)


def predict(input_img, questions):
    """Run visual question answering and return the raw prediction as text.

    Args:
        input_img: PIL image supplied by the Gradio image component.
        questions: free-text question string.

    Returns:
        The pipeline output converted with ``str``.

    Raises:
        gr.Error: any inference failure, surfaced as a Gradio error popup.
    """
    try:
        predictions = inference(question=questions, image=input_img)
        return str(predictions)
    except Exception as e:
        # Show the error message as a popup in the UI for 25 seconds.
        # Fix: chain with `from e` so the server log keeps the original
        # traceback instead of silently discarding the cause.
        raise gr.Error(str(e), duration=25) from e


gradio_app = gr.Interface(
    predict,
    inputs=[
        gr.Image(label="Select A Image", sources=["upload", "webcam"], type="pil"),
        "text",
    ],
    outputs="text",
    title="Plz ask my anything",
)

if __name__ == "__main__":
    # show_error surfaces tracebacks in the browser; debug keeps the process
    # in the foreground with live logs.
    gradio_app.launch(show_error=True, debug=True)