from transformers import AutoTokenizer, AutoModelForSequenceClassification
import gradio as gr
import torch

# Load the model and tokenizer from Hugging Face Hub
model_name = "julian-schelb/xlm-roberta-base-latin-intertextuality"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Move the model to GPU if available, otherwise run on CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)


def predict_intertextuality(sentence1, sentence2):
    """Predict intertextuality probabilities for a pair of Latin sentences."""
    # Tokenize the sentence pair as a single model input
    inputs = tokenizer(
        sentence1,
        sentence2,
        return_tensors="pt",
        truncation=True,
        padding="max_length",
        max_length=512  # Adjust based on the model's configuration
    ).to(device)

    # Perform inference
    model.eval()
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
        probs = torch.softmax(logits, dim=1).squeeze().cpu().numpy()

    # Map probabilities to labels
    return {"Yes": float(probs[1]), "No": float(probs[0])}


# Define the Gradio interface
inputs = [
    gr.Textbox(label="Latin Sentence 1"),
    gr.Textbox(label="Latin Sentence 2")
]
outputs = gr.Label(label="Intertextuality Probabilities", num_top_classes=2)

gradio_app = gr.Interface(
    fn=predict_intertextuality,
    inputs=inputs,
    outputs=outputs,
    title="Latin Intertextuality Checker",
    description="Enter two Latin sentences to get the probabilities for 'Yes' (intertextual) or 'No' (not intertextual).",
    # allow_flagging="never"  # Disable the flag button
)

if __name__ == "__main__":
    gradio_app.launch()