import gradio as gr
from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer

# Identifier of the pretrained NER model; shared by the tokenizer and the pipeline
MODEL_NAME = "impresso-project/ner-stacked-bert-multilingual"

# The tokenizer is created explicitly and handed to the pipeline below
ner_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Build the "generic-ner" pipeline once at import time, pinned to the CPU.
# NOTE: trust_remote_code=True allows transformers to execute code shipped
# with the model repository.
ner_pipeline = pipeline(
    "generic-ner",
    model=MODEL_NAME,
    tokenizer=ner_tokenizer,
    trust_remote_code=True,
    device="cpu",
)

# Helper function to format entities as human-readable lines
def print_nicely(entities):
    """Return one descriptive line per entity, joined by newlines.

    Despite its name, this function does not print; it builds and returns
    a string.

    Args:
        entities: Iterable of mappings, each providing the keys ``entity``,
            ``score``, ``word``, ``start`` and ``end``.

    Returns:
        A string with one line per entity (score shown with two decimals and
        a trailing ``%``, surrounding whitespace removed from the text), or
        an empty string when ``entities`` is empty.
    """
    return "\n".join(
        f"Entity: {entity['entity']} | Confidence: {entity['score']:.2f}% | Text: {entity['word'].strip()} | Start: {entity['start']} | End: {entity['end']}"
        for entity in entities
    )

# Function to process the sentence and extract entities
def extract_entities(sentence):
    """Run the NER pipeline on ``sentence`` and return the entities as text.

    Args:
        sentence: Text to analyse. ``None``, empty, or whitespace-only input
            is treated as "nothing to analyse".

    Returns:
        The formatted entity lines (see ``print_nicely``) of every result
        group, joined by newlines. An empty string when there is no input or
        no group contains any entity.
    """
    # Blank input cannot contain entities; skip the model call entirely.
    if not sentence or not sentence.strip():
        return ""

    results = ner_pipeline(sentence)

    # The pipeline result is consumed as a mapping whose values are entity lists.
    formatted_groups = (print_nicely(group) for group in results.values())

    # Drop groups without entities so they do not leave blank lines in the output.
    return "\n".join(text for text in formatted_groups if text)

# Build and launch the Gradio interface
def ner_app_interface():
    """Assemble the Gradio UI around ``extract_entities`` and launch it.

    The interface has a five-line text box for the input sentence and a
    text box that displays the extracted entities.
    """
    demo = gr.Interface(
        fn=extract_entities,
        inputs=gr.Textbox(
            lines=5,
            label="Input Sentence",
            placeholder="Enter a sentence for NER...",
        ),
        outputs=gr.Textbox(label="Extracted Entities"),
        title="Named Entity Recognition",
        description="Enter a sentence to extract named entities using the NER model from the Impresso project.",
    )
    demo.launch()

# Run the app only when this file is executed directly as a script;
# importing it as a module does not launch the interface.
if __name__ == "__main__":
    ner_app_interface()