Spaces:

LampOfSocrates
/

hf-nlp-cw-group27

Sleeping

File size: 3,869 Bytes

import streamlit as st
import wandb
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForTokenClassification

# Demo widget: pick a number with a slider and show its square below it.
x = st.slider("Select a value")
x_squared = x * x
st.write(x, "squared is", x_squared)

@st.cache_resource()
def load_trained_model():
    """Build the NER pipeline around the fine-tuned PLOD-CW checkpoint.

    The tokenizer and the token-classification model are both loaded from
    the ``LampOfSocrates/bert-cased-plodcw-sourav`` checkpoint. The function
    is decorated with ``st.cache_resource`` so the loaded pipeline can be
    reused instead of being rebuilt on every call.

    Returns:
        The ``pipeline("ner", ...)`` object wrapping the model and tokenizer.
    """
    checkpoint = "LampOfSocrates/bert-cased-plodcw-sourav"

    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForTokenClassification.from_pretrained(checkpoint)

    # Log the id -> label mapping stored in the model config.
    print(f"Can recognise the following labels {model.config.id2label}")

    return pipeline("ner", model=model, tokenizer=tokenizer)

def load_data():
    """Load the ``surrey-nlp/PLOD-CW`` dataset and return it.

    Returns:
        Whatever ``datasets.load_dataset("surrey-nlp/PLOD-CW")`` returns.
        Previously the loaded dataset was dropped and ``None`` was returned,
        which made the function useless to callers.
    """
    # Imported lazily so `datasets` is only required when data is requested.
    from datasets import load_dataset

    return load_dataset("surrey-nlp/PLOD-CW")


def render_entities(tokens, entities):
    """
    Renders a page with a 2-column table showing the entity corresponding to each token.

    Args:
        tokens: Values for the "Token" column.
        entities: Values for the "Entity" column, aligned with ``tokens``.
    """
    # Imported locally so this block does not depend on extra file-level imports.
    from streamlit.errors import StreamlitAPIException

    # Page configuration is best-effort: this function is called after other
    # Streamlit elements have already been emitted, and Streamlit releases
    # that require set_page_config to be the first command reject the call
    # at that point. The table is still worth rendering in that case.
    try:
        st.set_page_config(page_title="NER Token Entities", layout="centered")
    except StreamlitAPIException:
        pass

    # Custom CSS for chilled and cool theme
    st.markdown("""
        <style>
        body {
            font-family: 'Arial', sans-serif;
            background-color: #f0f0f5;
            color: #333333;
        }
        table {
            width: 100%;
            border-collapse: collapse;
        }
        th, td {
            padding: 12px;
            text-align: left;
            border-bottom: 1px solid #dddddd;
        }
        th {
            background-color: #4CAF50;
            color: white;
        }
        tr:hover {
            background-color: #f5f5f5;
        }
        </style>
        """, unsafe_allow_html=True)

    # Title and description
    st.title("Token Entities Table")
    st.write("This table shows the entity corresponding to each token in a cool and chilled theme.")

    # Create the table: one column per argument
    table_data = {"Token": tokens, "Entity": entities}
    st.table(table_data)

# Background colour used to highlight each entity label; labels that are not
# listed here fall back to light gray.
_LABEL_COLORS = {
    'B-LF': 'lightblue',
    'B-O': 'lightgreen',
    'B-AC': 'lightcoral',
    'I-LF': 'lightyellow',
}


def _entity_html(text, entities):
    """Return ``text`` as HTML with every located entity span wrapped in ``<mark>``."""
    # Local import keeps the helper self-contained.
    from html import escape

    parts = []
    last_idx = 0
    for entity in entities:
        start = entity.get('start')
        end = entity.get('end')
        if start is None or end is None:
            # Without character offsets the span cannot be located in the
            # text, so it cannot be highlighted.
            continue

        color = _LABEL_COLORS.get(entity['entity'], 'lightgray')

        # The text is user input rendered with unsafe_allow_html=True, so it
        # is escaped; only the <mark> wrapper itself is emitted as raw HTML.
        parts.append(escape(text[last_idx:start]))
        parts.append(
            f'<mark style="background-color: {color}; border-radius: 3px;">'
            f'{escape(text[start:end])}</mark>'
        )
        last_idx = end

    # Append any remaining text after the last entity
    parts.append(escape(text[last_idx:]))
    return "".join(parts)


def prep_page():
    """Render the NER demo page.

    Loads the NER pipeline, asks the user for a sentence and, when one is
    entered, shows the sentence with the recognised entities highlighted,
    followed by a table with one row per recognised token and its label.
    """
    model = load_trained_model()

    # Streamlit app
    st.title("Named Entity Recognition with BERT on PLOD-CW")
    st.write("Enter a sentence to see the named entities recognized by the model.")

    # Text input
    text = st.text_area("Enter your sentence here:")

    # Nothing to analyse until the user has typed something.
    if not text:
        return

    st.write("Entities recognized:")
    entities = model(text)

    # Generate and display the styled HTML
    st.markdown(_entity_html(text, entities), unsafe_allow_html=True)

    # The table expects two aligned columns (token, label), not the raw
    # sentence and the raw pipeline dictionaries.
    tokens = [entity['word'] for entity in entities]
    labels = [entity['entity'] for entity in entities]
    render_entities(tokens, labels)

# Script entry point: build the page when the file is executed directly.
if __name__ == "__main__":
    prep_page()