File size: 3,869 Bytes
14fa848
30d04c6
 
 
14fa848
 
30d04c6
 
 
 
 
b5dd5bc
 
30d04c6
b5dd5bc
30d04c6
b5dd5bc
30d04c6
 
 
 
 
 
100e8bf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30d04c6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b5dd5bc
 
 
 
30d04c6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100e8bf
30d04c6
 
b2cc9b7
30d04c6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import streamlit as st
import wandb
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForTokenClassification

# Demo widget: draw a slider and echo the chosen value together with its square.
x = st.slider(label='Select a value')
st.write(x, 'squared is', x * x)

@st.cache_resource()
def load_trained_model():
    """Build the NER pipeline from the fine-tuned PLOD-CW checkpoint.

    The tokenizer and the token-classification model are both loaded from
    the same Hugging Face checkpoint and wrapped in a ``"ner"`` pipeline.
    The function is decorated with ``st.cache_resource`` so Streamlit can
    reuse the returned object instead of rebuilding it.

    Returns:
        The ``transformers`` pipeline object created for the ``"ner"`` task.
    """
    checkpoint = "LampOfSocrates/bert-cased-plodcw-sourav"

    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForTokenClassification.from_pretrained(checkpoint)

    # Log the id -> label mapping stored in the model config.
    id2label = model.config.id2label
    print(f"Can recognise the following labels {id2label}")

    return pipeline("ner", model=model, tokenizer=tokenizer)

def load_data():
    """Load the ``surrey-nlp/PLOD-CW`` dataset and return it.

    Returns:
        Whatever ``datasets.load_dataset`` returns for
        ``"surrey-nlp/PLOD-CW"`` (previously the result was discarded and
        the function returned ``None``).
    """
    # Imported here so `datasets` is only required when this function is called.
    from datasets import load_dataset

    return load_dataset("surrey-nlp/PLOD-CW")


def render_entities(tokens, entities):
    """Render a styled two-column table pairing each token with its entity.

    Emits, in order: the page configuration, a CSS block, a title, a short
    description, and a table with the columns ``"Token"`` and ``"Entity"``.

    Args:
        tokens: Values for the "Token" column.
        entities: Values for the "Entity" column; presumably parallel to
            ``tokens`` (same length and order) -- this is not checked here.
    """
    # Local import keeps this block self-contained; streamlit is already a
    # dependency of the file.
    from streamlit.errors import StreamlitAPIException

    # Page configuration. This function runs after other Streamlit commands
    # (the module-level slider, the title and text area in prep_page), and
    # Streamlit releases that require set_page_config() to be the first
    # command raise StreamlitAPIException in that situation. The page title
    # and layout are cosmetic, so fall back to the defaults instead of
    # aborting the whole page.
    try:
        st.set_page_config(page_title="NER Token Entities", layout="centered")
    except StreamlitAPIException:
        pass

    # Custom CSS for chilled and cool theme
    st.markdown("""
        <style>
        body {
            font-family: 'Arial', sans-serif;
            background-color: #f0f0f5;
            color: #333333;
        }
        table {
            width: 100%;
            border-collapse: collapse;
        }
        th, td {
            padding: 12px;
            text-align: left;
            border-bottom: 1px solid #dddddd;
        }
        th {
            background-color: #4CAF50;
            color: white;
        }
        tr:hover {
            background-color: #f5f5f5;
        }
        </style>
        """, unsafe_allow_html=True)

    # Title and description
    st.title("Token Entities Table")
    st.write("This table shows the entity corresponding to each token in a cool and chilled theme.")

    # One column per argument; st.table renders the mapping as a static table.
    table_data = {"Token": tokens, "Entity": entities}
    st.table(table_data)

# Background colour used to highlight each entity label in the rendered text.
_LABEL_COLORS = {
    'B-LF': 'lightblue',
    'B-O': 'lightgreen',
    'B-AC': 'lightcoral',
    'I-LF': 'lightyellow',
}


def _entity_html(text, entities, label_colors, default_color='lightgray'):
    """Return ``text`` as HTML with each entity span wrapped in a ``<mark>``.

    Args:
        text: The raw input string the entities refer to.
        entities: Iterable of dicts with ``'start'``, ``'end'`` and
            ``'entity'`` keys, in ascending ``'start'`` order (assumed to be
            the output of the NER pipeline -- TODO confirm ordering).
        label_colors: Mapping from entity label to CSS colour.
        default_color: Colour used for labels missing from ``label_colors``.

    Returns:
        An HTML string. All text taken from ``text`` is HTML-escaped because
        the result is rendered with ``unsafe_allow_html=True``.
    """
    from html import escape

    parts = []
    last_idx = 0
    for entity in entities:
        start, end = entity['start'], entity['end']
        # Offsets can be missing when the tokenizer cannot provide them;
        # such an entity cannot be located in the text, so leave it unmarked.
        if start is None or end is None:
            continue
        color = label_colors.get(entity['entity'], default_color)

        # Plain text between the previous entity and this one.
        parts.append(escape(text[last_idx:start]))
        # The entity itself, highlighted.
        parts.append(
            f'<mark style="background-color: {color}; border-radius: 3px;">'
            f'{escape(text[start:end])}</mark>'
        )
        last_idx = end

    # Whatever follows the last entity.
    parts.append(escape(text[last_idx:]))
    return "".join(parts)


def prep_page():
    """Draw the NER page: a text box, the highlighted text and an entity table.

    Loads the pipeline via ``load_trained_model()``, reads a sentence from a
    text area and, when it is non-empty, runs the pipeline on it. The result
    is shown twice: as the input text with entity spans highlighted, and as
    a token/label table via ``render_entities``.
    """
    model = load_trained_model()

    # Streamlit app
    st.title("Named Entity Recognition with BERT on PLOD-CW")
    st.write("Enter a sentence to see the named entities recognized by the model.")

    # Text input
    text = st.text_area("Enter your sentence here:")
    if not text:
        # Nothing to analyse yet.
        return

    # Perform NER and display results
    st.write("Entities recognized:")
    entities = model(text)

    # Highlighted rendering of the input text.
    st.markdown(_entity_html(text, entities, _LABEL_COLORS), unsafe_allow_html=True)

    # render_entities expects two parallel columns, so split the pipeline
    # output into the token strings and their predicted labels rather than
    # passing the raw text and the raw result dicts.
    render_entities(
        [entity['word'] for entity in entities],
        [entity['entity'] for entity in entities],
    )

# Build the page only when this file is executed as the main script.
if __name__ == "__main__":
    prep_page()