Spaces:

LampOfSocrates
/

hf-nlp-cw-group27

Sleeping

App Files Files Community

Lamp Socrates committed on May 21, 2024

Commit

7551cdd

1 Parent(s): 5c82e3e

latest

Browse files

Files changed (1) hide show

app.py +26 -11

app.py CHANGED Viewed

@@ -2,6 +2,8 @@ import streamlit as st
 from transformers import pipeline
 from transformers import AutoTokenizer, AutoModelForTokenClassification
 import pandas as pd
 @st.cache_resource()
 def load_trained_model():
@@ -18,6 +20,13 @@ def load_trained_model():
     ner_pipeline = pipeline("ner", model=model, tokenizer = tokenizer)
     return ner_pipeline
 def load_random_examples(dataset_name, num_examples=5):
     """
     Load random examples from the specified Hugging Face dataset.
@@ -28,11 +37,11 @@ def load_random_examples(dataset_name, num_examples=5):
         pd.DataFrame: A DataFrame containing the random examples.
     """
     # Load the dataset
-    from datasets import load_dataset
-    dataset = load_dataset("surrey-nlp/PLOD-CW")
     # Convert the dataset to a pandas DataFrame
-    df = pd.DataFrame(dataset['test'])
     # Select random examples
     random_examples = df.sample(n=1)
@@ -162,6 +171,8 @@ def prep_page():
     if text:
         st.write("Entities recognized:")
         entities = model(text)
         # Create a dictionary to map entity labels to colors
         label_colors = {
@@ -173,7 +184,7 @@ def prep_page():
         # Prepare the HTML output with styled entities
         def get_entity_html(text, entities):
-            html = ""
             last_idx = 0
             for entity in entities:
                 start = entity['start']
@@ -181,17 +192,18 @@ def prep_page():
                 label = entity['entity']
                 entity_text = text[start:end]
                 color = label_colors.get(label, 'lightgray')
                 # Append the text before the entity
-                html += text[last_idx:start]
                 # Append the entity with styling
-                html += f'<mark style="background-color: {color}; border-radius: 3px;">{entity_text}</mark>'
                 last_idx = end
             # Append any remaining text after the last entity
-            html += text[last_idx:]
             return html
         # Generate and display the styled HTML
         styled_text = get_entity_html(text, entities)
@@ -209,7 +221,10 @@ if __name__ == '__main__':
     if 'api' in query_params:
         sentence = query_params.get('sentence')
         entities = predict_using_trained(sentence)
-        st.write({"sentence" : sentence , "entities" : entities})
     else:
         prep_page()

 from transformers import pipeline
 from transformers import AutoTokenizer, AutoModelForTokenClassification
 import pandas as pd
+from pprint import pprint
 @st.cache_resource()
 def load_trained_model():
     ner_pipeline = pipeline("ner", model=model, tokenizer = tokenizer)
     return ner_pipeline
+@st.cache_data()
+def load_plod_cw_dataset():
+    from datasets import load_dataset
+    dataset = load_dataset("surrey-nlp/PLOD-CW")
+    return dataset
 def load_random_examples(dataset_name, num_examples=5):
     """
     Load random examples from the specified Hugging Face dataset.
         pd.DataFrame: A DataFrame containing the random examples.
     """
     # Load the dataset
+    dat = load_plod_cw_dataset()
     # Convert the dataset to a pandas DataFrame
+    df = pd.DataFrame(dat['test'])
     # Select random examples
     random_examples = df.sample(n=1)
     if text:
         st.write("Entities recognized:")
         entities = model(text)
+        pprint(entities)
         # Create a dictionary to map entity labels to colors
         label_colors = {
         # Prepare the HTML output with styled entities
         def get_entity_html(text, entities):
+            html = "<div>"
             last_idx = 0
             for entity in entities:
                 start = entity['start']
                 label = entity['entity']
                 entity_text = text[start:end]
                 color = label_colors.get(label, 'lightgray')
                 # Append the text before the entity
+                html += text[last_idx:start].replace(" ", "<br>")
                 # Append the entity with styling
+                html += f'<div style="background-color: {color}; padding: 5px; border-radius: 3px; margin: 5px 0;">{entity_text}</div>'
                 last_idx = end
             # Append any remaining text after the last entity
+            html += text[last_idx:].replace(" ", "<br>")
+            html += "</div>"
             return html
         # Generate and display the styled HTML
         styled_text = get_entity_html(text, entities)
     if 'api' in query_params:
         sentence = query_params.get('sentence')
         entities = predict_using_trained(sentence)
+        response = {"sentence" : sentence , "entities" : entities}
+        pprint(response)
+        st.write(response)
     else:
         prep_page()