Spaces:

maximuspowers
/

bias-detection-ner

Running

App Files Files Community

maximuspowers committed on Dec 15, 2024

Commit

34ab835

verified ·

1 Parent(s): 2c53668

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -23

app.py CHANGED Viewed

@@ -3,11 +3,13 @@ import torch
 from transformers import BertTokenizerFast, BertForTokenClassification
 import gradio as gr
 tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
 model = BertForTokenClassification.from_pretrained('maximuspowers/bias-detection-ner')
 model.eval()
 model.to('cuda' if torch.cuda.is_available() else 'cpu')
 id2label = {
     0: 'O',
     1: 'B-STEREO',
@@ -20,23 +22,26 @@ id2label = {
 label2id = {v: k for k, v in id2label.items()}
 label_colors = {
-    "STEREO": "rgba(255, 0, 0, 0.2)",
-    "GEN": "rgba(0, 0, 255, 0.2)",
-    "UNFAIR": "rgba(0, 255, 0, 0.2)"
 }
 def post_process_entities(result):
     prev_entity_type = None
-    for i, token_data in enumerate(result):
         labels = token_data["labels"]
         labels = list(set(labels))
         for entity_type in ["GEN", "UNFAIR", "STEREO"]:
             if f"B-{entity_type}" in labels and f"I-{entity_type}" in labels:
                 labels.remove(f"I-{entity_type}")
         current_entity_type = None
         current_label = None
         for label in labels:
@@ -48,19 +53,18 @@ def post_process_entities(result):
             if current_label.startswith("B-") and prev_entity_type == current_entity_type:
                 labels.remove(current_label)
                 labels.append(f"I-{current_entity_type}")
             if current_label.startswith("I-") and prev_entity_type != current_entity_type:
                 labels.remove(current_label)
                 labels.append(f"B-{current_entity_type}")
             prev_entity_type = current_entity_type
         else:
-            prev_entity_type = None
         token_data["labels"] = labels
     return result
 def generate_json(sentence):
     inputs = tokenizer(sentence, return_tensors="pt", padding=True, truncation=True, max_length=128)
     input_ids = inputs['input_ids'].to(model.device)
@@ -84,12 +88,13 @@ def generate_json(sentence):
     return json.dumps(result, indent=4)
 def predict_ner_tags_with_json(sentence):
     json_result = generate_json(sentence)
     result = json.loads(json_result)
-    word_row = []
     stereo_row = []
     gen_row = []
     unfair_row = []
@@ -141,16 +146,30 @@ def predict_ner_tags_with_json(sentence):
     return f"{matrix_html}<br><pre>{json_result}</pre>"
-iface = gr.Interface(
-    fn=predict_ner_tags_with_json,
-    inputs=[gr.Textbox(label="Input Sentence")],
-    outputs=[gr.HTML(label="Entity Matrix and JSON Output")],
-    title="Social Bias Named Entity Recognition (with BERT) 🕵",
-    description=("Enter a sentence to predict biased parts of speech tags. This model uses multi-label BertForTokenClassification, to label the entities: (GEN)eralizations, (UNFAIR)ness, and (STEREO)types. Labels follow BIO format. Try it out :)."
-                 "<br><br>Read more about how this model was trained in this <a href='https://huggingface.co/blog/maximuspowers/bias-entity-recognition' target='_blank'>blog post</a>."
-                 "<br>Model Page: <a href='https://huggingface.co/maximuspowers/bias-detection-ner' target='_blank'>Bias Detection NER</a>."),
-    allow_flagging="never"
-)
-if __name__ == "__main__":
-    iface.launch(share=True)

 from transformers import BertTokenizerFast, BertForTokenClassification
 import gradio as gr
+# Initialize tokenizer and model
 tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
 model = BertForTokenClassification.from_pretrained('maximuspowers/bias-detection-ner')
 model.eval()
 model.to('cuda' if torch.cuda.is_available() else 'cpu')
+# Mapping IDs to labels
 id2label = {
     0: 'O',
     1: 'B-STEREO',
 label2id = {v: k for k, v in id2label.items()}
+# Entity colors for highlights
 label_colors = {
+    "STEREO": "rgba(255, 0, 0, 0.2)",  # Light Red
+    "GEN": "rgba(0, 0, 255, 0.2)",     # Light Blue
+    "UNFAIR": "rgba(0, 255, 0, 0.2)"   # Light Green
 }
+# Post-process entity tags
 def post_process_entities(result):
     prev_entity_type = None
+    for token_data in result:
         labels = token_data["labels"]
         labels = list(set(labels))
+        # Handle conflicting B- and I- tags for the same entity
         for entity_type in ["GEN", "UNFAIR", "STEREO"]:
             if f"B-{entity_type}" in labels and f"I-{entity_type}" in labels:
                 labels.remove(f"I-{entity_type}")
+        # Handle sequence rules
         current_entity_type = None
         current_label = None
         for label in labels:
             if current_label.startswith("B-") and prev_entity_type == current_entity_type:
                 labels.remove(current_label)
                 labels.append(f"I-{current_entity_type}")
             if current_label.startswith("I-") and prev_entity_type != current_entity_type:
                 labels.remove(current_label)
                 labels.append(f"B-{current_entity_type}")
             prev_entity_type = current_entity_type
         else:
+            prev_entity_type = None
         token_data["labels"] = labels
     return result
+# Generate JSON results
 def generate_json(sentence):
     inputs = tokenizer(sentence, return_tensors="pt", padding=True, truncation=True, max_length=128)
     input_ids = inputs['input_ids'].to(model.device)
     return json.dumps(result, indent=4)
+# Predict function
 def predict_ner_tags_with_json(sentence):
     json_result = generate_json(sentence)
     result = json.loads(json_result)
+    word_row = []
     stereo_row = []
     gen_row = []
     unfair_row = []
     return f"{matrix_html}<br><pre>{json_result}</pre>"
+# Gradio Interface
+iface = gr.Blocks()
+with iface:
+    with gr.Row():
+        gr.Markdown(
+            """
+            # Social Bias Named Entity Recognition (with BERT) 🕵
+            Enter a sentence to predict biased parts of speech tags. This model uses multi-label `BertForTokenClassification` to label the entities:
+            - **Generalizations (GEN)**
+            - **Unfairness (UNFAIR)**
+            - **Stereotypes (STEREO)**
+            Labels follow the BIO format. Try it out!
+            - **[Blog Post](https://huggingface.co/blog/maximuspowers/bias-entity-recognition)**
+            - **[Model Page](https://huggingface.co/maximuspowers/bias-detection-ner)**
+            """
+        )
+    with gr.Row():
+        input_box = gr.Textbox(label="Input Sentence")
+    with gr.Row():
+        output_box = gr.HTML(label="Entity Matrix and JSON Output")
+    input_box.change(predict_ner_tags_with_json, inputs=[input_box], outputs=[output_box])
+iface.launch(share=True)