Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -2,11 +2,13 @@ import torch
|
|
2 |
from transformers import BertTokenizerFast, BertForTokenClassification
|
3 |
import gradio as gr
|
4 |
|
|
|
5 |
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
|
6 |
model = BertForTokenClassification.from_pretrained('maximuspowers/bias-detection-ner')
|
7 |
-
model.eval()
|
8 |
-
model.to('cuda' if torch.cuda.is_available() else 'cpu')
|
9 |
|
|
|
10 |
id2label = {
|
11 |
0: 'O',
|
12 |
1: 'B-STEREO',
|
@@ -18,10 +20,12 @@ id2label = {
|
|
18 |
}
|
19 |
|
20 |
def predict_ner_tags(sentence):
|
|
|
21 |
inputs = tokenizer(sentence, return_tensors="pt", padding=True, truncation=True, max_length=128)
|
22 |
input_ids = inputs['input_ids'].to(model.device)
|
23 |
attention_mask = inputs['attention_mask'].to(model.device)
|
24 |
|
|
|
25 |
with torch.no_grad():
|
26 |
outputs = model(input_ids=input_ids, attention_mask=attention_mask)
|
27 |
logits = outputs.logits
|
@@ -39,19 +43,31 @@ def predict_ner_tags(sentence):
|
|
39 |
return result
|
40 |
|
41 |
def format_output(result):
|
42 |
-
|
|
|
43 |
for token, labels in result:
|
44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
return formatted_output
|
46 |
|
47 |
iface = gr.Interface(
|
48 |
fn=predict_ner_tags,
|
49 |
inputs="text",
|
50 |
-
outputs="
|
51 |
title="Named Entity Recognition with BERT",
|
52 |
-
description="Enter a sentence to predict NER tags using BERT model trained for multi-label classification.",
|
53 |
examples=["Tall men are so clumsy."],
|
54 |
-
allow_flagging="never"
|
|
|
55 |
)
|
56 |
|
57 |
if __name__ == "__main__":
|
|
|
2 |
from transformers import BertTokenizerFast, BertForTokenClassification
|
3 |
import gradio as gr
|
4 |
|
5 |
+
# Load tokenizer and model
|
6 |
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
|
7 |
model = BertForTokenClassification.from_pretrained('maximuspowers/bias-detection-ner')
|
8 |
+
model.eval() # Set the model to evaluation mode
|
9 |
+
model.to('cuda' if torch.cuda.is_available() else 'cpu') # Move model to appropriate device
|
10 |
|
11 |
+
# Define label mappings
|
12 |
id2label = {
|
13 |
0: 'O',
|
14 |
1: 'B-STEREO',
|
|
|
20 |
}
|
21 |
|
22 |
def predict_ner_tags(sentence):
|
23 |
+
# Tokenize the sentence
|
24 |
inputs = tokenizer(sentence, return_tensors="pt", padding=True, truncation=True, max_length=128)
|
25 |
input_ids = inputs['input_ids'].to(model.device)
|
26 |
attention_mask = inputs['attention_mask'].to(model.device)
|
27 |
|
28 |
+
# Predict using the model
|
29 |
with torch.no_grad():
|
30 |
outputs = model(input_ids=input_ids, attention_mask=attention_mask)
|
31 |
logits = outputs.logits
|
|
|
43 |
return result
|
44 |
|
45 |
def format_output(result):
    """Render per-token NER labels as an HTML snippet.

    Args:
        result: Iterable of ``(token, labels)`` pairs, where ``labels`` is a
            collection of tag strings such as ``"B-STEREO"`` or ``"I-GEN"``.

    Returns:
        An HTML string: a single ``<div>`` wrapping one styled ``<span>``
        per token, each span followed by a space.
    """
    # Imported locally so the function stays self-contained.
    import html

    # Each tag family (begin/inside variants) and the inline CSS that marks it.
    tag_styles = (
        (("B-STEREO", "I-STEREO"), "border-bottom: 2px solid blue;"),
        (("B-GEN", "I-GEN"), "background-color: green; color: white;"),
        (("B-UNFAIR", "I-UNFAIR"), "border: 2px dashed red;"),
    )

    parts = ["<div style='font-family: Arial;'>"]
    for token, labels in result:
        styles = [
            css
            for tags, css in tag_styles
            if any(tag in labels for tag in tags)
        ]
        style_string = " ".join(styles)
        # Tokens originate from user-typed text; escape them so characters
        # like "<" or "&" are displayed literally instead of parsed as markup.
        safe_token = html.escape(str(token))
        parts.append(
            f"<span style='{style_string} padding: 3px; margin: 2px;'>"
            f"{safe_token}</span> "
        )
    parts.append("</div>")
    return "".join(parts)
|
61 |
|
62 |
def _predict_as_html(sentence):
    """Run NER tagging on ``sentence`` and return the styled HTML rendering."""
    # The HTML output component displays the callback's return value as-is,
    # so the raw (token, labels) list must go through format_output first.
    return format_output(predict_ner_tags(sentence))


# Build the Gradio UI: one text box in, one HTML panel out.
iface = gr.Interface(
    fn=_predict_as_html,
    inputs="text",
    # gr.HTML is the current component class; the legacy gr.outputs
    # namespace is no longer available in recent Gradio releases.
    outputs=gr.HTML(label="Output"),
    title="Named Entity Recognition with BERT",
    description="Enter a sentence to predict NER tags using a BERT model trained for multi-label classification. Different styles represent different entity types.",
    examples=["Tall men are so clumsy."],
    allow_flagging="never",
    theme="default",
)
|
72 |
|
73 |
if __name__ == "__main__":
|