Commit
·
e9c0cd2
1
Parent(s):
b3b05b5
Let's see how the highlighting works
Browse files
app.py
CHANGED
@@ -41,13 +41,18 @@ def format_entities_as_html(entities):
|
|
41 |
return html_output
|
42 |
|
43 |
|
44 |
-
#
|
45 |
-
def
|
|
|
|
|
|
|
|
|
|
|
46 |
entities = []
|
47 |
seen_spans = set() # Track the spans we have already added to avoid overlaps
|
48 |
|
49 |
# Print debug info about tokenization
|
50 |
-
print(f"Original text: {
|
51 |
print("Results:", results)
|
52 |
# it should look like:
|
53 |
# [{'entity': 'org.ent.pressagency.Reuters', 'score': np.float32(98.47), 'index': 78, 'text': 'Reuters', 'start': 440, 'end': 447}]
|
@@ -57,29 +62,18 @@ def prepare_entities_for_highlight(text, results):
|
|
57 |
# Only add non-overlapping entities
|
58 |
if entity_span not in seen_spans:
|
59 |
seen_spans.add(entity_span)
|
60 |
-
entity_text =
|
61 |
entity["start"] : entity["end"]
|
62 |
].strip() # Ensure we're working with the correct portion of the text
|
63 |
entity["surface"] = entity_text
|
|
|
64 |
print(f"Entity text: {entity}")
|
65 |
|
66 |
entities.append(entity)
|
67 |
|
68 |
# Sort entities by their start position
|
69 |
entities = sorted(entities, key=lambda x: x["start"])
|
70 |
-
|
71 |
-
return format_entities_as_html(entities)
|
72 |
-
|
73 |
-
|
74 |
-
# Function to process the sentence and extract entities
|
75 |
-
def extract_entities(sentence):
|
76 |
-
results = ner_pipeline(sentence)
|
77 |
-
|
78 |
-
# Debugging the result format
|
79 |
-
print(f"NER results: {results}")
|
80 |
-
|
81 |
-
# Format the results for HighlightedText
|
82 |
-
return prepare_entities_for_highlight(sentence, results)
|
83 |
|
84 |
|
85 |
# Create Gradio interface
|
@@ -93,7 +87,8 @@ def ner_app_interface():
|
|
93 |
interface = gr.Interface(
|
94 |
fn=extract_entities,
|
95 |
inputs=input_sentence,
|
96 |
-
outputs=
|
|
|
97 |
title="Named Entity Recognition",
|
98 |
description="Enter a sentence to extract named entities using the NER model from the Impresso project.",
|
99 |
examples=[
|
|
|
41 |
return html_output
|
42 |
|
43 |
|
44 |
+
# Function to process the sentence and extract entities
|
45 |
+
def extract_entities(sentence):
|
46 |
+
results = ner_pipeline(sentence)
|
47 |
+
|
48 |
+
# Debugging the result format
|
49 |
+
print(f"NER results: {results}")
|
50 |
+
|
51 |
entities = []
|
52 |
seen_spans = set() # Track the spans we have already added to avoid overlaps
|
53 |
|
54 |
# Print debug info about tokenization
|
55 |
+
print(f"Original text: {sentence}")
|
56 |
print("Results:", results)
|
57 |
# it should look like:
|
58 |
# [{'entity': 'org.ent.pressagency.Reuters', 'score': np.float32(98.47), 'index': 78, 'text': 'Reuters', 'start': 440, 'end': 447}]
|
|
|
62 |
# Only add non-overlapping entities
|
63 |
if entity_span not in seen_spans:
|
64 |
seen_spans.add(entity_span)
|
65 |
+
entity_text = sentence[
|
66 |
entity["start"] : entity["end"]
|
67 |
].strip() # Ensure we're working with the correct portion of the text
|
68 |
entity["surface"] = entity_text
|
69 |
+
entity["entity"] = entity["type"]
|
70 |
print(f"Entity text: {entity}")
|
71 |
|
72 |
entities.append(entity)
|
73 |
|
74 |
# Sort entities by their start position
|
75 |
entities = sorted(entities, key=lambda x: x["start"])
|
76 |
+
return entities
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
|
78 |
|
79 |
# Create Gradio interface
|
|
|
87 |
interface = gr.Interface(
|
88 |
fn=extract_entities,
|
89 |
inputs=input_sentence,
|
90 |
+
outputs=[gr.HighlightedText(label="Text with mentions")],
|
91 |
+
# outputs=output_entities,
|
92 |
title="Named Entity Recognition",
|
93 |
description="Enter a sentence to extract named entities using the NER model from the Impresso project.",
|
94 |
examples=[
|