Commit
·
dd1b5ba
1
Parent(s):
53e96e8
update app
Browse files
app.py
CHANGED
@@ -16,11 +16,14 @@ ner_pipeline = pipeline(
|
|
16 |
)
|
17 |
|
18 |
|
19 |
-
# Helper function to
|
20 |
def prepare_entities_for_highlight(text, results):
|
21 |
entities = []
|
22 |
seen_spans = set() # Track the spans we have already added to avoid overlaps
|
23 |
|
|
|
|
|
|
|
24 |
for category, entity_list in results.items():
|
25 |
for entity in entity_list:
|
26 |
entity_span = (entity["start"], entity["end"])
|
@@ -28,6 +31,12 @@ def prepare_entities_for_highlight(text, results):
|
|
28 |
# Only add non-overlapping entities
|
29 |
if entity_span not in seen_spans:
|
30 |
seen_spans.add(entity_span)
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
entities.append(
|
32 |
{
|
33 |
"start": entity["start"],
|
@@ -46,6 +55,9 @@ def prepare_entities_for_highlight(text, results):
|
|
46 |
def extract_entities(sentence):
    """Run the NER pipeline on *sentence* and return highlight-ready entities.

    The raw pipeline output is handed, together with the input text, to
    ``prepare_entities_for_highlight``, whose return value is passed through
    unchanged.
    """
    ner_output = ner_pipeline(sentence)

    # Convert the pipeline output into the structure used by HighlightedText
    return prepare_entities_for_highlight(sentence, ner_output)
|
51 |
|
|
|
16 |
)
|
17 |
|
18 |
|
19 |
+
# Helper function to align entities correctly and debug tokenization
|
20 |
def prepare_entities_for_highlight(text, results):
|
21 |
entities = []
|
22 |
seen_spans = set() # Track the spans we have already added to avoid overlaps
|
23 |
|
24 |
+
# Print debug info about tokenization
|
25 |
+
print(f"Original text: {text}")
|
26 |
+
|
27 |
for category, entity_list in results.items():
|
28 |
for entity in entity_list:
|
29 |
entity_span = (entity["start"], entity["end"])
|
|
|
31 |
# Only add non-overlapping entities
|
32 |
if entity_span not in seen_spans:
|
33 |
seen_spans.add(entity_span)
|
34 |
+
entity_text = text[
|
35 |
+
entity["start"] : entity["end"]
|
36 |
+
].strip() # Ensure we're working with the correct portion of the text
|
37 |
+
print(
|
38 |
+
f"Entity text: {entity_text}, Start: {entity['start']}, End: {entity['end']}, Type: {entity['entity']}"
|
39 |
+
)
|
40 |
entities.append(
|
41 |
{
|
42 |
"start": entity["start"],
|
|
|
55 |
def extract_entities(sentence):
    """Run the NER pipeline on *sentence* and return highlight-ready entities.

    The raw pipeline output is printed to stdout for debugging, then handed
    together with the input text to ``prepare_entities_for_highlight``, whose
    return value is passed through unchanged.
    """
    ner_output = ner_pipeline(sentence)

    # Dump the raw pipeline output so its format can be inspected while debugging
    print(f"NER results: {ner_output}")

    # Convert the pipeline output into the structure used by HighlightedText
    return prepare_entities_for_highlight(sentence, ner_output)
|
63 |
|