emanuelaboros committed on
Commit
dd1b5ba
·
1 Parent(s): 53e96e8

update app

Browse files
Files changed (1) hide show
  1. app.py +13 -1
app.py CHANGED
@@ -16,11 +16,14 @@ ner_pipeline = pipeline(
16
  )
17
 
18
 
19
- # Helper function to flatten entities and prepare them for HighlightedText
20
  def prepare_entities_for_highlight(text, results):
21
  entities = []
22
  seen_spans = set() # Track the spans we have already added to avoid overlaps
23
 
 
 
 
24
  for category, entity_list in results.items():
25
  for entity in entity_list:
26
  entity_span = (entity["start"], entity["end"])
@@ -28,6 +31,12 @@ def prepare_entities_for_highlight(text, results):
28
  # Only add non-overlapping entities
29
  if entity_span not in seen_spans:
30
  seen_spans.add(entity_span)
 
 
 
 
 
 
31
  entities.append(
32
  {
33
  "start": entity["start"],
@@ -46,6 +55,9 @@ def prepare_entities_for_highlight(text, results):
46
  def extract_entities(sentence):
47
  results = ner_pipeline(sentence)
48
 
 
 
 
49
  # Format the results for HighlightedText
50
  return prepare_entities_for_highlight(sentence, results)
51
 
 
16
  )
17
 
18
 
19
+ # Helper function to align entities correctly and debug tokenization
20
  def prepare_entities_for_highlight(text, results):
21
  entities = []
22
  seen_spans = set() # Track the spans we have already added to avoid overlaps
23
 
24
+ # Print the original input text for debugging
25
+ print(f"Original text: {text}")
26
+
27
  for category, entity_list in results.items():
28
  for entity in entity_list:
29
  entity_span = (entity["start"], entity["end"])
 
31
  # Only add non-overlapping entities
32
  if entity_span not in seen_spans:
33
  seen_spans.add(entity_span)
34
+ entity_text = text[
35
+ entity["start"] : entity["end"]
36
+ ].strip() # Ensure we're working with the correct portion of the text
37
+ print(
38
+ f"Entity text: {entity_text}, Start: {entity['start']}, End: {entity['end']}, Type: {entity['entity']}"
39
+ )
40
  entities.append(
41
  {
42
  "start": entity["start"],
 
55
  def extract_entities(sentence):
56
  results = ner_pipeline(sentence)
57
 
58
+ # Debugging the result format
59
+ print(f"NER results: {results}")
60
+
61
  # Format the results for HighlightedText
62
  return prepare_entities_for_highlight(sentence, results)
63