emanuelaboros committed on
Commit
53e96e8
·
1 Parent(s): c5b3453

update app

Browse files
Files changed (1) hide show
  1. app.py +17 -12
app.py CHANGED
@@ -19,20 +19,25 @@ ner_pipeline = pipeline(
19
  # Helper function to flatten entities and prepare them for HighlightedText
20
  def prepare_entities_for_highlight(text, results):
21
  entities = []
 
 
22
  for category, entity_list in results.items():
23
  for entity in entity_list:
24
- # Debugging information to check character positions
25
- print(
26
- f"Entity: {entity['word']}, Start: {entity['start']}, End: {entity['end']}, Type: {entity['entity']}"
27
- )
28
- # Append entity with character indices
29
- entities.append(
30
- {
31
- "start": entity["start"],
32
- "end": entity["end"],
33
- "label": f"{entity['entity']}", # ({entity['score']:.2f}%)
34
- }
35
- )
 
 
 
36
 
37
  return {"text": text, "entities": entities}
38
 
 
19
  # Helper function to flatten entities and prepare them for HighlightedText
20
  def prepare_entities_for_highlight(text, results):
21
  entities = []
22
+ seen_spans = set() # Track the spans we have already added to avoid overlaps
23
+
24
  for category, entity_list in results.items():
25
  for entity in entity_list:
26
+ entity_span = (entity["start"], entity["end"])
27
+
28
+ # Only add non-overlapping entities
29
+ if entity_span not in seen_spans:
30
+ seen_spans.add(entity_span)
31
+ entities.append(
32
+ {
33
+ "start": entity["start"],
34
+ "end": entity["end"],
35
+ "label": f"{entity['entity']}",
36
+ }
37
+ )
38
+
39
+ # Sort entities by their start position
40
+ entities = sorted(entities, key=lambda x: x["start"])
41
 
42
  return {"text": text, "entities": entities}
43