emanuelaboros committed on
Commit
e9c0cd2
·
1 Parent(s): b3b05b5

let's see how the highlight works

Browse files
Files changed (1) hide show
  1. app.py +13 -18
app.py CHANGED
@@ -41,13 +41,18 @@ def format_entities_as_html(entities):
41
  return html_output
42
 
43
 
44
- # Helper function to align entities correctly and debug tokenization
45
- def prepare_entities_for_highlight(text, results):
 
 
 
 
 
46
  entities = []
47
  seen_spans = set() # Track the spans we have already added to avoid overlaps
48
 
49
  # Print debug info about tokenization
50
- print(f"Original text: {text}")
51
  print("Results:", results)
52
  # it should look like:
53
  # [{'entity': 'org.ent.pressagency.Reuters', 'score': np.float32(98.47), 'index': 78, 'text': 'Reuters', 'start': 440, 'end': 447}]
@@ -57,29 +62,18 @@ def prepare_entities_for_highlight(text, results):
57
  # Only add non-overlapping entities
58
  if entity_span not in seen_spans:
59
  seen_spans.add(entity_span)
60
- entity_text = text[
61
  entity["start"] : entity["end"]
62
  ].strip() # Ensure we're working with the correct portion of the text
63
  entity["surface"] = entity_text
 
64
  print(f"Entity text: {entity}")
65
 
66
  entities.append(entity)
67
 
68
  # Sort entities by their start position
69
  entities = sorted(entities, key=lambda x: x["start"])
70
-
71
- return format_entities_as_html(entities)
72
-
73
-
74
- # Function to process the sentence and extract entities
75
- def extract_entities(sentence):
76
- results = ner_pipeline(sentence)
77
-
78
- # Debugging the result format
79
- print(f"NER results: {results}")
80
-
81
- # Format the results for HighlightedText
82
- return prepare_entities_for_highlight(sentence, results)
83
 
84
 
85
  # Create Gradio interface
@@ -93,7 +87,8 @@ def ner_app_interface():
93
  interface = gr.Interface(
94
  fn=extract_entities,
95
  inputs=input_sentence,
96
- outputs=output_entities,
 
97
  title="Named Entity Recognition",
98
  description="Enter a sentence to extract named entities using the NER model from the Impresso project.",
99
  examples=[
 
41
  return html_output
42
 
43
 
44
+ # Function to process the sentence and extract entities
45
+ def extract_entities(sentence):
46
+ results = ner_pipeline(sentence)
47
+
48
+ # Debugging the result format
49
+ print(f"NER results: {results}")
50
+
51
  entities = []
52
  seen_spans = set() # Track the spans we have already added to avoid overlaps
53
 
54
  # Print debug info about tokenization
55
+ print(f"Original text: {sentence}")
56
  print("Results:", results)
57
  # it should look like:
58
  # [{'entity': 'org.ent.pressagency.Reuters', 'score': np.float32(98.47), 'index': 78, 'text': 'Reuters', 'start': 440, 'end': 447}]
 
62
  # Only add non-overlapping entities
63
  if entity_span not in seen_spans:
64
  seen_spans.add(entity_span)
65
+ entity_text = sentence[
66
  entity["start"] : entity["end"]
67
  ].strip() # Ensure we're working with the correct portion of the text
68
  entity["surface"] = entity_text
69
+ entity["entity"] = entity["type"]
70
  print(f"Entity text: {entity}")
71
 
72
  entities.append(entity)
73
 
74
  # Sort entities by their start position
75
  entities = sorted(entities, key=lambda x: x["start"])
76
+ return entities
 
 
 
 
 
 
 
 
 
 
 
 
77
 
78
 
79
  # Create Gradio interface
 
87
  interface = gr.Interface(
88
  fn=extract_entities,
89
  inputs=input_sentence,
90
+ outputs=[gr.HighlightedText(label="Text with mentions")],
91
+ # outputs=output_entities,
92
  title="Named Entity Recognition",
93
  description="Enter a sentence to extract named entities using the NER model from the Impresso project.",
94
  examples=[