hassoudi committed on
Commit
b9cfeca
·
verified ·
1 Parent(s): f33c0ca

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -11
app.py CHANGED
@@ -1,23 +1,79 @@
1
  import gradio as gr
2
  from huggingface_hub import login
 
3
  import os
 
 
 
 
 
4
 
5
  def load_healthcare_ner():
6
- login(token=os.environ["HF_TOKEN"])
7
- model = AutoModelForTokenClassification.from_pretrained(
8
- "TypicaAI/HealthcareNER-Fr",
9
- token=os.environ["HF_TOKEN"]
10
- )
11
- return model
 
 
 
 
12
 
13
  def process_text(text):
14
- entities = model(text)
15
- # Format results with highlighting
 
 
 
 
 
 
 
16
  html_output = highlight_entities(text, entities)
17
- # Track usage for marketing insights
 
18
  log_demo_usage(text, len(entities))
 
19
  return html_output
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  demo = gr.Interface(
22
  fn=process_text,
23
  inputs=gr.Textbox(
@@ -42,7 +98,7 @@ demo = gr.Interface(
42
  ]
43
  )
44
 
45
- # Add conversion elements
46
  with gr.Blocks() as marketing_elements:
47
  gr.Markdown("""
48
  ### 📖 Get the Complete Guide
@@ -61,4 +117,9 @@ with gr.Blocks() as marketing_elements:
61
  label="Get the French Healthcare NER Dataset",
62
  placeholder="Enter your business email"
63
  )
64
- submit_btn = gr.Button("Access Dataset")
 
 
 
 
 
 
1
  import gradio as gr
2
  from huggingface_hub import login
3
+ from transformers import AutoModelForTokenClassification, AutoTokenizer
4
  import os
5
+ import torch
6
+
7
+ # Initialize global model and tokenizer
8
+ model = None
9
+ tokenizer = None
10
 
11
def load_healthcare_ner():
    """Load the Healthcare NER model and tokenizer once and cache them.

    The pair is stored in the module-level ``model`` / ``tokenizer`` globals,
    so later calls return the cached objects instead of loading them again.

    Returns:
        tuple: ``(model, tokenizer)`` for ``TypicaAI/HealthcareNER-Fr``.

    Raises:
        KeyError: If the ``HF_TOKEN`` environment variable is not set.
    """
    global model, tokenizer
    if model is None or tokenizer is None:
        repo_id = "TypicaAI/HealthcareNER-Fr"
        hf_token = os.environ["HF_TOKEN"]
        login(token=hf_token)
        # `token` is the current keyword; `use_auth_token` is deprecated in
        # transformers. Pass it to both loaders so neither relies on the
        # side effect of login().
        loaded_model = AutoModelForTokenClassification.from_pretrained(
            repo_id,
            token=hf_token,
        )
        loaded_tokenizer = AutoTokenizer.from_pretrained(repo_id, token=hf_token)
        # Publish both only after both loads succeeded, so a failed download
        # never leaves a half-initialised cache behind.
        model, tokenizer = loaded_model, loaded_tokenizer
    return model, tokenizer
22
 
23
def process_text(text):
    """Run NER over ``text`` and return HTML with the entities highlighted.

    Args:
        text: Raw user input from the Gradio textbox.

    Returns:
        str: The HTML produced by ``highlight_entities``.
    """
    if not text or not text.strip():
        # Nothing to tag: return the (empty) rendering without running the model.
        return highlight_entities(text or "", [])

    model, tokenizer = load_healthcare_ner()
    inputs = tokenizer(text, return_tensors="pt", truncation=True)

    # Inference only: no_grad avoids building an autograd graph per request.
    with torch.no_grad():
        outputs = model(**inputs)

    # Decode entities from outputs
    entities = extract_entities(outputs, tokenizer, text)

    # Highlight entities in the text
    html_output = highlight_entities(text, entities)

    # Log usage
    log_demo_usage(text, len(entities))

    return html_output
39
 
40
def extract_entities(outputs, tokenizer, text, id2label=None):
    """Decode token-classification logits into character-level entity spans.

    Args:
        outputs: Model output exposing ``logits``, indexed here as
            (batch, sequence, labels); only batch item 0 is decoded.
        tokenizer: The tokenizer that produced the model inputs. It is called
            again with ``truncation=True`` and ``return_offsets_mapping=True``
            so every logit position gets a character span. Offsets are a
            Hugging Face "fast" tokenizer feature -- NOTE(review): confirm the
            checkpoint ships a fast tokenizer.
        text: The raw string that was fed to the tokenizer.
        id2label: Optional mapping from class id to BIO label. Defaults to
            ``model.config.id2label`` of the module-level model.

    Returns:
        list[dict]: One dict per entity, in order of appearance, with keys
        ``entity`` (label without the ``B-``/``I-`` prefix), ``text`` (the
        exact substring of ``text``), ``start`` and ``end`` (character offsets).
    """
    if id2label is None:
        # Fall back to the module-level model set by load_healthcare_ner().
        id2label = model.config.id2label

    # Encode the same way process_text does (special tokens + truncation) so
    # positions line up one-to-one with the logits.
    encoding = tokenizer(text, truncation=True, return_offsets_mapping=True)
    offsets = encoding["offset_mapping"]

    # Index the batch dimension explicitly: squeeze() would collapse a
    # single-position sequence to a scalar and break the zip below.
    predictions = torch.argmax(outputs.logits, dim=-1)[0].tolist()

    spans = []  # each item: [entity_type, start, end]
    open_span = None
    for (start, end), class_id in zip(offsets, predictions):
        if start == end:
            # Special tokens ([CLS], </s>, ...) carry an empty span.
            continue
        label = id2label[class_id]
        if label.startswith("B-"):
            open_span = [label[2:], start, end]
            spans.append(open_span)
        elif label.startswith("I-") and open_span is not None:
            # Extend the running entity up to the end of this piece.
            open_span[2] = end
        else:
            open_span = None
    return [_span_to_entity(text, *span) for span in spans]


def _span_to_entity(text, entity_type, start, end):
    """Build one entity dict, trimming whitespace the offsets may include."""
    raw = text[start:end]
    start += len(raw) - len(raw.lstrip())
    surface = raw.strip()
    return {
        "entity": entity_type,
        "text": surface,
        "start": start,
        "end": start + len(surface),
    }
61
+
62
def highlight_entities(text, entities):
    """Return ``text`` as an HTML paragraph with entity mentions highlighted.

    Every occurrence of each entity's ``"text"`` value is wrapped in a yellow
    ``<mark>`` element. The input is scanned once, so a mention is wrapped at
    most once and the injected markup is never matched again. All user text
    is HTML-escaped before it is placed in the page.

    Args:
        text: The raw input string.
        entities: Iterable of dicts with a ``"text"`` key.

    Returns:
        str: ``<p>...</p>`` HTML.
    """
    # Local imports: the visible import block of this file has neither module.
    import html
    import re

    # De-duplicate and try longer mentions first so "aspirine 500" wins over
    # "aspirine" when both start at the same position. Empty strings are
    # dropped because they would match between every character.
    surfaces = sorted(
        {entity["text"] for entity in entities if entity["text"]},
        key=len,
        reverse=True,
    )
    if not surfaces:
        return f"<p>{html.escape(text, quote=False)}</p>"

    pattern = re.compile("|".join(re.escape(surface) for surface in surfaces))
    pieces = []
    cursor = 0
    for match in pattern.finditer(text):
        pieces.append(html.escape(text[cursor:match.start()], quote=False))
        pieces.append(
            '<mark style="background-color: yellow;">'
            f"{html.escape(match.group(), quote=False)}</mark>"
        )
        cursor = match.end()
    pieces.append(html.escape(text[cursor:], quote=False))
    return f"<p>{''.join(pieces)}</p>"
71
+
72
def log_demo_usage(text, num_entities):
    """Print a one-line usage record for a processed request.

    Args:
        text: The processed input; only its first 50 characters are printed.
        num_entities: Number of entities found in ``text``.
    """
    preview = text[:50]
    print(f"Processed text: {preview}... | Entities found: {num_entities}")
75
+
76
+ # Define the Gradio interface
77
  demo = gr.Interface(
78
  fn=process_text,
79
  inputs=gr.Textbox(
 
98
  ]
99
  )
100
 
101
+ # Add marketing elements
102
  with gr.Blocks() as marketing_elements:
103
  gr.Markdown("""
104
  ### 📖 Get the Complete Guide
 
117
  label="Get the French Healthcare NER Dataset",
118
  placeholder="Enter your business email"
119
  )
120
+ submit_btn = gr.Button("Access Dataset")
121
+
122
+ # Launch the Gradio demo
123
+ if __name__ == "__main__":
124
+ demo.launch()
125
+