ajit committed on
Commit
2485275
·
1 Parent(s): 854a552
Files changed (1) hide show
  1. app.py +76 -75
app.py CHANGED
@@ -18,23 +18,24 @@ SPECIFIC_TAG=":__entity__"
18
 
19
 
20
 
 
21
  @st.cache(suppress_st_warning=True, allow_output_mutation=True)
22
  def POS_get_model(model_name):
23
  val = SequenceTagger.load(model_name) # Load the model
24
  return val
25
-
26
  def getPos(s: Sentence):
27
  texts = []
28
  labels = []
29
  for t in s.tokens:
30
  for label in t.annotation_layers.keys():
31
  texts.append(t.text)
32
- labels.append(t.get_labels(label)[0].value)
33
  return texts, labels
34
-
35
  def getDictFromPOS(texts, labels):
36
  return [["dummy",t,l,"dummy","dummy" ] for t, l in zip(texts, labels)]
37
-
38
  def decode(tokenizer, pred_idx, top_clean):
39
  ignore_tokens = string.punctuation + '[PAD]'
40
  tokens = []
@@ -74,9 +75,9 @@ def get_bert_prediction(input_text,top_k):
74
  def load_pos_model():
75
  checkpoint = "flair/pos-english"
76
  return POS_get_model(checkpoint)
77
-
78
 
79
-
 
80
 
81
  def init_session_states():
82
  if 'top_k' not in st.session_state:
@@ -94,8 +95,8 @@ def init_session_states():
94
  if 'aggr' not in st.session_state:
95
  st.session_state['aggr'] = None
96
 
97
-
98
-
99
  def get_pos_arr(input_text,display_area):
100
  if (st.session_state['pos_model'] is None):
101
  display_area.text("Loading model 3 of 3.Loading POS model...")
@@ -105,37 +106,37 @@ def get_pos_arr(input_text,display_area):
105
  texts, labels = getPos(s)
106
  pos_results = getDictFromPOS(texts, labels)
107
  return pos_results
108
-
109
  def perform_inference(text,display_area):
110
-
111
  if (st.session_state['bio_model'] is None):
112
  display_area.text("Loading model 1 of 3. Bio model...")
113
  st.session_state['bio_model'] = bd.BatchInference("bio/desc_a100_config.json",'ajitrajasekharan/biomedical',False,False,DEFAULT_TOP_K,True,True, "bio/","bio/a100_labels.txt",False)
114
-
115
  if (st.session_state['phi_model'] is None):
116
  display_area.text("Loading model 2 of 3. PHI model...")
117
  st.session_state['phi_model'] = bd.BatchInference("bbc/desc_bbc_config.json",'bert-base-cased',False,False,DEFAULT_TOP_K,True,True, "bbc/","bbc/bbc_labels.txt",False)
118
-
119
  #Load POS model if needed and gets POS tags
120
  if (SPECIFIC_TAG not in text):
121
  pos_arr = get_pos_arr(text,display_area)
122
  else:
123
  pos_arr = None
124
-
125
  if (st.session_state['ner_bio'] is None):
126
  display_area.text("Initializing BIO module...")
127
  st.session_state['ner_bio'] = ner.UnsupNER("bio/ner_a100_config.json")
128
-
129
  if (st.session_state['ner_phi'] is None):
130
  display_area.text("Initializing PHI module...")
131
  st.session_state['ner_phi'] = ner.UnsupNER("bbc/ner_bbc_config.json")
132
-
133
  if (st.session_state['aggr'] is None):
134
  display_area.text("Initializing Aggregation modeule...")
135
  st.session_state['aggr'] = aggr.AggregateNER("./ensemble_config.json")
136
-
137
-
138
-
139
  display_area.text("Getting results from BIO model...")
140
  bio_descs = st.session_state['bio_model'].get_descriptors(text,pos_arr)
141
  display_area.text("Getting results from PHI model...")
@@ -143,65 +144,65 @@ def perform_inference(text,display_area):
143
  display_area.text("Aggregating BIO & PHI results...")
144
  bio_ner = st.session_state['ner_bio'].tag_sentence_service(text,bio_descs)
145
  phi_ner = st.session_state['ner_phi'].tag_sentence_service(text,phi_results)
146
-
147
  combined_arr = [json.loads(bio_ner),json.loads(phi_ner)]
148
 
149
  aggregate_results = st.session_state['aggr'].fetch_all(text,combined_arr)
150
  return aggregate_results
151
-
152
 
153
  sent_arr = [
154
- "Lou Gehrig who works for XCorp and lives in New York suffers from Parkinson's ",
155
- "Parkinson who works for XCorp and lives in New York suffers from Lou Gehrig's",
156
- "lou gehrig was diagnosed with Parkinson's ",
157
- "A eGFR below 60 indicates chronic kidney disease",
158
- "Overexpression of EGFR occurs across a wide range of different cancers",
159
- "Stanford called",
160
- "He was diagnosed with non small cell lung cancer",
161
- "I met my girl friends at the pub ",
162
- "I met my New York friends at the pub",
163
- "I met my XCorp friends at the pub",
164
- "I met my two friends at the pub",
165
- "Bio-Techne's genomic tools include advanced tissue-based in-situ hybridization assays sold under the ACD brand as well as a portfolio of assays for prostate cancer diagnosis ",
166
- "There are no treatment options specifically indicated for ACD and physicians must utilize agents approved for other dermatology conditions", "As ACD has been implicated in apoptosis-resistant glioblastoma (GBM), there is a high medical need for identifying novel ACD-inducing drugs ",
167
- "Located in the heart of Dublin , in the family home of acclaimed writer Oscar Wilde , ACD provides the perfect backdrop to inspire Irish (and Irish-at-heart) students to excel in business and the arts",
168
- "Patients treated with anticancer chemotherapy drugs ( ACD ) are vulnerable to infectious diseases due to immunosuppression and to the direct impact of ACD on their intestinal microbiota ",
169
- "In the LASOR trial , increasing daily imatinib dose from 400 to 600mg induced MMR at 12 and 24 months in 25% and 36% of the patients, respectively, who had suboptimal cytogenetic responses ",
170
- "The sky turned dark in advance of the storm that was coming from the east ",
171
- "She loves to watch Sunday afternoon football with her family ",
172
- "Paul Erdos died at 83 "
173
  ]
174
 
175
 
176
  sent_arr_masked = [
177
- "Lou Gehrig:__entity__ who works for XCorp:__entity__ and lives in New:__entity__ York:__entity__ suffers from Parkinson's:__entity__ ",
178
- "Parkinson:__entity__ who works for XCorp:__entity__ and lives in New:__entity__ York:__entity__ suffers from Lou Gehrig's:__entity__",
179
- "lou:__entity__ gehrig:__entity__ was diagnosed with Parkinson's:__entity__ ",
180
- "A eGFR:__entity__ below 60 indicates chronic kidney disease",
181
- "Overexpression of EGFR:__entity__ occurs across a wide range of different cancers",
182
- "Stanford:__entity__ called",
183
- "He was diagnosed with non:__entity__ small:__entity__ cell:__entity__ lung:__entity__ cancer:__entity__",
184
- "I met my girl:__entity__ friends at the pub ",
185
- "I met my New:__entity__ York:__entity__ friends at the pub",
186
- "I met my XCorp:__entity__ friends at the pub",
187
- "I met my two:__entity__ friends at the pub",
188
- "Bio-Techne's genomic tools include advanced tissue-based in-situ hybridization assays sold under the ACD:__entity__ brand as well as a portfolio of assays for prostate cancer diagnosis ",
189
- "There are no treatment options specifically indicated for ACD:__entity__ and physicians must utilize agents approved for other dermatology conditions",
190
- "As ACD:__entity__ has been implicated in apoptosis-resistant glioblastoma (GBM), there is a high medical need for identifying novel ACD-inducing drugs ",
191
- "Located in the heart of Dublin , in the family home of acclaimed writer Oscar Wilde , ACD:__entity__ provides the perfect backdrop to inspire Irish (and Irish-at-heart) students to excel in business and the arts",
192
- "Patients treated with anticancer chemotherapy drugs ( ACD:__entity__ ) are vulnerable to infectious diseases due to immunosuppression and to the direct impact of ACD on their intestinal microbiota ",
193
- "In the LASOR:__entity__ trial:__entity__ , increasing daily imatinib dose from 400 to 600mg induced MMR at 12 and 24 months in 25% and 36% of the patients, respectively, who had suboptimal cytogenetic responses ",
194
- "The sky turned dark:__entity__ in advance of the storm that was coming from the east ",
195
- "She loves to watch Sunday afternoon football:__entity__ with her family ",
196
- "Paul:__entity__ Erdos:__entity__ died at 83:__entity__ "
197
  ]
198
 
199
  def init_selectbox():
200
  return st.selectbox(
201
  'Choose any of the sentences in pull-down below',
202
  sent_arr,key='my_choice')
203
-
204
-
205
  def on_text_change():
206
  text = st.session_state.my_text
207
  print("in callback: " + text)
@@ -211,22 +212,22 @@ def main():
211
  try:
212
 
213
  init_session_states()
214
-
215
  st.markdown("<h3 style='text-align: center;'>NER using pretrained models with <a href='https://ajitrajasekharan.github.io/2021/01/02/my-first-post.html'>no fine tuning</a></h3>", unsafe_allow_html=True)
216
  #st.markdown("""
217
  #<h3 style="font-size:16px; color: #ff0000; text-align: center"><b>App under construction... (not in working condition yet)</b></h3>
218
  #""", unsafe_allow_html=True)
219
-
220
-
221
  st.markdown("""
222
  <p style="text-align:center;"><img src="https://ajitrajasekharan.github.io/images/1.png" width="700"></p>
223
  <br/>
224
  <br/>
225
  """, unsafe_allow_html=True)
226
-
227
  st.write("This app uses 3 models. Two Pretrained Bert models (**no fine tuning**) and a POS tagger")
228
-
229
-
230
  with st.form('my_form'):
231
  selected_sentence = init_selectbox()
232
  text_input = st.text_area(label='Type any sentence below',value="")
@@ -243,16 +244,16 @@ def main():
243
  with display_area.container():
244
  st.text(f"prediction took {time.time() - start:.2f}s")
245
  st.json(results)
246
-
247
-
248
-
249
-
250
 
251
  #input_text = st.text_area(
252
  # label="Type any sentence",
253
  # on_change=on_text_change,key='my_text'
254
  # )
255
-
256
  st.markdown("""
257
  <small style="font-size:16px; color: #7f7f7f; text-align: left"><br/><br/>Models used: <br/>(1) <a href='https://huggingface.co/ajitrajasekharan/biomedical' target='_blank'>Biomedical model</a> pretrained on Pubmed,Clinical trials and BookCorpus subset.<br/>(2) Bert-base-cased (for PHI entities - Person/location/organization etc.)<br/>(3) Flair POS tagger</small>
258
  #""", unsafe_allow_html=True)
@@ -264,8 +265,8 @@ def main():
264
  """, unsafe_allow_html=True)
265
 
266
  except Exception as e:
267
- print("Some error occurred in main")
268
- st.exception(e)
269
-
270
  if __name__ == "__main__":
271
  main()
 
18
 
19
 
20
 
21
+
22
  @st.cache(suppress_st_warning=True, allow_output_mutation=True)
23
  def POS_get_model(model_name):
24
  val = SequenceTagger.load(model_name) # Load the model
25
  return val
26
+
27
  def getPos(s: Sentence):
28
  texts = []
29
  labels = []
30
  for t in s.tokens:
31
  for label in t.annotation_layers.keys():
32
  texts.append(t.text)
33
+ labels.append(t.get_labels(label)[0].value)
34
  return texts, labels
35
+
36
  def getDictFromPOS(texts, labels):
37
  return [["dummy",t,l,"dummy","dummy" ] for t, l in zip(texts, labels)]
38
+
39
  def decode(tokenizer, pred_idx, top_clean):
40
  ignore_tokens = string.punctuation + '[PAD]'
41
  tokens = []
 
75
  def load_pos_model():
76
  checkpoint = "flair/pos-english"
77
  return POS_get_model(checkpoint)
 
78
 
79
+
80
+
81
 
82
  def init_session_states():
83
  if 'top_k' not in st.session_state:
 
95
  if 'aggr' not in st.session_state:
96
  st.session_state['aggr'] = None
97
 
98
+
99
+
100
  def get_pos_arr(input_text,display_area):
101
  if (st.session_state['pos_model'] is None):
102
  display_area.text("Loading model 3 of 3.Loading POS model...")
 
106
  texts, labels = getPos(s)
107
  pos_results = getDictFromPOS(texts, labels)
108
  return pos_results
109
+
110
  def perform_inference(text,display_area):
111
+
112
  if (st.session_state['bio_model'] is None):
113
  display_area.text("Loading model 1 of 3. Bio model...")
114
  st.session_state['bio_model'] = bd.BatchInference("bio/desc_a100_config.json",'ajitrajasekharan/biomedical',False,False,DEFAULT_TOP_K,True,True, "bio/","bio/a100_labels.txt",False)
115
+
116
  if (st.session_state['phi_model'] is None):
117
  display_area.text("Loading model 2 of 3. PHI model...")
118
  st.session_state['phi_model'] = bd.BatchInference("bbc/desc_bbc_config.json",'bert-base-cased',False,False,DEFAULT_TOP_K,True,True, "bbc/","bbc/bbc_labels.txt",False)
119
+
120
  #Load POS model if needed and gets POS tags
121
  if (SPECIFIC_TAG not in text):
122
  pos_arr = get_pos_arr(text,display_area)
123
  else:
124
  pos_arr = None
125
+
126
  if (st.session_state['ner_bio'] is None):
127
  display_area.text("Initializing BIO module...")
128
  st.session_state['ner_bio'] = ner.UnsupNER("bio/ner_a100_config.json")
129
+
130
  if (st.session_state['ner_phi'] is None):
131
  display_area.text("Initializing PHI module...")
132
  st.session_state['ner_phi'] = ner.UnsupNER("bbc/ner_bbc_config.json")
133
+
134
  if (st.session_state['aggr'] is None):
135
  display_area.text("Initializing Aggregation modeule...")
136
  st.session_state['aggr'] = aggr.AggregateNER("./ensemble_config.json")
137
+
138
+
139
+
140
  display_area.text("Getting results from BIO model...")
141
  bio_descs = st.session_state['bio_model'].get_descriptors(text,pos_arr)
142
  display_area.text("Getting results from PHI model...")
 
144
  display_area.text("Aggregating BIO & PHI results...")
145
  bio_ner = st.session_state['ner_bio'].tag_sentence_service(text,bio_descs)
146
  phi_ner = st.session_state['ner_phi'].tag_sentence_service(text,phi_results)
147
+
148
  combined_arr = [json.loads(bio_ner),json.loads(phi_ner)]
149
 
150
  aggregate_results = st.session_state['aggr'].fetch_all(text,combined_arr)
151
  return aggregate_results
152
+
153
 
154
  sent_arr = [
155
+ "Lou Gehrig who works for XCorp and lives in New York suffers from Parkinson's ",
156
+ "Parkinson who works for XCorp and lives in New York suffers from Lou Gehrig's",
157
+ "lou gehrig was diagnosed with Parkinson's ",
158
+ "A eGFR below 60 indicates chronic kidney disease",
159
+ "Overexpression of EGFR occurs across a wide range of different cancers",
160
+ "Stanford called",
161
+ "He was diagnosed with non small cell lung cancer",
162
+ "I met my girl friends at the pub ",
163
+ "I met my New York friends at the pub",
164
+ "I met my XCorp friends at the pub",
165
+ "I met my two friends at the pub",
166
+ "Bio-Techne's genomic tools include advanced tissue-based in-situ hybridization assays sold under the ACD brand as well as a portfolio of assays for prostate cancer diagnosis ",
167
+ "There are no treatment options specifically indicated for ACD and physicians must utilize agents approved for other dermatology conditions", "As ACD has been implicated in apoptosis-resistant glioblastoma (GBM), there is a high medical need for identifying novel ACD-inducing drugs ",
168
+ "Located in the heart of Dublin , in the family home of acclaimed writer Oscar Wilde , ACD provides the perfect backdrop to inspire Irish (and Irish-at-heart) students to excel in business and the arts",
169
+ "Patients treated with anticancer chemotherapy drugs ( ACD ) are vulnerable to infectious diseases due to immunosuppression and to the direct impact of ACD on their intestinal microbiota ",
170
+ "In the LASOR trial , increasing daily imatinib dose from 400 to 600mg induced MMR at 12 and 24 months in 25% and 36% of the patients, respectively, who had suboptimal cytogenetic responses ",
171
+ "The sky turned dark in advance of the storm that was coming from the east ",
172
+ "She loves to watch Sunday afternoon football with her family ",
173
+ "Paul Erdos died at 83 "
174
  ]
175
 
176
 
177
  sent_arr_masked = [
178
+ "Lou Gehrig:__entity__ who works for XCorp:__entity__ and lives in New:__entity__ York:__entity__ suffers from Parkinson's:__entity__ ",
179
+ "Parkinson:__entity__ who works for XCorp:__entity__ and lives in New:__entity__ York:__entity__ suffers from Lou Gehrig's:__entity__",
180
+ "lou:__entity__ gehrig:__entity__ was diagnosed with Parkinson's:__entity__ ",
181
+ "A eGFR:__entity__ below 60 indicates chronic kidney disease",
182
+ "Overexpression of EGFR:__entity__ occurs across a wide range of different cancers",
183
+ "Stanford:__entity__ called",
184
+ "He was diagnosed with non:__entity__ small:__entity__ cell:__entity__ lung:__entity__ cancer:__entity__",
185
+ "I met my girl:__entity__ friends at the pub ",
186
+ "I met my New:__entity__ York:__entity__ friends at the pub",
187
+ "I met my XCorp:__entity__ friends at the pub",
188
+ "I met my two:__entity__ friends at the pub",
189
+ "Bio-Techne's genomic tools include advanced tissue-based in-situ hybridization assays sold under the ACD:__entity__ brand as well as a portfolio of assays for prostate cancer diagnosis ",
190
+ "There are no treatment options specifically indicated for ACD:__entity__ and physicians must utilize agents approved for other dermatology conditions",
191
+ "As ACD:__entity__ has been implicated in apoptosis-resistant glioblastoma (GBM), there is a high medical need for identifying novel ACD-inducing drugs ",
192
+ "Located in the heart of Dublin , in the family home of acclaimed writer Oscar Wilde , ACD:__entity__ provides the perfect backdrop to inspire Irish (and Irish-at-heart) students to excel in business and the arts",
193
+ "Patients treated with anticancer chemotherapy drugs ( ACD:__entity__ ) are vulnerable to infectious diseases due to immunosuppression and to the direct impact of ACD on their intestinal microbiota ",
194
+ "In the LASOR:__entity__ trial:__entity__ , increasing daily imatinib dose from 400 to 600mg induced MMR at 12 and 24 months in 25% and 36% of the patients, respectively, who had suboptimal cytogenetic responses ",
195
+ "The sky turned dark:__entity__ in advance of the storm that was coming from the east ",
196
+ "She loves to watch Sunday afternoon football:__entity__ with her family ",
197
+ "Paul:__entity__ Erdos:__entity__ died at 83:__entity__ "
198
  ]
199
 
200
  def init_selectbox():
201
  return st.selectbox(
202
  'Choose any of the sentences in pull-down below',
203
  sent_arr,key='my_choice')
204
+
205
+
206
  def on_text_change():
207
  text = st.session_state.my_text
208
  print("in callback: " + text)
 
212
  try:
213
 
214
  init_session_states()
215
+
216
  st.markdown("<h3 style='text-align: center;'>NER using pretrained models with <a href='https://ajitrajasekharan.github.io/2021/01/02/my-first-post.html'>no fine tuning</a></h3>", unsafe_allow_html=True)
217
  #st.markdown("""
218
  #<h3 style="font-size:16px; color: #ff0000; text-align: center"><b>App under construction... (not in working condition yet)</b></h3>
219
  #""", unsafe_allow_html=True)
220
+
221
+
222
  st.markdown("""
223
  <p style="text-align:center;"><img src="https://ajitrajasekharan.github.io/images/1.png" width="700"></p>
224
  <br/>
225
  <br/>
226
  """, unsafe_allow_html=True)
227
+
228
  st.write("This app uses 3 models. Two Pretrained Bert models (**no fine tuning**) and a POS tagger")
229
+
230
+
231
  with st.form('my_form'):
232
  selected_sentence = init_selectbox()
233
  text_input = st.text_area(label='Type any sentence below',value="")
 
244
  with display_area.container():
245
  st.text(f"prediction took {time.time() - start:.2f}s")
246
  st.json(results)
247
+
248
+
249
+
250
+
251
 
252
  #input_text = st.text_area(
253
  # label="Type any sentence",
254
  # on_change=on_text_change,key='my_text'
255
  # )
256
+
257
  st.markdown("""
258
  <small style="font-size:16px; color: #7f7f7f; text-align: left"><br/><br/>Models used: <br/>(1) <a href='https://huggingface.co/ajitrajasekharan/biomedical' target='_blank'>Biomedical model</a> pretrained on Pubmed,Clinical trials and BookCorpus subset.<br/>(2) Bert-base-cased (for PHI entities - Person/location/organization etc.)<br/>(3) Flair POS tagger</small>
259
  #""", unsafe_allow_html=True)
 
265
  """, unsafe_allow_html=True)
266
 
267
  except Exception as e:
268
+ print("Some error occurred in main")
269
+ st.exception(e)
270
+
271
  if __name__ == "__main__":
272
  main()