Spaces:
Runtime error
Runtime error
Commit
·
77a72db
1
Parent(s):
44803cb
Update extract_abs.py
Browse files- extract_abs.py +24 -21
extract_abs.py
CHANGED
|
@@ -302,27 +302,30 @@ def streamlit_extraction(search_term:Union[int,str], maxResults:int, filtering:s
|
|
| 302 |
|
| 303 |
#Gather title+abstracts into a dictionary {pmid:abstract}
|
| 304 |
pmid_abs = classify_abs.search_getAbs(search_term_list, maxResults, filtering)
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
|
|
|
|
|
|
|
|
|
| 326 |
|
| 327 |
#Identical to search_term_extraction, except it returns a JSON object instead of a df
|
| 328 |
def API_extraction(search_term:Union[int,str], maxResults:int, filtering:str, #for abstract search
|
|
|
|
| 302 |
|
| 303 |
#Gather title+abstracts into a dictionary {pmid:abstract}
|
| 304 |
pmid_abs = classify_abs.search_getAbs(search_term_list, maxResults, filtering)
|
| 305 |
+
if len(pmid_abs)==0:
|
| 306 |
+
st.error('No results were gathered. Enter a new search term.')
|
| 307 |
+
else:
|
| 308 |
+
st.write("Gathered " +str(len(pmid_abs))+" PubMed IDs. Classifying and extracting epidemiology information...")
|
| 309 |
+
|
| 310 |
+
i = 0
|
| 311 |
+
my_bar = st.progress(i)
|
| 312 |
+
percent_at_step = 100/len(pmid_abs)
|
| 313 |
+
for pmid, abstract in pmid_abs.items():
|
| 314 |
+
epi_prob, isEpi = classify_abs.getTextPredictions(abstract, classify_model_vars)
|
| 315 |
+
if isEpi:
|
| 316 |
+
#Preprocessing Functions for Extraction
|
| 317 |
+
sentences = str2sents(abstract)
|
| 318 |
+
model_outputs = [NER_pipeline(sent) for sent in sentences]
|
| 319 |
+
extraction = parse_info(sentences, model_outputs, entity_classes, extract_diseases, GARD_dict, max_length)
|
| 320 |
+
if extraction:
|
| 321 |
+
extraction.update({'PMID':pmid, 'ABSTRACT':abstract, 'EPI_PROB':epi_prob, 'IsEpi':isEpi})
|
| 322 |
+
#Slow dataframe update
|
| 323 |
+
results = results.append(extraction, ignore_index=True)
|
| 324 |
+
i+=1
|
| 325 |
+
my_bar.progress(round(i*percent_at_step/100,1))
|
| 326 |
+
|
| 327 |
+
st.write(len(results),'abstracts classified as epidemiological.')
|
| 328 |
+
return results.sort_values('EPI_PROB', ascending=False)
|
| 329 |
|
| 330 |
#Identical to search_term_extraction, except it returns a JSON object instead of a df
|
| 331 |
def API_extraction(search_term:Union[int,str], maxResults:int, filtering:str, #for abstract search
|