Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,11 +1,5 @@
|
|
1 |
import os
|
2 |
|
3 |
-
os.environ["CUDA_VISIBLE_DEVICES"] = "1,6" # expose only the GPUs with indices 1 and 6 to this process
|
4 |
-
|
5 |
-
os.environ["HF_HUB_CACHE"] = "/eos/jeodpp/home/users/consose/cache/huggingface/hub"
|
6 |
-
os.environ["HUGGINGFACE_HUB_CACHE"] = "/eos/jeodpp/home/users/consose/cache/huggingface/hub"
|
7 |
-
os.environ["HF_HOME"] = "/eos/jeodpp/home/users/consose/cache/huggingface/hub"
|
8 |
-
|
9 |
from transformers import file_utils
|
10 |
print(file_utils.default_cache_path)
|
11 |
|
@@ -21,11 +15,11 @@ from transformers.pipelines.pt_utils import KeyDataset
|
|
21 |
|
22 |
from concurrent.futures import ThreadPoolExecutor, as_completed
|
23 |
|
24 |
-
|
25 |
-
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
|
26 |
|
27 |
import torch
|
28 |
-
torch.cuda.empty_cache() # Clear the torch CUDA cache
|
29 |
|
30 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
31 |
print(f"Device: {device}...")
|
@@ -353,12 +347,13 @@ def annotate(df, args, pipeInner, tokenizerGliner, modelGliner, modelGlinerBio,
|
|
353 |
#https://data.bioontology.org/documentation#nav_annotator
|
354 |
#https://bioportal.bioontology.org/annotatorplus
|
355 |
|
356 |
-
key_bioportal = ""
|
357 |
-
if args.bioportalkey_filename:
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
|
|
|
362 |
df_annot = pd.DataFrame()
|
363 |
for drm_idx, row in tqdm(df.iterrows()):
|
364 |
df_BioPortalAnnotation=process_row_BioPortal_api(args, key_bioportal, row)
|
@@ -910,11 +905,12 @@ def getUrlBioAndAllOtherBioConcepts(word, args, key_virtuoso, cache_map_virtuoso
|
|
910 |
entityBioeUrl = None
|
911 |
ALLURIScontext = []
|
912 |
|
913 |
-
key_bioportal = ""
|
914 |
-
if args.bioportalkey_filename:
|
915 |
-
|
916 |
-
|
917 |
-
|
|
|
918 |
|
919 |
# Check if args.KG_restriction exists and is not empty
|
920 |
if getattr(args, 'KG_restriction', None):
|
@@ -1992,9 +1988,9 @@ def nerBio(text, ModelsSelection, CategoriesSelection, ScoreFilt, EntityLinking,
|
|
1992 |
parser.add_argument("--num_cores_Gliner", type=int, default=num_cores_Gliner_forDemo, help="parallel processing for Gliner annotation") # 0 means use the GPU for Gliner !
|
1993 |
|
1994 |
parser.add_argument("--entity_linking", type=str, default=EnableNEL, help="whether to make entities linking or not")
|
1995 |
-
parser.add_argument("--geonameskey_filename", type=str, default="
|
1996 |
-
parser.add_argument("--virtuosokey_filename", type=str, default="
|
1997 |
-
parser.add_argument("--bioportalkey_filename", type=str, default="
|
1998 |
|
1999 |
parser.add_argument("--USE_CACHE", type=str, default="False", help="whether to use cache for the NER and NEL tasks or not")
|
2000 |
parser.add_argument("--num_cores_eLinking", type=int, default=1, help="parallel processing for the entity linking process")
|
@@ -2126,11 +2122,12 @@ def nerBio(text, ModelsSelection, CategoriesSelection, ScoreFilt, EntityLinking,
|
|
2126 |
else:
|
2127 |
cache_map_geonames = {}
|
2128 |
|
2129 |
-
key_geonames = ""
|
2130 |
-
if args.geonameskey_filename:
|
2131 |
-
|
2132 |
-
|
2133 |
-
|
|
|
2134 |
|
2135 |
cache_map_virtuoso = None
|
2136 |
if strtobool(args.USE_CACHE):
|
@@ -2141,11 +2138,12 @@ def nerBio(text, ModelsSelection, CategoriesSelection, ScoreFilt, EntityLinking,
|
|
2141 |
else:
|
2142 |
cache_map_virtuoso = {}
|
2143 |
|
2144 |
-
key_virtuoso = ""
|
2145 |
-
if args.virtuosokey_filename:
|
2146 |
-
|
2147 |
-
|
2148 |
-
|
|
|
2149 |
|
2150 |
df_annotated_combined, cache_map_geonames_AFTER, cache_map_virtuoso_AFTER, load_map_query_input_output_AFTER = elinking(df_annotated_combined,
|
2151 |
text_splitter, args, key_geonames,
|
|
|
1 |
import os
|
2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
from transformers import file_utils
|
4 |
print(file_utils.default_cache_path)
|
5 |
|
|
|
15 |
|
16 |
from concurrent.futures import ThreadPoolExecutor, as_completed
|
17 |
|
18 |
+
##os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"
|
19 |
+
#os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
|
20 |
|
21 |
import torch
|
22 |
+
#torch.cuda.empty_cache() # Clear the torch CUDA cache
|
23 |
|
24 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
25 |
print(f"Device: {device}...")
|
|
|
347 |
#https://data.bioontology.org/documentation#nav_annotator
|
348 |
#https://bioportal.bioontology.org/annotatorplus
|
349 |
|
350 |
+
#key_bioportal = ""
|
351 |
+
#if args.bioportalkey_filename:
|
352 |
+
# fkeyname = args.bioportalkey_filename
|
353 |
+
# with open(fkeyname) as f:
|
354 |
+
# key_bioportal = f.read()
|
355 |
+
key_bioportal = os.environ['key_bioportal']
|
356 |
+
|
357 |
df_annot = pd.DataFrame()
|
358 |
for drm_idx, row in tqdm(df.iterrows()):
|
359 |
df_BioPortalAnnotation=process_row_BioPortal_api(args, key_bioportal, row)
|
|
|
905 |
entityBioeUrl = None
|
906 |
ALLURIScontext = []
|
907 |
|
908 |
+
#key_bioportal = ""
|
909 |
+
#if args.bioportalkey_filename:
|
910 |
+
# fkeyname = args.bioportalkey_filename
|
911 |
+
# with open(fkeyname) as f:
|
912 |
+
# key_bioportal = f.read()
|
913 |
+
key_bioportal = os.environ['key_bioportal']
|
914 |
|
915 |
# Check if args.KG_restriction exists and is not empty
|
916 |
if getattr(args, 'KG_restriction', None):
|
|
|
1988 |
parser.add_argument("--num_cores_Gliner", type=int, default=num_cores_Gliner_forDemo, help="parallel processing for Gliner annotation") # 0 means use the GPU for Gliner !
|
1989 |
|
1990 |
parser.add_argument("--entity_linking", type=str, default=EnableNEL, help="whether to make entities linking or not")
|
1991 |
+
parser.add_argument("--geonameskey_filename", type=str, default="", help="file location where it is stored the geonames api key")
|
1992 |
+
parser.add_argument("--virtuosokey_filename", type=str, default="", help="file location where it is stored the virtuoso endpoint dba pwd")
|
1993 |
+
parser.add_argument("--bioportalkey_filename", type=str, default="", help="file location where it is stored the NCBO BioPortal api key")
|
1994 |
|
1995 |
parser.add_argument("--USE_CACHE", type=str, default="False", help="whether to use cache for the NER and NEL tasks or not")
|
1996 |
parser.add_argument("--num_cores_eLinking", type=int, default=1, help="parallel processing for the entity linking process")
|
|
|
2122 |
else:
|
2123 |
cache_map_geonames = {}
|
2124 |
|
2125 |
+
#key_geonames = ""
|
2126 |
+
#if args.geonameskey_filename:
|
2127 |
+
# fkeyname = args.geonameskey_filename
|
2128 |
+
# with open(fkeyname) as f:
|
2129 |
+
# key_geonames = f.read()
|
2130 |
+
key_geonames = os.environ['key_geonames']
|
2131 |
|
2132 |
cache_map_virtuoso = None
|
2133 |
if strtobool(args.USE_CACHE):
|
|
|
2138 |
else:
|
2139 |
cache_map_virtuoso = {}
|
2140 |
|
2141 |
+
#key_virtuoso = ""
|
2142 |
+
#if args.virtuosokey_filename:
|
2143 |
+
# fkeyname = args.virtuosokey_filename
|
2144 |
+
# with open(fkeyname) as f:
|
2145 |
+
# key_virtuoso = f.read()
|
2146 |
+
key_virtuoso = os.environ['key_virtuoso']
|
2147 |
|
2148 |
df_annotated_combined, cache_map_geonames_AFTER, cache_map_virtuoso_AFTER, load_map_query_input_output_AFTER = elinking(df_annotated_combined,
|
2149 |
text_splitter, args, key_geonames,
|