Spaces:

jrc-ai
/

MultiNER-simplified

Running

App Files Community

jattokatarratto committed on Feb 4

Commit

8a237d2

verified ·

1 Parent(s): 9eeed4f

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -33

app.py CHANGED Viewed

@@ -1,11 +1,5 @@
 import os
-os.environ["CUDA_VISIBLE_DEVICES"] = "1,6" # to use the GPUs 3,4 only
-os.environ["HF_HUB_CACHE"] = "/eos/jeodpp/home/users/consose/cache/huggingface/hub"
-os.environ["HUGGINGFACE_HUB_CACHE"] = "/eos/jeodpp/home/users/consose/cache/huggingface/hub"
-os.environ["HF_HOME"] = "/eos/jeodpp/home/users/consose/cache/huggingface/hub"
 from transformers import file_utils
 print(file_utils.default_cache_path)
@@ -21,11 +15,11 @@ from transformers.pipelines.pt_utils import KeyDataset
 from concurrent.futures import ThreadPoolExecutor, as_completed
-#os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"
-os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
 import torch
-torch.cuda.empty_cache()  # Clear cache ot torch
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 print(f"Device: {device}...")
@@ -353,12 +347,13 @@ def annotate(df, args, pipeInner, tokenizerGliner, modelGliner, modelGlinerBio,
         #https://data.bioontology.org/documentation#nav_annotator
         #https://bioportal.bioontology.org/annotatorplus
-        key_bioportal = ""
-        if args.bioportalkey_filename:
-            fkeyname = args.bioportalkey_filename
-            with open(fkeyname) as f:
-                key_bioportal = f.read()
         df_annot = pd.DataFrame()
         for drm_idx, row in tqdm(df.iterrows()):
             df_BioPortalAnnotation=process_row_BioPortal_api(args, key_bioportal, row)
@@ -910,11 +905,12 @@ def getUrlBioAndAllOtherBioConcepts(word, args, key_virtuoso, cache_map_virtuoso
     entityBioeUrl = None
     ALLURIScontext = []
-    key_bioportal = ""
-    if args.bioportalkey_filename:
-        fkeyname = args.bioportalkey_filename
-        with open(fkeyname) as f:
-            key_bioportal = f.read()
     # Check if args.KG_restriction exists and is not empty
     if getattr(args, 'KG_restriction', None):
@@ -1992,9 +1988,9 @@ def nerBio(text, ModelsSelection, CategoriesSelection, ScoreFilt, EntityLinking,
     parser.add_argument("--num_cores_Gliner", type=int, default=num_cores_Gliner_forDemo, help="parallel processing for Gliner annotation")  # 0 means use the GPU for Gliner !
     parser.add_argument("--entity_linking", type=str, default=EnableNEL, help="whether to make entities linking or not")
-    parser.add_argument("--geonameskey_filename", type=str, default="GEONAMES-API.key", help="file location where it is stored the geonames api key")
-    parser.add_argument("--virtuosokey_filename", type=str, default="VIRTUOSO-dba.key", help="file location where it is stored the virtuoso endpoint dba pwd")
-    parser.add_argument("--bioportalkey_filename", type=str, default="NCBO-BioPortal.key", help="file location where it is stored the NCBO  BioPortal api key")
     parser.add_argument("--USE_CACHE", type=str, default="False", help="whether to use cache for the NER and NEL tasks or not")
     parser.add_argument("--num_cores_eLinking", type=int, default=1, help="parallel processing for the entity linking process")
@@ -2126,11 +2122,12 @@ def nerBio(text, ModelsSelection, CategoriesSelection, ScoreFilt, EntityLinking,
                 else:
                     cache_map_geonames = {}
-            key_geonames = ""
-            if args.geonameskey_filename:
-                fkeyname = args.geonameskey_filename
-                with open(fkeyname) as f:
-                    key_geonames = f.read()
             cache_map_virtuoso = None
             if strtobool(args.USE_CACHE):
@@ -2141,11 +2138,12 @@ def nerBio(text, ModelsSelection, CategoriesSelection, ScoreFilt, EntityLinking,
                 else:
                     cache_map_virtuoso = {}
-            key_virtuoso = ""
-            if args.virtuosokey_filename:
-                fkeyname = args.virtuosokey_filename
-                with open(fkeyname) as f:
-                    key_virtuoso = f.read()
             df_annotated_combined, cache_map_geonames_AFTER, cache_map_virtuoso_AFTER, load_map_query_input_output_AFTER = elinking(df_annotated_combined,
                                                                                                                                  text_splitter, args, key_geonames,

 import os
 from transformers import file_utils
 print(file_utils.default_cache_path)
 from concurrent.futures import ThreadPoolExecutor, as_completed
+##os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"
+#os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
 import torch
+#torch.cuda.empty_cache()  # Clear cache ot torch
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 print(f"Device: {device}...")
         #https://data.bioontology.org/documentation#nav_annotator
         #https://bioportal.bioontology.org/annotatorplus
+        #key_bioportal = ""
+        #if args.bioportalkey_filename:
+        #    fkeyname = args.bioportalkey_filename
+        #    with open(fkeyname) as f:
+        #        key_bioportal = f.read()
+        key_bioportal = os.environ['key_bioportal']
         df_annot = pd.DataFrame()
         for drm_idx, row in tqdm(df.iterrows()):
             df_BioPortalAnnotation=process_row_BioPortal_api(args, key_bioportal, row)
     entityBioeUrl = None
     ALLURIScontext = []
+    #key_bioportal = ""
+    #if args.bioportalkey_filename:
+    #    fkeyname = args.bioportalkey_filename
+    #    with open(fkeyname) as f:
+    #        key_bioportal = f.read()
+    key_bioportal = os.environ['key_bioportal']
     # Check if args.KG_restriction exists and is not empty
     if getattr(args, 'KG_restriction', None):
     parser.add_argument("--num_cores_Gliner", type=int, default=num_cores_Gliner_forDemo, help="parallel processing for Gliner annotation")  # 0 means use the GPU for Gliner !
     parser.add_argument("--entity_linking", type=str, default=EnableNEL, help="whether to make entities linking or not")
+    parser.add_argument("--geonameskey_filename", type=str, default="", help="file location where it is stored the geonames api key")
+    parser.add_argument("--virtuosokey_filename", type=str, default="", help="file location where it is stored the virtuoso endpoint dba pwd")
+    parser.add_argument("--bioportalkey_filename", type=str, default="", help="file location where it is stored the NCBO  BioPortal api key")
     parser.add_argument("--USE_CACHE", type=str, default="False", help="whether to use cache for the NER and NEL tasks or not")
     parser.add_argument("--num_cores_eLinking", type=int, default=1, help="parallel processing for the entity linking process")
                 else:
                     cache_map_geonames = {}
+            #key_geonames = ""
+            #if args.geonameskey_filename:
+            #    fkeyname = args.geonameskey_filename
+            #    with open(fkeyname) as f:
+            #        key_geonames = f.read()
+            key_geonames = os.environ['key_geonames']
             cache_map_virtuoso = None
             if strtobool(args.USE_CACHE):
                 else:
                     cache_map_virtuoso = {}
+            #key_virtuoso = ""
+            #if args.virtuosokey_filename:
+            #    fkeyname = args.virtuosokey_filename
+            #    with open(fkeyname) as f:
+            #        key_virtuoso = f.read()
+            key_virtuoso = os.environ['key_virtuoso']
             df_annotated_combined, cache_map_geonames_AFTER, cache_map_virtuoso_AFTER, load_map_query_input_output_AFTER = elinking(df_annotated_combined,
                                                                                                                                  text_splitter, args, key_geonames,