jattokatarratto committed on
Commit
8a237d2
·
verified ·
1 Parent(s): 9eeed4f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -33
app.py CHANGED
@@ -1,11 +1,5 @@
1
  import os
2
 
3
- os.environ["CUDA_VISIBLE_DEVICES"] = "1,6" # to use the GPUs 3,4 only
4
-
5
- os.environ["HF_HUB_CACHE"] = "/eos/jeodpp/home/users/consose/cache/huggingface/hub"
6
- os.environ["HUGGINGFACE_HUB_CACHE"] = "/eos/jeodpp/home/users/consose/cache/huggingface/hub"
7
- os.environ["HF_HOME"] = "/eos/jeodpp/home/users/consose/cache/huggingface/hub"
8
-
9
  from transformers import file_utils
10
  print(file_utils.default_cache_path)
11
 
@@ -21,11 +15,11 @@ from transformers.pipelines.pt_utils import KeyDataset
21
 
22
  from concurrent.futures import ThreadPoolExecutor, as_completed
23
 
24
- #os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"
25
- os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
26
 
27
  import torch
28
- torch.cuda.empty_cache() # Clear cache ot torch
29
 
30
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
31
  print(f"Device: {device}...")
@@ -353,12 +347,13 @@ def annotate(df, args, pipeInner, tokenizerGliner, modelGliner, modelGlinerBio,
353
  #https://data.bioontology.org/documentation#nav_annotator
354
  #https://bioportal.bioontology.org/annotatorplus
355
 
356
- key_bioportal = ""
357
- if args.bioportalkey_filename:
358
- fkeyname = args.bioportalkey_filename
359
- with open(fkeyname) as f:
360
- key_bioportal = f.read()
361
-
 
362
  df_annot = pd.DataFrame()
363
  for drm_idx, row in tqdm(df.iterrows()):
364
  df_BioPortalAnnotation=process_row_BioPortal_api(args, key_bioportal, row)
@@ -910,11 +905,12 @@ def getUrlBioAndAllOtherBioConcepts(word, args, key_virtuoso, cache_map_virtuoso
910
  entityBioeUrl = None
911
  ALLURIScontext = []
912
 
913
- key_bioportal = ""
914
- if args.bioportalkey_filename:
915
- fkeyname = args.bioportalkey_filename
916
- with open(fkeyname) as f:
917
- key_bioportal = f.read()
 
918
 
919
  # Check if args.KG_restriction exists and is not empty
920
  if getattr(args, 'KG_restriction', None):
@@ -1992,9 +1988,9 @@ def nerBio(text, ModelsSelection, CategoriesSelection, ScoreFilt, EntityLinking,
1992
  parser.add_argument("--num_cores_Gliner", type=int, default=num_cores_Gliner_forDemo, help="parallel processing for Gliner annotation") # 0 means use the GPU for Gliner !
1993
 
1994
  parser.add_argument("--entity_linking", type=str, default=EnableNEL, help="whether to make entities linking or not")
1995
- parser.add_argument("--geonameskey_filename", type=str, default="GEONAMES-API.key", help="file location where it is stored the geonames api key")
1996
- parser.add_argument("--virtuosokey_filename", type=str, default="VIRTUOSO-dba.key", help="file location where it is stored the virtuoso endpoint dba pwd")
1997
- parser.add_argument("--bioportalkey_filename", type=str, default="NCBO-BioPortal.key", help="file location where it is stored the NCBO BioPortal api key")
1998
 
1999
  parser.add_argument("--USE_CACHE", type=str, default="False", help="whether to use cache for the NER and NEL tasks or not")
2000
  parser.add_argument("--num_cores_eLinking", type=int, default=1, help="parallel processing for the entity linking process")
@@ -2126,11 +2122,12 @@ def nerBio(text, ModelsSelection, CategoriesSelection, ScoreFilt, EntityLinking,
2126
  else:
2127
  cache_map_geonames = {}
2128
 
2129
- key_geonames = ""
2130
- if args.geonameskey_filename:
2131
- fkeyname = args.geonameskey_filename
2132
- with open(fkeyname) as f:
2133
- key_geonames = f.read()
 
2134
 
2135
  cache_map_virtuoso = None
2136
  if strtobool(args.USE_CACHE):
@@ -2141,11 +2138,12 @@ def nerBio(text, ModelsSelection, CategoriesSelection, ScoreFilt, EntityLinking,
2141
  else:
2142
  cache_map_virtuoso = {}
2143
 
2144
- key_virtuoso = ""
2145
- if args.virtuosokey_filename:
2146
- fkeyname = args.virtuosokey_filename
2147
- with open(fkeyname) as f:
2148
- key_virtuoso = f.read()
 
2149
 
2150
  df_annotated_combined, cache_map_geonames_AFTER, cache_map_virtuoso_AFTER, load_map_query_input_output_AFTER = elinking(df_annotated_combined,
2151
  text_splitter, args, key_geonames,
 
1
  import os
2
 
 
 
 
 
 
 
3
  from transformers import file_utils
4
  print(file_utils.default_cache_path)
5
 
 
15
 
16
  from concurrent.futures import ThreadPoolExecutor, as_completed
17
 
18
+ ##os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"
19
+ #os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
20
 
21
  import torch
22
+ #torch.cuda.empty_cache() # Clear cache of torch
23
 
24
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
25
  print(f"Device: {device}...")
 
347
  #https://data.bioontology.org/documentation#nav_annotator
348
  #https://bioportal.bioontology.org/annotatorplus
349
 
350
+ #key_bioportal = ""
351
+ #if args.bioportalkey_filename:
352
+ # fkeyname = args.bioportalkey_filename
353
+ # with open(fkeyname) as f:
354
+ # key_bioportal = f.read()
355
+ key_bioportal = os.environ['key_bioportal']
356
+
357
  df_annot = pd.DataFrame()
358
  for drm_idx, row in tqdm(df.iterrows()):
359
  df_BioPortalAnnotation=process_row_BioPortal_api(args, key_bioportal, row)
 
905
  entityBioeUrl = None
906
  ALLURIScontext = []
907
 
908
+ #key_bioportal = ""
909
+ #if args.bioportalkey_filename:
910
+ # fkeyname = args.bioportalkey_filename
911
+ # with open(fkeyname) as f:
912
+ # key_bioportal = f.read()
913
+ key_bioportal = os.environ['key_bioportal']
914
 
915
  # Check if args.KG_restriction exists and is not empty
916
  if getattr(args, 'KG_restriction', None):
 
1988
  parser.add_argument("--num_cores_Gliner", type=int, default=num_cores_Gliner_forDemo, help="parallel processing for Gliner annotation") # 0 means use the GPU for Gliner !
1989
 
1990
  parser.add_argument("--entity_linking", type=str, default=EnableNEL, help="whether to make entities linking or not")
1991
+ parser.add_argument("--geonameskey_filename", type=str, default="", help="file location where it is stored the geonames api key")
1992
+ parser.add_argument("--virtuosokey_filename", type=str, default="", help="file location where it is stored the virtuoso endpoint dba pwd")
1993
+ parser.add_argument("--bioportalkey_filename", type=str, default="", help="file location where it is stored the NCBO BioPortal api key")
1994
 
1995
  parser.add_argument("--USE_CACHE", type=str, default="False", help="whether to use cache for the NER and NEL tasks or not")
1996
  parser.add_argument("--num_cores_eLinking", type=int, default=1, help="parallel processing for the entity linking process")
 
2122
  else:
2123
  cache_map_geonames = {}
2124
 
2125
+ #key_geonames = ""
2126
+ #if args.geonameskey_filename:
2127
+ # fkeyname = args.geonameskey_filename
2128
+ # with open(fkeyname) as f:
2129
+ # key_geonames = f.read()
2130
+ key_geonames = os.environ['key_geonames']
2131
 
2132
  cache_map_virtuoso = None
2133
  if strtobool(args.USE_CACHE):
 
2138
  else:
2139
  cache_map_virtuoso = {}
2140
 
2141
+ #key_virtuoso = ""
2142
+ #if args.virtuosokey_filename:
2143
+ # fkeyname = args.virtuosokey_filename
2144
+ # with open(fkeyname) as f:
2145
+ # key_virtuoso = f.read()
2146
+ key_virtuoso = os.environ['key_virtuoso']
2147
 
2148
  df_annotated_combined, cache_map_geonames_AFTER, cache_map_virtuoso_AFTER, load_map_query_input_output_AFTER = elinking(df_annotated_combined,
2149
  text_splitter, args, key_geonames,