Consoli Sergio committed
Commit 5a9842d · 1 Parent(s): 193f79d

other sync changes
Files changed (2)
  1. app-demo-myMultiNER.py +20 -12
  2. nerBio.py +16 -12
app-demo-myMultiNER.py CHANGED
@@ -1,13 +1,13 @@
 import os
 
-#os.environ["CUDA_VISIBLE_DEVICES"] = "1,6" # to use the GPUs 3,4 only
+# os.environ["CUDA_VISIBLE_DEVICES"] = "1,6" # to use the GPUs 3,4 only
 #
-#os.environ["HF_HUB_CACHE"] = "/eos/jeodpp/home/users/consose/cache/huggingface/hub"
-#os.environ["HUGGINGFACE_HUB_CACHE"] = "/eos/jeodpp/home/users/consose/cache/huggingface/hub"
-#os.environ["HF_HOME"] = "/eos/jeodpp/home/users/consose/cache/huggingface/hub"
+# os.environ["HF_HUB_CACHE"] = "/eos/jeodpp/home/users/consose/cache/huggingface/hub"
+# os.environ["HUGGINGFACE_HUB_CACHE"] = "/eos/jeodpp/home/users/consose/cache/huggingface/hub"
+# os.environ["HF_HOME"] = "/eos/jeodpp/home/users/consose/cache/huggingface/hub"
 
 from transformers import file_utils
-#print(file_utils.default_cache_path)
+print(file_utils.default_cache_path)
 
 import pandas as pd
 from tqdm import tqdm
@@ -19,12 +19,12 @@ from collections import Counter
 from transformers import pipeline, AutoTokenizer
 
 #os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"
-#os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
+os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
 
 #import html
 
 import torch
-#torch.cuda.empty_cache() # Clear cache ot torch
+torch.cuda.empty_cache() # Clear cache ot torch
 
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 print(f"Device: {device}...")
@@ -48,6 +48,14 @@ from nerBio import annotate, entitiesFusion, is_cross_inside, elinking
 from llmqueryNer import call_model, call_model_with_caching, process_list, setup_gptjrc, api_call_gptjrc, model_list_gptjrc
 
 
+from joblib import Memory
+
+cachedir = 'cached'
+mem = Memory(cachedir, verbose=False)
+
+# this is to completely delete the cache:
+# mem.clear(warn=False)
+
 
 
 
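The hunk above introduces a joblib Memory disk cache. For readers unfamiliar with it: mem.cache wraps a function so that repeated calls with the same arguments are served from the cache directory instead of recomputed. A minimal sketch of the intended use (annotate_cached is a hypothetical name, not from the commit):

from joblib import Memory

mem = Memory('cached', verbose=False)

@mem.cache  # results are memoized on disk, keyed by the function arguments
def annotate_cached(text, model_name):
    # stand-in for an expensive NER call
    return len(text), model_name

annotate_cached("some text", "wikineural")  # computed and stored
annotate_cached("some text", "wikineural")  # served from the 'cached/' directory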
@@ -68,8 +76,8 @@ examples = [
 
 
 
-#models_List = ["FacebookAI/xlm-roberta-large-finetuned-conll03-english", "Babelscape/wikineural-multilingual-ner", "blaze999/Medical-NER", "urchade/gliner_large-v2.1", "urchade/gliner_large_bio-v0.1", "NCBO/BioPortal" ] # "urchade/gliner_large-v2.1", "knowledgator/gliner-multitask-large-v0.5"
-models_List = ["Babelscape/wikineural-multilingual-ner", "urchade/gliner_large-v2.1", "NCBO/BioPortal" ] # "urchade/gliner_large-v2.1", "knowledgator/gliner-multitask-large-v0.5"
+models_List = ["FacebookAI/xlm-roberta-large-finetuned-conll03-english", "Babelscape/wikineural-multilingual-ner", "blaze999/Medical-NER", "urchade/gliner_large-v2.1", "urchade/gliner_large_bio-v0.1", "NCBO/BioPortal" ] # "urchade/gliner_large-v2.1", "knowledgator/gliner-multitask-large-v0.5"
+#models_List = ["Babelscape/wikineural-multilingual-ner", "urchade/gliner_large-v2.1", "NCBO/BioPortal" ] # "urchade/gliner_large-v2.1", "knowledgator/gliner-multitask-large-v0.5"
 #models_List = ["NCBO/BioPortal" ]
 
 #categories_List = ["MED","LOC","PER","ORG","DATE","MISC"]
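This hunk switches the active list back to all six models. For context: most entries are ordinary Hugging Face token-classification checkpoints, the urchade/gliner_* entries load through the gliner package imported in nerBio.py, and NCBO/BioPortal is an annotator web service rather than a local model. A loading sketch under those assumptions (not code from this commit):

from transformers import pipeline
from gliner import GLiNER

ner_pipe = pipeline("token-classification",
                    model="Babelscape/wikineural-multilingual-ner",
                    aggregation_strategy="simple")
gliner_model = GLiNER.from_pretrained("urchade/gliner_large-v2.1")

print(ner_pipe("Sergio lives in Ispra."))
print(gliner_model.predict_entities("Sergio lives in Ispra.", ["person", "location"]))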
@@ -216,7 +224,7 @@ def nerBio(text, ModelsSelection, CategoriesSelection, ScoreFilt, EntityLinking,
 help="List of ontologies to which restrict the entity linking task.")
 #consose 20250502:
 if Counter(KGchoices) == Counter(POSSIBLE_KGchoices_List):
-parser.add_argument("--USE_CACHE", type=str, default="False",
+parser.add_argument("--USE_CACHE", type=str, default="True",
 help="whether to use cache for the NER and NEL tasks or not")
 else:
 #print("Lists do not have the same elements")
@@ -384,7 +392,7 @@ def nerBio(text, ModelsSelection, CategoriesSelection, ScoreFilt, EntityLinking,
 cache_map_geonames = {}
 
 key_geonames = ""
-if args.geonameskey_filename:
+if args.geonameskey_filename and os.path.exists(args.geonameskey_filename):
 fkeyname = args.geonameskey_filename
 with open(fkeyname) as f:
 key_geonames = f.read()
@@ -401,7 +409,7 @@ def nerBio(text, ModelsSelection, CategoriesSelection, ScoreFilt, EntityLinking,
 cache_map_virtuoso = {}
 
 key_virtuoso = ""
-if args.virtuosokey_filename:
+if args.virtuosokey_filename and os.path.exists(args.virtuosokey_filename):
 fkeyname = args.virtuosokey_filename
 with open(fkeyname) as f:
 key_virtuoso = f.read()
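The same guard (filename set and file present) is applied to every key-file read in this commit, in both files. If the pattern keeps spreading it could be factored into one helper; a sketch under that assumption (read_key is hypothetical, not in the commit):

import os

def read_key(path):
    # Returns the key file's contents, or "" when the path is unset or missing.
    if path and os.path.exists(path):
        with open(path) as f:
            return f.read()
    return ""

# usage, given the argparse namespace from the file:
# key_geonames = read_key(args.geonameskey_filename)
# key_virtuoso = read_key(args.virtuosokey_filename)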
nerBio.py CHANGED
@@ -1,10 +1,10 @@
 import os
 
-#os.environ["CUDA_VISIBLE_DEVICES"] = "1,6" #GPUs to use
-#
-#os.environ["HF_HUB_CACHE"] = "/eos/jeodpp/home/users/consose/cache/huggingface/hub"
-#os.environ["HUGGINGFACE_HUB_CACHE"] = "/eos/jeodpp/home/users/consose/cache/huggingface/hub"
-#os.environ["HF_HOME"] = "/eos/jeodpp/home/users/consose/cache/huggingface/hub"
+os.environ["CUDA_VISIBLE_DEVICES"] = "1,6" #GPUs to use
+
+os.environ["HF_HUB_CACHE"] = "/eos/jeodpp/home/users/consose/cache/huggingface/hub"
+os.environ["HUGGINGFACE_HUB_CACHE"] = "/eos/jeodpp/home/users/consose/cache/huggingface/hub"
+os.environ["HF_HOME"] = "/eos/jeodpp/home/users/consose/cache/huggingface/hub"
 
 from transformers import file_utils
 
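These environment variables are now active, and ordering matters: CUDA_VISIBLE_DEVICES and the Hugging Face cache variables are read when torch and transformers initialize, so they must be set before those imports, exactly as this file does. A minimal sketch of the rule (an assumed pattern matching the file's structure, not new commit code):

import os

# Set before importing torch/transformers; both read these at import or
# first-use time, so assigning them later has no effect.
os.environ["CUDA_VISIBLE_DEVICES"] = "1,6"
os.environ["HF_HOME"] = "/eos/jeodpp/home/users/consose/cache/huggingface/hub"

import torch                           # sees only the two listed GPUs
from transformers import file_utils
print(file_utils.default_cache_path)   # now resolves under HF_HOME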
@@ -21,10 +21,10 @@ from collections import Counter
 from gliner import GLiNER, GLiNERConfig, data_processing
 
 #os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"
-#os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
+os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
 
 import torch
-#torch.cuda.empty_cache() # Clear cache ot torch
+torch.cuda.empty_cache() # Clear cache ot torch
 
 import logging
 
@@ -67,6 +67,10 @@ import numpy as np
 
 from retrieverRAG_testing import RAG_retrieval_Base, RAG_retrieval_Z_scores, RAG_retrieval_Percentile, RAG_retrieval_TopK
 
+from joblib import Memory
+
+cachedir = 'cached'
+mem = Memory(cachedir, verbose=False)
 
 # this is to completely delete the cache:
 # mem.clear(warn=False)
@@ -384,7 +388,7 @@ def annotate(df, args, pipeInner, tokenizerGliner, modelGliner, modelGlinerBio,
 #https://bioportal.bioontology.org/annotatorplus
 
 key_bioportal = ""
-if args.bioportalkey_filename:
+if args.bioportalkey_filename and os.path.exists(args.bioportalkey_filename):
 fkeyname = args.bioportalkey_filename
 with open(fkeyname) as f:
 key_bioportal = f.read()
@@ -1200,7 +1204,7 @@ def getUrlBioAndAllOtherBioConcepts(word, args, key_virtuoso, cache_map_virtuoso
 ALLURIScontext = []
 
 key_bioportal = ""
-if args.bioportalkey_filename:
+if args.bioportalkey_filename and os.path.exists(args.bioportalkey_filename):
 fkeyname = args.bioportalkey_filename
 with open(fkeyname) as f:
 key_bioportal = f.read()
@@ -2321,7 +2325,7 @@ if __name__ == '__main__':
 # cache_map_bioportal = {}
 #
 # key_bioportal = ""
-# if args.bioportalkey_filename:
+# if args.bioportalkey_filename and os.path.exists(args.bioportalkey_filename):
 # fkeyname = args.bioportalkey_filename
 # with open(fkeyname) as f:
 # key_bioportal = f.read()
@@ -2441,7 +2445,7 @@ if __name__ == '__main__':
 cache_map_geonames = {}
 
 key_geonames = ""
-if args.geonameskey_filename:
+if args.geonameskey_filename and os.path.exists(args.geonameskey_filename):
 fkeyname = args.geonameskey_filename
 with open(fkeyname) as f:
 key_geonames = f.read()
@@ -2458,7 +2462,7 @@ if __name__ == '__main__':
 cache_map_virtuoso = {}
 
 key_virtuoso = ""
-if args.virtuosokey_filename:
+if args.virtuosokey_filename and os.path.exists(args.virtuosokey_filename):
 fkeyname = args.virtuosokey_filename
 with open(fkeyname) as f:
 key_virtuoso = f.read()