jattokatarratto committed on
Commit
ce4ad6d
·
verified ·
1 Parent(s): 309d907

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -151
app.py CHANGED
@@ -754,8 +754,11 @@ def entitiesFusion(df_annotated, args):
754
  # df_annotated = df_annotated[(df_annotated['ToLink'] == df_annotated['word']) | df_annotated['ToLink'].isna()]
755
 
756
  # in all the rows having a value not null for the column "ToLink", compare this value to that of the column "word". If they are different, set the value in "ToLink" to None
 
 
757
  df_annotated.loc[
758
- (~df_annotated['ToLink'].isnull()) & (df_annotated['ToLink'] != df_annotated['word']), 'ToLink'] = None
 
759
 
760
  # Now fill every empty value in the "ToLink" column with the value from the "word" column of the same row:
761
  # df_annotated['ToLink'] = df_annotated['ToLink'].fillna(df_annotated['word'])
@@ -944,10 +947,11 @@ def getUrlBioAndAllOtherBioConcepts(word, args, key_virtuoso, cache_map_virtuoso
944
 
945
  # Construct the full SPARQL query
946
  query = f"""
 
947
  SELECT ?concept ?label (COUNT(?edge) AS ?score)
948
  {from_clauses}
949
  WHERE {{
950
- ?concept skos:prefLabel|rdfs:label|skos:altLabel|obo:hasRelatedSynonym ?label .
951
  FILTER (LCASE(STR(?label)) = "{word.lower()}")
952
  ?concept ?edge ?o .
953
  }}
@@ -988,9 +992,10 @@ def getUrlBioAndAllOtherBioConcepts(word, args, key_virtuoso, cache_map_virtuoso
988
  print("KG_restriction is not provided or empty - Consider all the KGs in the virtuoso endpoint")
989
 
990
  query = f"""
 
991
  SELECT ?concept ?label (COUNT(?edge) AS ?score)
992
  WHERE {{
993
- ?concept skos:prefLabel|rdfs:label|skos:altLabel|obo:hasRelatedSynonym ?label .
994
  FILTER (LCASE(STR(?label)) = "{word.lower()}")
995
  ?concept ?edge ?o .
996
  }}
@@ -1343,96 +1348,7 @@ def virtuoso_api_call(word, text_splitter, args, key_virtuoso, cache_map_virtuos
1343
 
1344
  return None, None, None, None, None, None, cache_map_virtuoso, load_map_query_input_output
1345
 
1346
- # # Check if args.KG_restriction exists and is not empty
1347
- # if getattr(args, 'KG_restriction', None):
1348
- #
1349
- # # api call
1350
- # if strtobool(args.debug):
1351
- # print("--- " + word.lower())
1352
- #
1353
- # # args.KG_restriction exists and is not empty
1354
- # if strtobool(args.debug):
1355
- # print("KG_restriction is provided and not empty:", args.KG_restriction)
1356
- #
1357
- # from_clauses = ' '.join([f"FROM <{choice}>" for choice in args.KG_restriction])
1358
- #
1359
- # # Construct the full SPARQL query
1360
- # query = f"""
1361
- # SELECT ?concept ?label (COUNT(?edge) AS ?score)
1362
- # {from_clauses}
1363
- # WHERE {{
1364
- # ?concept skos:prefLabel|rdfs:label|skos:altLabel|obo:hasRelatedSynonym ?label .
1365
- # FILTER (LCASE(STR(?label)) = "{word.lower()}")
1366
- # ?concept ?edge ?o .
1367
- # }}
1368
- # GROUP BY ?concept ?label
1369
- # ORDER BY DESC(?score)
1370
- # """
1371
- #
1372
- # else:
1373
- # # args.KG_restriction does not exist or is empty
1374
- # if strtobool(args.debug):
1375
- # print("--- "+word.lower())
1376
- # print("KG_restriction is not provided or empty - Consider all the KGs in the virtuoso endpoint")
1377
- #
1378
- # query = f"""
1379
- # SELECT ?concept ?label (COUNT(?edge) AS ?score)
1380
- # WHERE {{
1381
- # ?concept skos:prefLabel|rdfs:label|skos:altLabel|obo:hasRelatedSynonym ?label .
1382
- # FILTER (LCASE(STR(?label)) = "{word.lower()}")
1383
- # ?concept ?edge ?o .
1384
- # }}
1385
- # GROUP BY ?concept ?label
1386
- # ORDER BY DESC(?score)
1387
- # """
1388
- #
1389
- #
1390
- # try:
1391
- # responseText = sparqlQuery(endpoint, query, VirtuosoUsername, key_virtuoso, strtobool(args.USE_CACHE))
1392
- #
1393
- # # Parse the response as JSON
1394
- # results = json.loads(responseText)
1395
- #
1396
- #
1397
- # if len(results) > 0 and results['results']['bindings']:
1398
- #
1399
- # entityBioeUrl = str(results['results']['bindings'][0]['concept']['value'])
1400
- #
1401
- # if cache_map_virtuoso is not None:
1402
- # if not word in cache_map_virtuoso:
1403
- # cache_map_virtuoso[word] = {}
1404
- # cache_map_virtuoso[word][contextWordVirtuoso] = entityBioeUrl
1405
- #
1406
- #
1407
- # # # loop the results
1408
- # for result in results['results']['bindings']:
1409
- # #print(result)
1410
- #
1411
- # contextConcept = result['concept']['value']
1412
- # ALLURIScontext.append(contextConcept)
1413
- # if cache_map_virtuoso is not None:
1414
- # if not word in cache_map_virtuoso:
1415
- # cache_map_virtuoso[word] = {}
1416
- # cache_map_virtuoso[word][contextConcept] = None
1417
- #
1418
- # else:
1419
- #
1420
- # if cache_map_virtuoso is not None:
1421
- # if not word in cache_map_virtuoso:
1422
- # cache_map_virtuoso[word] = {}
1423
- # cache_map_virtuoso[word][contextWordVirtuoso] = None
1424
- #
1425
- # except Exception as err:
1426
- #
1427
- # # if cache_map_virtuoso is not None:
1428
- # # if not word in cache_map_virtuoso:
1429
- # # cache_map_virtuoso[word] = {}
1430
- # # cache_map_virtuoso[word][contextWordVirtuoso] = None
1431
- #
1432
- # return None, None, None, None, None, None, cache_map_virtuoso, load_map_query_input_output
1433
-
1434
-
1435
-
1436
 
1437
  if entityBioeUrl:
1438
 
@@ -1463,43 +1379,23 @@ def virtuoso_api_call(word, text_splitter, args, key_virtuoso, cache_map_virtuos
1463
  load_map_query_input_output)
1464
  else:
1465
 
1466
- # query = f"""
1467
- # SELECT DISTINCT ?labelS ?labelP ?labelO
1468
- # WHERE {{
1469
- # {{
1470
- # <{entityBioeUrl}> ?p ?o.
1471
- # <{entityBioeUrl}> skos:prefLabel|rdfs:label|skos:altLabel|obo:hasRelatedSynonym ?labelS .
1472
- # ?p skos:prefLabel|rdfs:label|skos:altLabel|obo:hasRelatedSynonym ?labelP .
1473
- # ?o skos:prefLabel|rdfs:label|skos:altLabel|obo:hasRelatedSynonym ?labelO .
1474
- # }}
1475
- # UNION
1476
- # {{
1477
- # SELECT ?labelS ?labelP ?labelO
1478
- # WHERE {{
1479
- # <{entityBioeUrl}> ?p ?labelO .
1480
- # <{entityBioeUrl}> skos:prefLabel|rdfs:label|skos:altLabel|obo:hasRelatedSynonym ?labelS .
1481
- # ?p skos:prefLabel|rdfs:label|skos:altLabel|obo:hasRelatedSynonym ?labelP .
1482
- # FILTER (isLiteral(?labelO))
1483
- # }}
1484
- # }}
1485
- # }}
1486
- # """
1487
  query = f"""
 
1488
  SELECT DISTINCT ?labelS ?labelP ?labelO
1489
  WHERE {{
1490
  {{
1491
  <{entityBioeUrl}> ?p ?o.
1492
- <{entityBioeUrl}> skos:prefLabel|rdfs:label|skos:altLabel|obo:hasRelatedSynonym ?labelS .
1493
- ?p skos:prefLabel|rdfs:label|skos:altLabel|obo:hasRelatedSynonym ?labelP .
1494
- ?o skos:prefLabel|rdfs:label|skos:altLabel|obo:hasRelatedSynonym ?labelO .
1495
  }}
1496
  UNION
1497
  {{
1498
  SELECT ?labelS ?labelP ?labelO
1499
  WHERE {{
1500
  <{entityBioeUrl}> ?p ?labelO .
1501
- <{entityBioeUrl}> skos:prefLabel|rdfs:label|skos:altLabel|obo:hasRelatedSynonym ?labelS .
1502
- ?p skos:prefLabel|rdfs:label|skos:altLabel|obo:hasRelatedSynonym ?labelP .
1503
  FILTER (isLiteral(?labelO))
1504
  }}
1505
  }}
@@ -1511,9 +1407,9 @@ def virtuoso_api_call(word, text_splitter, args, key_virtuoso, cache_map_virtuos
1511
  ?ooo rdf:type owl:Restriction .
1512
  ?ooo owl:onProperty ?p .
1513
  ?ooo owl:someValuesFrom ?o .
1514
- <{entityBioeUrl}> skos:prefLabel|rdfs:label|skos:altLabel|obo:hasRelatedSynonym ?labelS .
1515
- ?p skos:prefLabel|rdfs:label|skos:altLabel|obo:hasRelatedSynonym ?labelP .
1516
- ?o skos:prefLabel|rdfs:label|skos:altLabel|obo:hasRelatedSynonym ?labelO .
1517
  }}
1518
  }}
1519
  }}
@@ -1670,43 +1566,23 @@ def virtuoso_api_call(word, text_splitter, args, key_virtuoso, cache_map_virtuos
1670
 
1671
  if not unique_listLabelTriples:
1672
 
1673
- # query = f"""
1674
- # SELECT DISTINCT ?labelS ?labelP ?labelO
1675
- # WHERE {{
1676
- # {{
1677
- # <{xxUrl}> ?p ?o.
1678
- # <{xxUrl}> skos:prefLabel|rdfs:label|skos:altLabel|obo:hasRelatedSynonym ?labelS .
1679
- # ?p skos:prefLabel|rdfs:label|skos:altLabel|obo:hasRelatedSynonym ?labelP .
1680
- # ?o skos:prefLabel|rdfs:label|skos:altLabel|obo:hasRelatedSynonym ?labelO .
1681
- # }}
1682
- # UNION
1683
- # {{
1684
- # SELECT ?labelS ?labelP ?labelO
1685
- # WHERE {{
1686
- # <{xxUrl}> ?p ?labelO .
1687
- # <{xxUrl}> skos:prefLabel|rdfs:label|skos:altLabel|obo:hasRelatedSynonym ?labelS .
1688
- # ?p skos:prefLabel|rdfs:label|skos:altLabel|obo:hasRelatedSynonym ?labelP .
1689
- # FILTER (isLiteral(?labelO))
1690
- # }}
1691
- # }}
1692
- # }}
1693
- # """
1694
  query = f"""
 
1695
  SELECT DISTINCT ?labelS ?labelP ?labelO
1696
  WHERE {{
1697
  {{
1698
  <{xxUrl}> ?p ?o.
1699
- <{xxUrl}> skos:prefLabel|rdfs:label|skos:altLabel|obo:hasRelatedSynonym ?labelS .
1700
- ?p skos:prefLabel|rdfs:label|skos:altLabel|obo:hasRelatedSynonym ?labelP .
1701
- ?o skos:prefLabel|rdfs:label|skos:altLabel|obo:hasRelatedSynonym ?labelO .
1702
  }}
1703
  UNION
1704
  {{
1705
  SELECT ?labelS ?labelP ?labelO
1706
  WHERE {{
1707
  <{xxUrl}> ?p ?labelO .
1708
- <{xxUrl}> skos:prefLabel|rdfs:label|skos:altLabel|obo:hasRelatedSynonym ?labelS .
1709
- ?p skos:prefLabel|rdfs:label|skos:altLabel|obo:hasRelatedSynonym ?labelP .
1710
  FILTER (isLiteral(?labelO))
1711
  }}
1712
  }}
@@ -1718,9 +1594,9 @@ def virtuoso_api_call(word, text_splitter, args, key_virtuoso, cache_map_virtuos
1718
  ?ooo rdf:type owl:Restriction .
1719
  ?ooo owl:onProperty ?p .
1720
  ?ooo owl:someValuesFrom ?o .
1721
- <{xxUrl}> skos:prefLabel|rdfs:label|skos:altLabel|obo:hasRelatedSynonym ?labelS .
1722
- ?p skos:prefLabel|rdfs:label|skos:altLabel|obo:hasRelatedSynonym ?labelP .
1723
- ?o skos:prefLabel|rdfs:label|skos:altLabel|obo:hasRelatedSynonym ?labelO .
1724
  }}
1725
  }}
1726
  }}
 
754
  # df_annotated = df_annotated[(df_annotated['ToLink'] == df_annotated['word']) | df_annotated['ToLink'].isna()]
755
 
756
  # For every row whose "ToLink" value is not null, compare it case-insensitively with the value in the "word" column; if they differ, set "ToLink" to None.
757
+ #df_annotated.loc[
758
+ # (~df_annotated['ToLink'].isnull()) & (df_annotated['ToLink'] != df_annotated['word']), 'ToLink'] = None
759
  df_annotated.loc[
760
+ (~df_annotated['ToLink'].isnull()) & (
761
+ df_annotated['ToLink'].str.casefold() != df_annotated['word'].str.casefold()), 'ToLink'] = None
762
 
763
  # Now fill every empty value in the "ToLink" column with the value from the "word" column of the same row:
764
  # df_annotated['ToLink'] = df_annotated['ToLink'].fillna(df_annotated['word'])
 
947
 
948
  # Construct the full SPARQL query
949
  query = f"""
950
+ prefix skosxl: <http://www.w3.org/2008/05/skos-xl#>
951
  SELECT ?concept ?label (COUNT(?edge) AS ?score)
952
  {from_clauses}
953
  WHERE {{
954
+ ?concept skos:prefLabel|rdfs:label|skos:altLabel|skosxl:literalForm|obo:hasRelatedSynonym ?label .
955
  FILTER (LCASE(STR(?label)) = "{word.lower()}")
956
  ?concept ?edge ?o .
957
  }}
 
992
  print("KG_restriction is not provided or empty - Consider all the KGs in the virtuoso endpoint")
993
 
994
  query = f"""
995
+ prefix skosxl: <http://www.w3.org/2008/05/skos-xl#>
996
  SELECT ?concept ?label (COUNT(?edge) AS ?score)
997
  WHERE {{
998
+ ?concept skos:prefLabel|rdfs:label|skos:altLabel|skosxl:literalForm|obo:hasRelatedSynonym ?label .
999
  FILTER (LCASE(STR(?label)) = "{word.lower()}")
1000
  ?concept ?edge ?o .
1001
  }}
 
1348
 
1349
  return None, None, None, None, None, None, cache_map_virtuoso, load_map_query_input_output
1350
 
1351
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1352
 
1353
  if entityBioeUrl:
1354
 
 
1379
  load_map_query_input_output)
1380
  else:
1381
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1382
  query = f"""
1383
+ prefix skosxl: <http://www.w3.org/2008/05/skos-xl#>
1384
  SELECT DISTINCT ?labelS ?labelP ?labelO
1385
  WHERE {{
1386
  {{
1387
  <{entityBioeUrl}> ?p ?o.
1388
+ <{entityBioeUrl}> skos:prefLabel|rdfs:label|skos:altLabel|skosxl:literalForm|obo:hasRelatedSynonym ?labelS .
1389
+ ?p skos:prefLabel|rdfs:label|skos:altLabel|skosxl:literalForm|obo:hasRelatedSynonym ?labelP .
1390
+ ?o skos:prefLabel|rdfs:label|skos:altLabel|skosxl:literalForm|obo:hasRelatedSynonym ?labelO .
1391
  }}
1392
  UNION
1393
  {{
1394
  SELECT ?labelS ?labelP ?labelO
1395
  WHERE {{
1396
  <{entityBioeUrl}> ?p ?labelO .
1397
+ <{entityBioeUrl}> skos:prefLabel|rdfs:label|skos:altLabel|skosxl:literalForm|obo:hasRelatedSynonym ?labelS .
1398
+ ?p skos:prefLabel|rdfs:label|skos:altLabel|skosxl:literalForm|obo:hasRelatedSynonym ?labelP .
1399
  FILTER (isLiteral(?labelO))
1400
  }}
1401
  }}
 
1407
  ?ooo rdf:type owl:Restriction .
1408
  ?ooo owl:onProperty ?p .
1409
  ?ooo owl:someValuesFrom ?o .
1410
+ <{entityBioeUrl}> skos:prefLabel|rdfs:label|skos:altLabel|skosxl:literalForm|obo:hasRelatedSynonym ?labelS .
1411
+ ?p skos:prefLabel|rdfs:label|skos:altLabel|skosxl:literalForm|obo:hasRelatedSynonym ?labelP .
1412
+ ?o skos:prefLabel|rdfs:label|skos:altLabel|skosxl:literalForm|obo:hasRelatedSynonym ?labelO .
1413
  }}
1414
  }}
1415
  }}
 
1566
 
1567
  if not unique_listLabelTriples:
1568
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1569
  query = f"""
1570
+ prefix skosxl: <http://www.w3.org/2008/05/skos-xl#>
1571
  SELECT DISTINCT ?labelS ?labelP ?labelO
1572
  WHERE {{
1573
  {{
1574
  <{xxUrl}> ?p ?o.
1575
+ <{xxUrl}> skos:prefLabel|rdfs:label|skos:altLabel|skosxl:literalForm|obo:hasRelatedSynonym ?labelS .
1576
+ ?p skos:prefLabel|rdfs:label|skos:altLabel|skosxl:literalForm|obo:hasRelatedSynonym ?labelP .
1577
+ ?o skos:prefLabel|rdfs:label|skos:altLabel|skosxl:literalForm|obo:hasRelatedSynonym ?labelO .
1578
  }}
1579
  UNION
1580
  {{
1581
  SELECT ?labelS ?labelP ?labelO
1582
  WHERE {{
1583
  <{xxUrl}> ?p ?labelO .
1584
+ <{xxUrl}> skos:prefLabel|rdfs:label|skos:altLabel|skosxl:literalForm|obo:hasRelatedSynonym ?labelS .
1585
+ ?p skos:prefLabel|rdfs:label|skos:altLabel|skosxl:literalForm|obo:hasRelatedSynonym ?labelP .
1586
  FILTER (isLiteral(?labelO))
1587
  }}
1588
  }}
 
1594
  ?ooo rdf:type owl:Restriction .
1595
  ?ooo owl:onProperty ?p .
1596
  ?ooo owl:someValuesFrom ?o .
1597
+ <{xxUrl}> skos:prefLabel|rdfs:label|skos:altLabel|skosxl:literalForm|obo:hasRelatedSynonym ?labelS .
1598
+ ?p skos:prefLabel|rdfs:label|skos:altLabel|skosxl:literalForm|obo:hasRelatedSynonym ?labelP .
1599
+ ?o skos:prefLabel|rdfs:label|skos:altLabel|skosxl:literalForm|obo:hasRelatedSynonym ?labelO .
1600
  }}
1601
  }}
1602
  }}