Spaces:
Running
Running
Update src/bin/PROBE.py
Browse files
- src/bin/PROBE.py +10 -10
src/bin/PROBE.py
CHANGED
|
@@ -17,7 +17,9 @@ def load_representation(multi_col_representation_vector_file_path):
|
|
| 17 |
|
| 18 |
def run_probe(benchmarks, representation_name, representation_file_human, representation_file_affinity, similarity_tasks=["Sparse","200","500"], function_prediction_aspect="All_Aspects", function_prediction_dataset="All_Data_Sets", family_prediction_dataset=["nc","uc50","uc30","mm15"], detailed_output=False):
|
| 19 |
print("\n\nPROBE (Protein RepresentatiOn Benchmark) run is started...\n\n")
|
| 20 |
-
|
|
|
|
|
|
|
| 21 |
if any(item in ['similarity', 'function', 'family', 'all'] for item in benchmarks):
|
| 22 |
print("\nRepresentation vectors are loading...\n")
|
| 23 |
human_representation_dataframe = load_representation(representation_file_human)
|
|
@@ -30,9 +32,7 @@ def run_probe(benchmarks, representation_name, representation_file_human, repres
|
|
| 30 |
ssi.similarity_tasks = similarity_tasks
|
| 31 |
ssi.detailed_output = detailed_output
|
| 32 |
similarity_result = ssi.calculate_all_correlations()
|
| 33 |
-
|
| 34 |
-
print(similarity_result)
|
| 35 |
-
|
| 36 |
|
| 37 |
if "function" in benchmarks:
|
| 38 |
print("\n\nOntology-based protein function prediction benchmark is running...\n")
|
|
@@ -42,26 +42,26 @@ def run_probe(benchmarks, representation_name, representation_file_human, repres
|
|
| 42 |
fp.representation_name = representation_name
|
| 43 |
fp.detailed_output = detailed_output
|
| 44 |
function_results = fp.pred_output()
|
| 45 |
-
|
| 46 |
-
print(function_results)
|
| 47 |
|
| 48 |
if "family" in benchmarks:
|
| 49 |
print("\n\nDrug target protein family classification benchmark is running...\n")
|
| 50 |
tfc.representation_path = representation_file_human
|
| 51 |
tfc.representation_name = representation_name
|
| 52 |
tfc.detailed_output = detailed_output
|
|
|
|
| 53 |
for dataset in family_prediction_dataset:
|
| 54 |
family_result = tfc.score_protein_rep(dataset)
|
| 55 |
-
|
| 56 |
-
print(family_result)
|
| 57 |
|
| 58 |
if "affinity" in benchmarks:
|
| 59 |
print("\n\nProtein-protein binding affinity estimation benchmark is running...\n")
|
| 60 |
bae.skempi_vectors_path = representation_file_affinity
|
| 61 |
bae.representation_name = representation_name
|
| 62 |
affinity_result = bae.predict_affinities_and_report_results()
|
| 63 |
-
|
| 64 |
-
|
|
|
|
| 65 |
|
| 66 |
print("\n\nPROBE (Protein RepresentatiOn Benchmark) run is finished...\n")
|
| 67 |
return 0
|
|
|
|
| 17 |
|
| 18 |
def run_probe(benchmarks, representation_name, representation_file_human, representation_file_affinity, similarity_tasks=["Sparse","200","500"], function_prediction_aspect="All_Aspects", function_prediction_dataset="All_Data_Sets", family_prediction_dataset=["nc","uc50","uc30","mm15"], detailed_output=False):
|
| 19 |
print("\n\nPROBE (Protein RepresentatiOn Benchmark) run is started...\n\n")
|
| 20 |
+
result = {}
|
| 21 |
+
result['Method'] = representation_name
|
| 22 |
+
|
| 23 |
if any(item in ['similarity', 'function', 'family', 'all'] for item in benchmarks):
|
| 24 |
print("\nRepresentation vectors are loading...\n")
|
| 25 |
human_representation_dataframe = load_representation(representation_file_human)
|
|
|
|
| 32 |
ssi.similarity_tasks = similarity_tasks
|
| 33 |
ssi.detailed_output = detailed_output
|
| 34 |
similarity_result = ssi.calculate_all_correlations()
|
| 35 |
+
result['similarity'] = similarity_result
|
|
|
|
|
|
|
| 36 |
|
| 37 |
if "function" in benchmarks:
|
| 38 |
print("\n\nOntology-based protein function prediction benchmark is running...\n")
|
|
|
|
| 42 |
fp.representation_name = representation_name
|
| 43 |
fp.detailed_output = detailed_output
|
| 44 |
function_results = fp.pred_output()
|
| 45 |
+
result['function'] = function_results
|
|
|
|
| 46 |
|
| 47 |
if "family" in benchmarks:
|
| 48 |
print("\n\nDrug target protein family classification benchmark is running...\n")
|
| 49 |
tfc.representation_path = representation_file_human
|
| 50 |
tfc.representation_name = representation_name
|
| 51 |
tfc.detailed_output = detailed_output
|
| 52 |
+
result['family'] = {}
|
| 53 |
for dataset in family_prediction_dataset:
|
| 54 |
family_result = tfc.score_protein_rep(dataset)
|
| 55 |
+
result['family']['dataset'] = family_result
|
|
|
|
| 56 |
|
| 57 |
if "affinity" in benchmarks:
|
| 58 |
print("\n\nProtein-protein binding affinity estimation benchmark is running...\n")
|
| 59 |
bae.skempi_vectors_path = representation_file_affinity
|
| 60 |
bae.representation_name = representation_name
|
| 61 |
affinity_result = bae.predict_affinities_and_report_results()
|
| 62 |
+
result['affinity'] = affinity_result
|
| 63 |
+
|
| 64 |
+
print(result)
|
| 65 |
|
| 66 |
print("\n\nPROBE (Protein RepresentatiOn Benchmark) run is finished...\n")
|
| 67 |
return 0
|