PROBE

Running

App Files Files Community

gyigit committed on Sep 30, 2024

Commit

a2e6203

1 Parent(s): 25f445b

update app

Browse files

Files changed (2) hide show

app.py +3 -23
src/bin/PROBE.py +40 -0

app.py CHANGED Viewed

@@ -9,6 +9,7 @@ import json
 import yaml
 from src.about import *
 global data_component, filter_component
@@ -19,19 +20,6 @@ def get_baseline_df():
     df = df[present_columns]
     return df
-def update_yaml(representation_name, benchmark_type, human_file_path, skempi_file_path):
-    with open("./src/bin/probe_config.yaml", 'r') as file:
-        yaml_data = yaml.safe_load(file)
-    yaml_data['representation_name'] = representation_name
-    yaml_data['benchmark'] = benchmark_type
-    yaml_data['representation_file_human'] = human_file
-    yaml_data['representation_file_affinity'] = skempi_file
-    with open("./src/bin/probe_config.yaml", "w") as file:
-        yaml.dump(yaml_data, file)
-    return None
 def add_new_eval(
     human_file,
@@ -42,16 +30,8 @@ def add_new_eval(
 ):
     representation_name = model_name_textbox if revision_name_textbox == '' else revision_name_textbox
-    update_yaml(representation_name, benchmark_type, human_file, skempi_file)
-    # Save human and skempi files under ./src/data/representation_vectors using pandas
-    print(human_file)
-    df = pd.read_csv(human_file)
-    print(df.head().to_string())
-    return None
-    if human_file is not None:
-        human_df = pd.read_csv(human_file)
-        human_df.to_csv(f"./src/data/representation_vectors/{representation_name}_human.csv", index=False)
     return None

 import yaml
 from src.about import *
+from src.bin.PROBE import run_probe
 global data_component, filter_component
     df = df[present_columns]
     return df
 def add_new_eval(
     human_file,
 ):
     representation_name = model_name_textbox if revision_name_textbox == '' else revision_name_textbox
+    run_probe(benchmark_type, representation_name, human_file, skempi_file)
     return None

src/bin/PROBE.py CHANGED Viewed

@@ -59,4 +59,44 @@ if args["benchmark"] in  ["affinity","all"]:
     bae.predict_affinities_and_report_results()
 print("\n\nPROBE (Protein RepresentatiOn Benchmark) run is finished...\n")

     bae.predict_affinities_and_report_results()
 print("\n\nPROBE (Protein RepresentatiOn Benchmark) run is finished...\n")
+def run_probe(benchmarks, representation_name, representation_file_human, representation_file_affinity, similarity_tasks=["Sparse","200","500"], function_prediction_aspec="All_Aspects", function_prediction_dataset="All_Data_Sets", family_prediction_dataset=["nc","uc50","uc30","mm15"], detailed_output=False):
+    """Run the selected PROBE benchmarks for one protein representation.
+
+    Args:
+        benchmarks: Benchmark names to run. Recognised values are
+            "similarity", "function", "family", "affinity" and "all"
+            (which selects every benchmark). A single string is accepted
+            and treated as a one-element list.
+        representation_name: Name assigned to each benchmark module's
+            ``representation_name`` attribute.
+        representation_file_human: Passed to ``load_representation`` and
+            used as ``tfc.representation_path``.
+        representation_file_affinity: Used as ``bae.skempi_vectors_path``.
+        similarity_tasks: Assigned (as a copy) to ``ssi.similarity_tasks``.
+        function_prediction_aspec: Assigned to ``fp.aspect_type``. The
+            parameter keeps its original spelling so existing keyword
+            callers continue to work.
+        function_prediction_dataset: Assigned to ``fp.dataset_type``.
+        family_prediction_dataset: Each entry is passed to
+            ``tfc.score_protein_rep``.
+        detailed_output: Forwarded to the similarity, function and family
+            benchmark modules.
+
+    Returns:
+        0 once every selected benchmark has been invoked.
+    """
+    # A bare string would be scanned character by character by the
+    # membership checks below, so wrap it before any dispatching happens.
+    if isinstance(benchmarks, str):
+        benchmarks = [benchmarks]
+    run_all = "all" in benchmarks
+
+    def selected(name):
+        # "all" enables every benchmark, matching the script-level handling.
+        return run_all or name in benchmarks
+
+    print("\n\nPROBE (Protein RepresentatiOn Benchmark) run is started...\n\n")
+    # Only the similarity, function and family benchmarks read the human
+    # representation vectors; affinity uses its own vector file.
+    if any(selected(name) for name in ("similarity", "function", "family")):
+        print("\nRepresentation vectors are loading...\n")
+        human_representation_dataframe = load_representation(representation_file_human)
+    if selected("similarity"):
+        print("\nSemantic similarity Inference Benchmark is running...\n")
+        ssi.representation_dataframe = human_representation_dataframe
+        ssi.representation_name = representation_name
+        ssi.protein_names = ssi.representation_dataframe['Entry'].tolist()
+        # Copy so the shared default list is never handed to another module.
+        ssi.similarity_tasks = list(similarity_tasks)
+        ssi.detailed_output = detailed_output
+        ssi.calculate_all_correlations()
+    if selected("function"):
+        print("\n\nOntology-based protein function prediction benchmark is running...\n")
+        # Use the name actually declared in the signature; the previous
+        # reference to "function_prediction_aspect" raised NameError.
+        fp.aspect_type = function_prediction_aspec
+        fp.dataset_type = function_prediction_dataset
+        fp.representation_dataframe = human_representation_dataframe
+        fp.representation_name = representation_name
+        fp.detailed_output = detailed_output
+        fp.pred_output()
+    if selected("family"):
+        print("\n\nDrug target protein family classification benchmark is running...\n")
+        tfc.representation_path = representation_file_human
+        tfc.representation_name = representation_name
+        tfc.detailed_output = detailed_output
+        for dataset in family_prediction_dataset:
+            tfc.score_protein_rep(dataset)
+    if selected("affinity"):
+        print("\n\nProtein-protein binding affinity estimation benchmark is running...\n")
+        bae.skempi_vectors_path = representation_file_affinity
+        bae.representation_name = representation_name
+        bae.predict_affinities_and_report_results()
+    print("\n\nPROBE (Protein RepresentatiOn Benchmark) run is finished...\n")
+    return 0