Spaces:
Running
Running
Update src/about.py
Browse files - src/about.py +3 -5
src/about.py
CHANGED
@@ -91,8 +91,8 @@ LLM_BENCHMARKS_TEXT = f"""
|
|
91 |
EVALUATION_QUEUE_TEXT = """
|
92 |
# **Benchmarking your own representation model**
|
93 |
## To run the benchmarks, the following representation vectors need to be generated:
|
94 |
-
|
95 |
-
|
96 |
|
97 |
## Format of the both protein representation files:
|
98 |
1. Each row corresponds to the representation vector of a particular protein.
|
@@ -133,11 +133,9 @@ family_prediction_dataset_options = ["nc", "uc50", "uc30", "mm15"]
|
|
133 |
|
134 |
benchmark_specific_metrics = {
|
135 |
'similarity': ['sparse_MF_correlation', 'sparse_BP_correlation', 'sparse_CC_correlation', 'sparse_Ave_correlation',
|
136 |
-
'sparse_MF_pvalue', 'sparse_BP_pvalue', 'sparse_CC_pvalue', 'sparse_Ave_pvalue',
|
137 |
'200_MF_correlation', '200_BP_correlation', '200_CC_correlation', '200_Ave_correlation',
|
138 |
-
'200_MF_pvalue', '200_BP_pvalue', '200_CC_pvalue', '200_Ave_pvalue',
|
139 |
'500_MF_correlation', '500_BP_correlation', '500_CC_correlation', '500_Ave_correlation',
|
140 |
-
|
141 |
'function': {
|
142 |
'aspect_types': ['MF', 'BP', 'CC'],
|
143 |
'dataset_types': ['accuracy', 'F1', 'precision', 'recall']
|
|
|
91 |
EVALUATION_QUEUE_TEXT = """
|
92 |
# **Benchmarking your own representation model**
|
93 |
## To run the benchmarks, the following representation vectors need to be generated:
|
94 |
+
- For benchmarks 1, 2, and 3 (similarity, function, and family), you will need to generate representation vectors for all human proteins. The amino acid sequences for canonical isoforms of human proteins can be found [here](https://drive.google.com/file/d/1wXF2lmj4ZTahMrl66QpYM2TvHmbcIL6b/view?usp=sharing).
|
95 |
+
- For benchmark 4 (affinity), representation vectors will need to be generated for the samples in the SKEMPI dataset, which can be accessed [here](https://drive.google.com/file/d/1m5jssC0RMsiFT_w-Ykh629Pw_An3PInI/view?usp=sharing).
|
96 |
|
97 |
## Format of the both protein representation files:
|
98 |
1. Each row corresponds to the representation vector of a particular protein.
|
|
|
133 |
|
134 |
benchmark_specific_metrics = {
|
135 |
'similarity': ['sparse_MF_correlation', 'sparse_BP_correlation', 'sparse_CC_correlation', 'sparse_Ave_correlation',
|
|
|
136 |
'200_MF_correlation', '200_BP_correlation', '200_CC_correlation', '200_Ave_correlation',
|
|
|
137 |
'500_MF_correlation', '500_BP_correlation', '500_CC_correlation', '500_Ave_correlation',
|
138 |
+
],
|
139 |
'function': {
|
140 |
'aspect_types': ['MF', 'BP', 'CC'],
|
141 |
'dataset_types': ['accuracy', 'F1', 'precision', 'recall']
|