add warnings and progress bars
Browse files
app.py
CHANGED
@@ -75,7 +75,7 @@ def predict_dti():
|
|
75 |
from cddd.inference import InferenceModel
|
76 |
CDDD_MODEL_DIR = 'src/encoders/cddd'
|
77 |
cddd_model = InferenceModel(CDDD_MODEL_DIR)
|
78 |
-
|
79 |
#from huggingface_hub import hf_hub_download
|
80 |
#precomputed_embs = f'{selected_encoder}_encoding.csv'
|
81 |
#REPO_ID = "emmas96/Lenselink"
|
@@ -89,12 +89,12 @@ def predict_dti():
|
|
89 |
REPO_ID = "emmas96/hyperpcm"
|
90 |
checkpoint_path = hf_hub_download(REPO_ID, MOLBERT_MODEL_DIR)
|
91 |
molbert_model = MolBertFeaturizer(checkpoint_path, max_seq_len=500, embedding_type='average-1-cat-pooled')
|
92 |
-
|
93 |
else:
|
94 |
#st.write('No pre-trained version of HyperPCM is available for the chosen encoder.')
|
95 |
-
|
96 |
st.image('molecule_encoder.png')
|
97 |
-
if
|
98 |
#st.write(f'{selected_encoder} embedding')
|
99 |
#st.write(embedding)
|
100 |
st.image('molecule_encoder_done.png')
|
@@ -108,7 +108,8 @@ def predict_dti():
|
|
108 |
sequence = st.text_input('Enter the amino-acid sequence of the query protein target', value='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA', placeholder='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA')
|
109 |
|
110 |
if sequence:
|
111 |
-
st.markdown('\n\n\n\n Plot of protein to be added soon. \n\n\n\n')
|
|
|
112 |
|
113 |
with prot_col2:
|
114 |
selected_encoder = st.selectbox(
|
@@ -117,41 +118,45 @@ def predict_dti():
|
|
117 |
if sequence:
|
118 |
if selected_encoder == 'SeqVec':
|
119 |
from bio_embeddings.embed import SeqVecEmbedder
|
120 |
-
|
121 |
-
|
|
|
122 |
for emb in embeddings:
|
123 |
-
|
124 |
break
|
125 |
elif selected_encoder == 'UniRep':
|
126 |
from jax_unirep.utils import load_params
|
127 |
params = load_params()
|
128 |
from jax_unirep.featurize import get_reps
|
129 |
embedding, h_final, c_final = get_reps([sequence])
|
130 |
-
|
131 |
elif selected_encoder == 'ESM-1b':
|
132 |
from bio_embeddings.embed import ESM1bEmbedder
|
133 |
encoder = ESM1bEmbedder()
|
134 |
embeddings = encoder.embed_batch([sequence])
|
135 |
for emb in embeddings:
|
136 |
-
|
137 |
break
|
138 |
elif selected_encoder == 'ProtT5':
|
139 |
from bio_embeddings.embed import ProtTransT5XLU50Embedder
|
140 |
encoder = ProtTransT5XLU50Embedder()
|
141 |
embeddings = encoder.embed_batch([sequence])
|
142 |
for emb in embeddings:
|
143 |
-
|
144 |
break
|
145 |
else:
|
146 |
-
|
147 |
-
|
148 |
st.image('protein_encoder.png')
|
149 |
-
if
|
150 |
#st.write(f'{selected_encoder} embedding')
|
151 |
#st.write(embedding)
|
152 |
st.image('protein_encoder_done.png')
|
153 |
|
154 |
-
|
|
|
|
|
|
|
155 |
|
156 |
|
157 |
def retrieval():
|
@@ -165,23 +170,33 @@ def retrieval():
|
|
165 |
if sequence:
|
166 |
col1, col2 = st.columns(2)
|
167 |
with col1:
|
168 |
-
st.markdown('\n\n\n\n Plot of protein to be added soon. \n\n\n\n')
|
|
|
169 |
|
170 |
with col2:
|
171 |
-
st.write('Currently encoding the protein with SecVec...')
|
172 |
st.image('protein_encoder_done.png')
|
173 |
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
181 |
|
182 |
st.markdown('### Retrieval')
|
183 |
-
|
184 |
-
|
185 |
col1, col2 = st.columns(2)
|
186 |
with col1:
|
187 |
selected_dataset = st.selectbox(
|
|
|
75 |
from cddd.inference import InferenceModel
|
76 |
CDDD_MODEL_DIR = 'src/encoders/cddd'
|
77 |
cddd_model = InferenceModel(CDDD_MODEL_DIR)
|
78 |
+
drug_embedding = cddd_model.seq_to_emb([smiles])
|
79 |
#from huggingface_hub import hf_hub_download
|
80 |
#precomputed_embs = f'{selected_encoder}_encoding.csv'
|
81 |
#REPO_ID = "emmas96/Lenselink"
|
|
|
89 |
REPO_ID = "emmas96/hyperpcm"
|
90 |
checkpoint_path = hf_hub_download(REPO_ID, MOLBERT_MODEL_DIR)
|
91 |
molbert_model = MolBertFeaturizer(checkpoint_path, max_seq_len=500, embedding_type='average-1-cat-pooled')
|
92 |
+
drug_embedding = molbert_model.transform([smiles])
|
93 |
else:
|
94 |
#st.write('No pre-trained version of HyperPCM is available for the chosen encoder.')
|
95 |
+
drug_embedding = None
|
96 |
st.image('molecule_encoder.png')
|
97 |
+
if drug_embedding is not None:
|
98 |
#st.write(f'{selected_encoder} embedding')
|
99 |
#st.write(embedding)
|
100 |
st.image('molecule_encoder_done.png')
|
|
|
108 |
sequence = st.text_input('Enter the amino-acid sequence of the query protein target', value='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA', placeholder='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA')
|
109 |
|
110 |
if sequence:
|
111 |
+
#st.markdown('\n\n\n\n Plot of protein to be added soon. \n\n\n\n')
|
112 |
+
st.error('Visualization of protein to be added soon.')
|
113 |
|
114 |
with prot_col2:
|
115 |
selected_encoder = st.selectbox(
|
|
|
118 |
if sequence:
|
119 |
if selected_encoder == 'SeqVec':
|
120 |
from bio_embeddings.embed import SeqVecEmbedder
|
121 |
+
encoder = SeqVecEmbedder()
|
122 |
+
with st.spinner('Currently encoding the query protein target with SeqVec...'):
|
123 |
+
embeddings = encoder.embed_batch([sequence])
|
124 |
for emb in embeddings:
|
125 |
+
prot_embedding = encoder.reduce_per_protein(emb)
|
126 |
break
|
127 |
elif selected_encoder == 'UniRep':
|
128 |
from jax_unirep.utils import load_params
|
129 |
params = load_params()
|
130 |
from jax_unirep.featurize import get_reps
|
131 |
embedding, h_final, c_final = get_reps([sequence])
|
132 |
+
prot_embedding = embedding.mean(axis=0)
|
133 |
elif selected_encoder == 'ESM-1b':
|
134 |
from bio_embeddings.embed import ESM1bEmbedder
|
135 |
encoder = ESM1bEmbedder()
|
136 |
embeddings = encoder.embed_batch([sequence])
|
137 |
for emb in embeddings:
|
138 |
+
prot_embedding = encoder.reduce_per_protein(emb)
|
139 |
break
|
140 |
elif selected_encoder == 'ProtT5':
|
141 |
from bio_embeddings.embed import ProtTransT5XLU50Embedder
|
142 |
encoder = ProtTransT5XLU50Embedder()
|
143 |
embeddings = encoder.embed_batch([sequence])
|
144 |
for emb in embeddings:
|
145 |
+
prot_embedding = encoder.reduce_per_protein(emb)
|
146 |
break
|
147 |
else:
|
148 |
+
st.warning('Chosen encoder above.')
|
149 |
+
prot_embedding = None
|
150 |
st.image('protein_encoder.png')
|
151 |
+
if prot_embedding is not None:
|
152 |
#st.write(f'{selected_encoder} embedding')
|
153 |
#st.write(embedding)
|
154 |
st.image('protein_encoder_done.png')
|
155 |
|
156 |
+
# Final status message of the drug-target interaction demo.
# Both embeddings use None as the "not computed" sentinel (assigned in the
# fallback branches of the encoder selection), so test identity rather than
# truthiness. The computed embeddings are presumably NumPy-like arrays
# (TODO confirm); `not arr` on a multi-element array raises ValueError
# instead of yielding a bool.
if drug_embedding is None or prot_embedding is None:
    st.error('Waiting for computed drug and target embeddings...')
else:
    st.warning('In the future inference will be run with HyperPCM on the given drug compound and protein target...')
|
160 |
|
161 |
|
162 |
def retrieval():
|
|
|
170 |
if sequence:
|
171 |
col1, col2 = st.columns(2)
|
172 |
with col1:
|
173 |
+
#st.markdown('\n\n\n\n Plot of protein to be added soon. \n\n\n\n')
|
174 |
+
st.error('Visualization of protein to be added soon.')
|
175 |
|
176 |
with col2:
|
177 |
+
#st.write('Currently encoding the protein with SeqVec...')
|
178 |
st.image('protein_encoder_done.png')
|
179 |
|
180 |
+
from bio_embeddings.embed import SeqVecEmbedder
|
181 |
+
encoder = SeqVecEmbedder()
|
182 |
+
with st.spinner('Currently encoding the query protein target with SeqVec...'):
|
183 |
+
embeddings = encoder.embed_batch([sequence])
|
184 |
+
for emb in embeddings:
|
185 |
+
embedding = encoder.reduce_per_protein(emb)
|
186 |
+
break
|
187 |
+
st.success('Encoding complete.')
|
188 |
+
|
189 |
+
st.markdown('### Inference')
|
190 |
+
|
191 |
+
import time
|
192 |
+
progress_text = "HyperPCM predicts the QSAR model for the query protein target. Please wait."
|
193 |
+
my_bar = st.progress(0, text=progress_text)
|
194 |
+
for i in range(100):
|
195 |
+
time.sleep(0.1)
|
196 |
+
my_bar.progress(i + 1, text=progress_text)
|
197 |
|
198 |
st.markdown('### Retrieval')
|
199 |
+
|
|
|
200 |
col1, col2 = st.columns(2)
|
201 |
with col1:
|
202 |
selected_dataset = st.selectbox(
|