emmas96 committed on
Commit
4f1ea03
·
1 Parent(s): 52833bb

include example protein structure without prediction

Browse files
Files changed (1) hide show
  1. app.py +58 -43
app.py CHANGED
@@ -110,7 +110,9 @@ def predict_dti():
110
  with prot_col1:
111
  sequence = st.text_input('Enter query amino-acid sequence', value='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA', placeholder='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA')
112
 
113
- if sequence:
 
 
114
  st.error('Visualization comming soon...')
115
 
116
  with prot_col2:
@@ -186,53 +188,63 @@ def retrieval():
186
  col1, col2, col3, col4 = st.columns(4)
187
  with col2:
188
  sequence = st.text_input('Enter query amino-acid sequence', value='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA', placeholder='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA')
189
- if sequence:
 
 
190
  st.error('Visualization coming soon...')
191
 
192
  with col3:
 
 
 
193
  if sequence:
194
- st.image('figures/protein_encoder_done.png')
195
-
196
- with st.spinner('Encoding in progress...'):
197
- from bio_embeddings.embed import SeqVecEmbedder
198
- encoder = SeqVecEmbedder()
199
- embeddings = encoder.embed_batch([sequence])
200
- for emb in embeddings:
201
- embedding = encoder.reduce_per_protein(emb)
202
- break
203
- st.success('Encoding complete.')
204
-
205
- st.markdown('### Inference')
206
-
207
- import time
208
- progress_text = "HyperPCM predicts the QSAR model for the query protein target. Please wait."
209
- my_bar = st.progress(0, text=progress_text)
210
- for i in range(100):
211
- time.sleep(0.1)
212
- my_bar.progress(i + 1, text=progress_text)
213
- my_bar.progress(100, text="HyperPCM predicts the QSAR model for the query protein target. Done.")
214
 
215
- st.markdown('### Retrieval')
216
-
217
- col1, col2 = st.columns(2)
218
- with col1:
219
- selected_dataset = st.selectbox(
220
- 'Select dataset from which the drug compounds should be retrieved',('Lenselink', 'Davis')
221
- )
222
- with col2:
223
- selected_k = st.selectbox(
224
- 'Select the top-k number of drug compounds to retrieve',(5, 10, 15, 20)
225
- )
226
-
227
- st.write(f'The top-{selected_k} most active drug coupounds from {selected_dataset} predicted by HyperPCM are: ')
228
- dummy_smiles = ['CC(=O)OC1=CC=CC=C1C(=O)O', 'COc1cc(C=O)ccc1O', 'CC(=O)Nc1ccc(O)cc1', 'CC(=O)Nc1ccc(OS(=O)(=O)O)cc1', 'CC(=O)Nc1ccc(O[C@@H]2O[C@H](C(=O)O)[C@@H](O)[C@H](O)[C@H]2O)cc1']
229
- cols = st.columns(5)
230
- for j, col in enumerate(cols):
231
- with col:
232
- for i in range(int(selected_k/5)):
233
- mol = Chem.MolFromSmiles(dummy_smiles[j])
234
- mol_img = Chem.Draw.MolToImage(mol)
235
- st.image(mol_img)
 
 
 
 
 
 
 
 
236
 
237
  def display_protein():
238
  st.markdown('## Display protein structure')
@@ -242,6 +254,9 @@ def display_protein():
242
  sequence = st.text_input('Enter the amino-acid sequence of the query protein target', value='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA', placeholder='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA')
243
 
244
  if sequence:
 
 
 
245
  model = esm.pretrained.esmfold_v1()
246
  model = model.eval().cuda()
247
 
 
110
  with prot_col1:
111
  sequence = st.text_input('Enter query amino-acid sequence', value='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA', placeholder='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA')
112
 
113
+ if sequence == 'HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA':
114
+ st.image('figures/ex_protein.jpeg')
115
+ elif sequence:
116
  st.error('Visualization comming soon...')
117
 
118
  with prot_col2:
 
188
  col1, col2, col3, col4 = st.columns(4)
189
  with col2:
190
  sequence = st.text_input('Enter query amino-acid sequence', value='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA', placeholder='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA')
191
+ if sequence == 'HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA':
192
+ st.image('figures/ex_protein.jpeg')
193
+ elif sequence:
194
  st.error('Visualization coming soon...')
195
 
196
  with col3:
197
+ selected_encoder = st.selectbox(
198
+ 'Select encoder for protein target',('SeqVec', 'None')
199
+ )
200
  if sequence:
201
+ if selected_encoder == 'SeqVec':
202
+ st.image('figures/protein_encoder_done.png')
203
+ with st.spinner('Encoding in progress...'):
204
+ from bio_embeddings.embed import SeqVecEmbedder
205
+ encoder = SeqVecEmbedder()
206
+ embeddings = encoder.embed_batch([sequence])
207
+ for emb in embeddings:
208
+ prot_embedding = encoder.reduce_per_protein(emb)
209
+ break
210
+ st.success('Encoding complete.')
211
+ else:
212
+ prot_embedding = None
213
+ st.image('figures/protein_encoder.png')
214
+ st.warning('Choose encoder above...')
215
+
216
+ if prot_embedding is not None:
217
+ st.markdown('### Inference')
 
 
 
218
 
219
+ import time
220
+ progress_text = "HyperPCM predicts the QSAR model for the query protein target. Please wait."
221
+ my_bar = st.progress(0, text=progress_text)
222
+ for i in range(100):
223
+ time.sleep(0.1)
224
+ my_bar.progress(i + 1, text=progress_text)
225
+ my_bar.progress(100, text="HyperPCM predicts the QSAR model for the query protein target. Done.")
226
+
227
+ st.markdown('### Retrieval')
228
+
229
+ col1, col2 = st.columns(2)
230
+ with col1:
231
+ selected_dataset = st.selectbox(
232
+ 'Select dataset from which the drug compounds should be retrieved',('Lenselink', 'Davis')
233
+ )
234
+ with col2:
235
+ selected_k = st.selectbox(
236
+ 'Select the top-k number of drug compounds to retrieve',(5, 10, 15, 20)
237
+ )
238
+
239
+ st.write(f'The top-{selected_k} most active drug coupounds from {selected_dataset} predicted by HyperPCM are: ')
240
+ dummy_smiles = ['CC(=O)OC1=CC=CC=C1C(=O)O', 'COc1cc(C=O)ccc1O', 'CC(=O)Nc1ccc(O)cc1', 'CC(=O)Nc1ccc(OS(=O)(=O)O)cc1', 'CC(=O)Nc1ccc(O[C@@H]2O[C@H](C(=O)O)[C@@H](O)[C@H](O)[C@H]2O)cc1']
241
+ cols = st.columns(5)
242
+ for j, col in enumerate(cols):
243
+ with col:
244
+ for i in range(int(selected_k/5)):
245
+ mol = Chem.MolFromSmiles(dummy_smiles[j])
246
+ mol_img = Chem.Draw.MolToImage(mol)
247
+ st.image(mol_img)
248
 
249
  def display_protein():
250
  st.markdown('## Display protein structure')
 
254
  sequence = st.text_input('Enter the amino-acid sequence of the query protein target', value='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA', placeholder='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA')
255
 
256
  if sequence:
257
+
258
+ st.image('figures/ex_protein.jpeg')
259
+
260
  model = esm.pretrained.esmfold_v1()
261
  model = model.eval().cuda()
262