emmas96 committed on
Commit
94a9538
·
1 Parent(s): 3c1ebe4

update interface to choose target in retrieval

Browse files
Files changed (1) hide show
  1. app.py +24 -42
app.py CHANGED
@@ -144,28 +144,28 @@ def retrieval():
144
 
145
  st.write('In the furute this page will retrieve the top-k drug compounds that are predicted to have the highest activity toward the given protein target from either the Lenselink or Davis datasets.')
146
 
147
- st.markdown('### Target')
148
  sequence = st.text_input('Enter the amino-acid sequence of the query protein target', value='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA', placeholder='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA')
149
 
150
  if sequence:
151
- st.markdown('\n\n\n\n Plot of protein to be added soon. \n\n\n\n')
 
 
152
 
153
- selected_encoder = st.selectbox(
154
- 'Select encoder for protein target',('None', 'SeqVec')
155
- )
156
- if selected_encoder == 'SeqVec':
157
  from bio_embeddings.embed import SeqVecEmbedder
158
  encoder = SeqVecEmbedder()
159
  embeddings = encoder.embed_batch([sequence])
160
  for emb in embeddings:
161
  embedding = encoder.reduce_per_protein(emb)
162
  break
163
- else:
164
- st.write('Only SeqVec is currently available to encode protein structures.')
165
- embedding = None
166
- if embedding is not None:
167
- st.write(f'{selected_encoder} embedding')
168
  st.write(embedding)
 
169
 
170
  st.markdown('### Retrieval')
171
  st.write('TODO HyperPCM predicts the QSAR model for the given protein target.')
@@ -181,37 +181,19 @@ def retrieval():
181
  )
182
 
183
  st.write(f'The top-{selected_k} most active drug coupounds from {selected_dataset} predicted by HyperPCM are: ')
184
- col1, col2, col3, col4, col5 = st.columns(5)
185
- with col1:
186
- smiles = 'CC(=O)OC1=CC=CC=C1C(=O)O'
187
- mol = Chem.MolFromSmiles(smiles)
188
- mol_img = Chem.Draw.MolToImage(mol)
189
- st.image(mol_img)
190
-
191
- with col2:
192
- smiles = 'COc1cc(C=O)ccc1O'
193
- mol = Chem.MolFromSmiles(smiles)
194
- mol_img = Chem.Draw.MolToImage(mol)
195
- st.image(mol_img)
196
-
197
- with col3:
198
- smiles = 'CC(=O)Nc1ccc(O)cc1'
199
- mol = Chem.MolFromSmiles(smiles)
200
- mol_img = Chem.Draw.MolToImage(mol)
201
- st.image(mol_img)
202
-
203
- with col4:
204
- smiles = 'CC(=O)Nc1ccc(OS(=O)(=O)O)cc1'
205
- mol = Chem.MolFromSmiles(smiles)
206
- mol_img = Chem.Draw.MolToImage(mol)
207
- st.image(mol_img)
208
-
209
- with col5:
210
- smiles = 'CC(=O)Nc1ccc(O[C@@H]2O[C@H](C(=O)O)[C@@H](O)[C@H](O)[C@H]2O)cc1'
211
- mol = Chem.MolFromSmiles(smiles)
212
- mol_img = Chem.Draw.MolToImage(mol)
213
- st.image(mol_img)
214
-
215
 
216
  def display_protein():
217
  st.markdown('## Display protein')
 
144
 
145
  st.write('In the furute this page will retrieve the top-k drug compounds that are predicted to have the highest activity toward the given protein target from either the Lenselink or Davis datasets.')
146
 
147
+ st.markdown('### Choose protein target')
148
  sequence = st.text_input('Enter the amino-acid sequence of the query protein target', value='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA', placeholder='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA')
149
 
150
  if sequence:
151
+ col1, col2, col3 = st.columns(3)
152
+ with col1:
153
+ st.markdown('\n\n\n\n Plot of protein to be added soon. \n\n\n\n')
154
 
155
+ with col2:
156
+ st.write('Encoding with SecVec')
157
+ st.image('protein_encoder.png')
158
+
159
  from bio_embeddings.embed import SeqVecEmbedder
160
  encoder = SeqVecEmbedder()
161
  embeddings = encoder.embed_batch([sequence])
162
  for emb in embeddings:
163
  embedding = encoder.reduce_per_protein(emb)
164
  break
165
+ with col3:
166
+ st.write(f'SeqVec embedding')
 
 
 
167
  st.write(embedding)
168
+ st.write(np.transpose(embedding))
169
 
170
  st.markdown('### Retrieval')
171
  st.write('TODO HyperPCM predicts the QSAR model for the given protein target.')
 
181
  )
182
 
183
  st.write(f'The top-{selected_k} most active drug coupounds from {selected_dataset} predicted by HyperPCM are: ')
184
+ dummy_smiles = [['CC(=O)OC1=CC=CC=C1C(=O)O', 'COc1cc(C=O)ccc1O', 'CC(=O)Nc1ccc(O)cc1', 'CC(=O)Nc1ccc(OS(=O)(=O)O)cc1',
185
+ 'CC(=O)Nc1ccc(O[C@@H]2O[C@H](C(=O)O)[C@@H](O)[C@H](O)[C@H]2O)cc1'], ['CC(=O)OC1=CC=CC=C1C(=O)O', 'COc1cc(C=O)ccc1O', 'CC(=O)Nc1ccc(O)cc1',
186
+ 'CC(=O)Nc1ccc(OS(=O)(=O)O)cc1', 'CC(=O)Nc1ccc(O[C@@H]2O[C@H](C(=O)O)[C@@H](O)[C@H](O)[C@H]2O)cc1'], ['CC(=O)OC1=CC=CC=C1C(=O)O',
187
+ 'COc1cc(C=O)ccc1O', 'CC(=O)Nc1ccc(O)cc1', 'CC(=O)Nc1ccc(OS(=O)(=O)O)cc1', 'CC(=O)Nc1ccc(O[C@@H]2O[C@H](C(=O)O)[C@@H](O)[C@H](O)[C@H]2O)cc1'],
188
+ ['CC(=O)OC1=CC=CC=C1C(=O)O', 'COc1cc(C=O)ccc1O', 'CC(=O)Nc1ccc(O)cc1', 'CC(=O)Nc1ccc(OS(=O)(=O)O)cc1',
189
+ 'CC(=O)Nc1ccc(O[C@@H]2O[C@H](C(=O)O)[C@@H](O)[C@H](O)[C@H]2O)cc1']]
190
+ cols = st.columns(5)
191
+ for j, col in enumerate(cols):
192
+ with cols:
193
+ for i in range(int(selected_k/5)):
194
+ mol = Chem.MolFromSmiles(dummy_smiles[i,j])
195
+ mol_img = Chem.Draw.MolToImage(mol)
196
+ st.image(mol_img)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197
 
198
  def display_protein():
199
  st.markdown('## Display protein')