emmas96 committed on
Commit
b318bc6
·
1 Parent(s): 4011e18

reformulations

Browse files
Files changed (1) hide show
  1. app.py +51 -46
app.py CHANGED
@@ -23,6 +23,7 @@ st.markdown(
23
  🧬 Github: [ml-jku/hyper-dti](https://github.com/ml-jku/hyper-dti) 📝 NeurIPS 2022 AI4Science workshop paper: [OpenReview](https://openreview.net/forum?id=dIX34JWnIAL)\n
24
  """
25
  )
 
26
 
27
 
28
  def about_page():
@@ -68,7 +69,7 @@ def predict_dti():
68
 
69
  with mol_col2:
70
  selected_encoder = st.selectbox(
71
- 'Select encoder for drug compound',('None', 'CDDD', 'MolBERT')
72
  )
73
  if smiles:
74
  if selected_encoder == 'CDDD':
@@ -90,14 +91,16 @@ def predict_dti():
90
  checkpoint_path = hf_hub_download(REPO_ID, MOLBERT_MODEL_DIR)
91
  molbert_model = MolBertFeaturizer(checkpoint_path, max_seq_len=500, embedding_type='average-1-cat-pooled')
92
  drug_embedding = molbert_model.transform([smiles])
 
 
93
  else:
94
- #st.write('No pre-trained version of HyperPCM is available for the chosen encoder.')
95
  drug_embedding = None
96
  st.image('molecule_encoder.png')
 
 
97
  if drug_embedding is not None:
98
- #st.write(f'{selected_encoder} embedding')
99
- #st.write(embedding)
100
  st.image('molecule_encoder_done.png')
 
101
 
102
  with col2:
103
  st.markdown('### Target')
@@ -108,53 +111,55 @@ def predict_dti():
108
  sequence = st.text_input('Enter the amino-acid sequence of the query protein target', value='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA', placeholder='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA')
109
 
110
  if sequence:
111
- #st.markdown('\n\n\n\n Plot of protein to be added soon. \n\n\n\n')
112
- st.error('Visualization of protein to be added soon.')
113
 
114
  with prot_col2:
115
  selected_encoder = st.selectbox(
116
  'Select encoder for protein target',('None', 'SeqVec', 'UniRep', 'ESM-1b', 'ProtT5')
117
  )
 
118
  if sequence:
119
  if selected_encoder == 'SeqVec':
120
- from bio_embeddings.embed import SeqVecEmbedder
121
- encoder = SeqVecEmbedder()
122
- with st.spinner('Currently encoding the query protein target with SeqVec...'):
123
  embeddings = encoder.embed_batch([sequence])
124
- for emb in embeddings:
125
- prot_embedding = encoder.reduce_per_protein(emb)
126
- break
127
  elif selected_encoder == 'UniRep':
128
- from jax_unirep.utils import load_params
129
- params = load_params()
130
- from jax_unirep.featurize import get_reps
131
- embedding, h_final, c_final = get_reps([sequence])
132
- prot_embedding = embedding.mean(axis=0)
 
133
  elif selected_encoder == 'ESM-1b':
134
- from bio_embeddings.embed import ESM1bEmbedder
135
- encoder = ESM1bEmbedder()
136
- embeddings = encoder.embed_batch([sequence])
137
- for emb in embeddings:
138
- prot_embedding = encoder.reduce_per_protein(emb)
139
- break
 
140
  elif selected_encoder == 'ProtT5':
141
- from bio_embeddings.embed import ProtTransT5XLU50Embedder
142
- encoder = ProtTransT5XLU50Embedder()
143
- embeddings = encoder.embed_batch([sequence])
144
- for emb in embeddings:
145
- prot_embedding = encoder.reduce_per_protein(emb)
146
- break
 
147
  else:
148
- st.warning('Chosen encoder above.')
149
  prot_embedding = None
150
- st.image('protein_encoder.png')
 
151
  if prot_embedding is not None:
152
- #st.write(f'{selected_encoder} embedding')
153
- #st.write(embedding)
154
  st.image('protein_encoder_done.png')
 
155
 
156
  if not drug_embedding or not prot_embedding:
157
- st.error('Witing for computed drug and target embeddings...')
158
  else:
159
  st.warning('In the future inference will be run with HyperPCM on the given drug compound and protein target...')
160
 
@@ -164,26 +169,25 @@ def retrieval():
164
 
165
  st.write('In the furute this page will retrieve the top-k drug compounds that are predicted to have the highest activity toward the given protein target from either the Lenselink or Davis datasets.')
166
 
167
- st.markdown('### Choose protein target')
168
  sequence = st.text_input('Enter the amino-acid sequence of the query protein target', value='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA', placeholder='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA')
169
 
170
  if sequence:
171
  col1, col2 = st.columns(2)
172
  with col1:
173
- #st.markdown('\n\n\n\n Plot of protein to be added soon. \n\n\n\n')
174
- st.error('Visualization of protein to be added soon.')
175
 
176
  with col2:
177
- #st.write('Currently encoding the protein with SecVec...')
178
- st.image('protein_encoder_done.png')
179
-
180
- from bio_embeddings.embed import SeqVecEmbedder
181
- encoder = SeqVecEmbedder()
182
- with st.spinner('Currently encoding the query protein target with SeqVec...'):
183
  embeddings = encoder.embed_batch([sequence])
184
- for emb in embeddings:
185
- embedding = encoder.reduce_per_protein(emb)
186
- break
 
187
  st.success('Encoding complete.')
188
 
189
  st.markdown('### Inference')
@@ -194,6 +198,7 @@ def retrieval():
194
  for i in range(100):
195
  time.sleep(0.1)
196
  my_bar.progress(i + 1, text=progress_text)
 
197
 
198
  st.markdown('### Retrieval')
199
 
 
23
  🧬 Github: [ml-jku/hyper-dti](https://github.com/ml-jku/hyper-dti) 📝 NeurIPS 2022 AI4Science workshop paper: [OpenReview](https://openreview.net/forum?id=dIX34JWnIAL)\n
24
  """
25
  )
26
+ st.error('WARNING! This app is currently under development and should not be used!')
27
 
28
 
29
  def about_page():
 
69
 
70
  with mol_col2:
71
  selected_encoder = st.selectbox(
72
+ 'Select encoder for drug compound',('None', 'CDDD', 'MolBERT', 'Dummy')
73
  )
74
  if smiles:
75
  if selected_encoder == 'CDDD':
 
91
  checkpoint_path = hf_hub_download(REPO_ID, MOLBERT_MODEL_DIR)
92
  molbert_model = MolBertFeaturizer(checkpoint_path, max_seq_len=500, embedding_type='average-1-cat-pooled')
93
  drug_embedding = molbert_model.transform([smiles])
94
+ elif selected_encoder == 'Dummy':
95
+ drug_embedding = [0,1,2,3,4,5]
96
  else:
 
97
  drug_embedding = None
98
  st.image('molecule_encoder.png')
99
+ st.warning('Choose encoder above...')
100
+
101
  if drug_embedding is not None:
 
 
102
  st.image('molecule_encoder_done.png')
103
+ st.success('Encoding complete.')
104
 
105
  with col2:
106
  st.markdown('### Target')
 
111
  sequence = st.text_input('Enter the amino-acid sequence of the query protein target', value='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA', placeholder='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA')
112
 
113
  if sequence:
114
+ st.error('Visualization comming soon...')
 
115
 
116
  with prot_col2:
117
  selected_encoder = st.selectbox(
118
  'Select encoder for protein target',('None', 'SeqVec', 'UniRep', 'ESM-1b', 'ProtT5')
119
  )
120
+ st.image('protein_encoder.png')
121
  if sequence:
122
  if selected_encoder == 'SeqVec':
123
+ with st.spinner('Encoding in progress...'):
124
+ from bio_embeddings.embed import SeqVecEmbedder
125
+ encoder = SeqVecEmbedder()
126
  embeddings = encoder.embed_batch([sequence])
127
+ for emb in embeddings:
128
+ prot_embedding = encoder.reduce_per_protein(emb)
129
+ break
130
  elif selected_encoder == 'UniRep':
131
+ with st.spinner('Encoding in progress...'):
132
+ from jax_unirep.utils import load_params
133
+ params = load_params()
134
+ from jax_unirep.featurize import get_reps
135
+ embedding, h_final, c_final = get_reps([sequence])
136
+ prot_embedding = embedding.mean(axis=0)
137
  elif selected_encoder == 'ESM-1b':
138
+ with st.spinner('Encoding in progress...'):
139
+ from bio_embeddings.embed import ESM1bEmbedder
140
+ encoder = ESM1bEmbedder()
141
+ embeddings = encoder.embed_batch([sequence])
142
+ for emb in embeddings:
143
+ prot_embedding = encoder.reduce_per_protein(emb)
144
+ break
145
  elif selected_encoder == 'ProtT5':
146
+ with st.spinner('Encoding in progress...'):
147
+ from bio_embeddings.embed import ProtTransT5XLU50Embedder
148
+ encoder = ProtTransT5XLU50Embedder()
149
+ embeddings = encoder.embed_batch([sequence])
150
+ for emb in embeddings:
151
+ prot_embedding = encoder.reduce_per_protein(emb)
152
+ break
153
  else:
 
154
  prot_embedding = None
155
+ st.warning('Chosen encoder above...')
156
+
157
  if prot_embedding is not None:
 
 
158
  st.image('protein_encoder_done.png')
159
+ st.success('Encoding complete.')
160
 
161
  if not drug_embedding or not prot_embedding:
162
+ st.error('Waiting for both drug and target embeddings to be computed...')
163
  else:
164
  st.warning('In the future inference will be run with HyperPCM on the given drug compound and protein target...')
165
 
 
169
 
170
  st.write('In the furute this page will retrieve the top-k drug compounds that are predicted to have the highest activity toward the given protein target from either the Lenselink or Davis datasets.')
171
 
172
+ st.markdown('### Target')
173
  sequence = st.text_input('Enter the amino-acid sequence of the query protein target', value='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA', placeholder='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA')
174
 
175
  if sequence:
176
  col1, col2 = st.columns(2)
177
  with col1:
178
+ st.error('Visualization coming soon...')
 
179
 
180
  with col2:
181
+ st.image('protein_encoder.png')
182
+
183
+ with st.spinner('Encoding in progress...'):
184
+ from bio_embeddings.embed import SeqVecEmbedder
185
+ encoder = SeqVecEmbedder()
 
186
  embeddings = encoder.embed_batch([sequence])
187
+ for emb in embeddings:
188
+ embedding = encoder.reduce_per_protein(emb)
189
+ break
190
+ st.image('protein_encoder_done.png')
191
  st.success('Encoding complete.')
192
 
193
  st.markdown('### Inference')
 
198
  for i in range(100):
199
  time.sleep(0.1)
200
  my_bar.progress(i + 1, text=progress_text)
201
+ my_bar.progress(100, text="HyperPCM predicts the QSAR model for the query protein target. Done.)
202
 
203
  st.markdown('### Retrieval')
204