emmas96 committed on
Commit
65c318c
·
1 Parent(s): 99a9694

update illustrations of respective encoders

Browse files
Files changed (1) hide show
  1. app.py +83 -71
app.py CHANGED
@@ -56,85 +56,98 @@ def predict_dti():
56
 
57
  with col1:
58
  st.markdown('### Drug')
59
- smiles = st.text_input('Enter the SMILES of the query drug compound', value='CC(=O)OC1=CC=CC=C1C(=O)O', placeholder='CC(=O)OC1=CC=CC=C1C(=O)O')
60
 
61
- if smiles:
62
- mol = Chem.MolFromSmiles(smiles)
63
- mol_img = Chem.Draw.MolToImage(mol)
64
- st.image(mol_img) #, width = 140)
65
-
 
 
 
 
 
66
  selected_encoder = st.selectbox(
67
  'Select encoder for drug compound',('None', 'CDDD', 'MolBERT')
68
  )
69
- if selected_encoder == 'CDDD':
70
- from cddd.inference import InferenceModel
71
- CDDD_MODEL_DIR = 'src/encoders/cddd'
72
- cddd_model = InferenceModel(CDDD_MODEL_DIR)
73
- embedding = cddd_model.seq_to_emb([smiles])
74
- #from huggingface_hub import hf_hub_download
75
- #precomputed_embs = f'{selected_encoder}_encoding.csv'
76
- #REPO_ID = "emmas96/Lenselink"
77
- #embs_path = hf_hub_download(REPO_ID, precomputed_embs)
78
- #embs = pd.read_csv(embs_path)
79
- #embedding = embs[smiles]
80
- elif selected_encoder == 'MolBERT':
81
- from molbert.utils.featurizer.molbert_featurizer import MolBertFeaturizer
82
- from huggingface_hub import hf_hub_download
83
- CDDD_MODEL_DIR = 'encoders/molbert/last.ckpt'
84
- REPO_ID = "emmas96/hyperpcm"
85
- checkpoint_path = hf_hub_download(REPO_ID, MOLBERT_MODEL_DIR)
86
- molbert_model = MolBertFeaturizer(checkpoint_path, max_seq_len=500, embedding_type='average-1-cat-pooled')
87
- embedding = molbert_model.transform([smiles])
88
- else:
89
- st.write('No pre-trained version of HyperPCM is available for the chosen encoder.')
90
- embedding = None
91
- if embedding is not None:
92
- st.write(f'{selected_encoder} embedding')
93
- st.write(embedding)
 
 
94
 
95
  with col2:
96
  st.markdown('### Target')
97
- sequence = st.text_input('Enter the amino-acid sequence of the query protein target', value='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA', placeholder='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA')
 
 
 
 
98
 
99
- if sequence:
100
- st.markdown('\n\n\n\n Plot of protein to be added soon. \n\n\n\n')
101
-
 
102
  selected_encoder = st.selectbox(
103
  'Select encoder for protein target',('None', 'SeqVec', 'UniRep', 'ESM-1b', 'ProtT5')
104
  )
105
- if selected_encoder == 'SeqVec':
106
- from bio_embeddings.embed import SeqVecEmbedder
107
- encoder = SeqVecEmbedder()
108
- embeddings = encoder.embed_batch([sequence])
109
- for emb in embeddings:
110
- embedding = encoder.reduce_per_protein(emb)
111
- break
112
- elif selected_encoder == 'UniRep':
113
- from jax_unirep.utils import load_params
114
- params = load_params()
115
- from jax_unirep.featurize import get_reps
116
- embedding, h_final, c_final = get_reps([sequence])
117
- embedding = embedding.mean(axis=0)
118
- elif selected_encoder == 'ESM-1b':
119
- from bio_embeddings.embed import ESM1bEmbedder
120
- encoder = ESM1bEmbedder()
121
- embeddings = encoder.embed_batch([sequence])
122
- for emb in embeddings:
123
- embedding = encoder.reduce_per_protein(emb)
124
- break
125
- elif selected_encoder == 'ProtT5':
126
- from bio_embeddings.embed import ProtTransT5XLU50Embedder
127
- encoder = ProtTransT5XLU50Embedder()
128
- embeddings = encoder.embed_batch([sequence])
129
- for emb in embeddings:
130
- embedding = encoder.reduce_per_protein(emb)
131
- break
132
- else:
133
- st.write('No pre-trained version of HyperPCM is available for the chosen encoder.')
134
- embedding = None
135
- if embedding is not None:
136
- st.write(f'{selected_encoder} embedding')
137
- st.write(embedding)
 
 
138
 
139
  st.write('TODO run inference with HyperPCM on the given drug compound and protein target.')
140
 
@@ -148,7 +161,7 @@ def retrieval():
148
  sequence = st.text_input('Enter the amino-acid sequence of the query protein target', value='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA', placeholder='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA')
149
 
150
  if sequence:
151
- col1, col2, col3 = st.columns(3)
152
  with col1:
153
  st.markdown('\n\n\n\n Plot of protein to be added soon. \n\n\n\n')
154
 
@@ -162,8 +175,7 @@ def retrieval():
162
  for emb in embeddings:
163
  embedding = encoder.reduce_per_protein(emb)
164
  break
165
- with col3:
166
- st.write(f'SeqVec embedding')
167
  st.write(embedding)
168
  st.write(np.transpose(embedding))
169
 
 
56
 
57
  with col1:
58
  st.markdown('### Drug')
 
59
 
60
+ mol_col1, mol_col2 = st.columns(2)
61
+
62
+ with mol_col1:
63
+ smiles = st.text_input('Enter the SMILES of the query drug compound', value='CC(=O)OC1=CC=CC=C1C(=O)O', placeholder='CC(=O)OC1=CC=CC=C1C(=O)O')
64
+ if smiles:
65
+ mol = Chem.MolFromSmiles(smiles)
66
+ mol_img = Chem.Draw.MolToImage(mol)
67
+ st.image(mol_img) #, width = 140)
68
+
69
+ with mol_col2:
70
  selected_encoder = st.selectbox(
71
  'Select encoder for drug compound',('None', 'CDDD', 'MolBERT')
72
  )
73
+ st.image('molecule_encoder.png')
74
+ if smiles:
75
+ if selected_encoder == 'CDDD':
76
+ from cddd.inference import InferenceModel
77
+ CDDD_MODEL_DIR = 'src/encoders/cddd'
78
+ cddd_model = InferenceModel(CDDD_MODEL_DIR)
79
+ embedding = cddd_model.seq_to_emb([smiles])
80
+ #from huggingface_hub import hf_hub_download
81
+ #precomputed_embs = f'{selected_encoder}_encoding.csv'
82
+ #REPO_ID = "emmas96/Lenselink"
83
+ #embs_path = hf_hub_download(REPO_ID, precomputed_embs)
84
+ #embs = pd.read_csv(embs_path)
85
+ #embedding = embs[smiles]
86
+ elif selected_encoder == 'MolBERT':
87
+ from molbert.utils.featurizer.molbert_featurizer import MolBertFeaturizer
88
+ from huggingface_hub import hf_hub_download
89
+ CDDD_MODEL_DIR = 'encoders/molbert/last.ckpt'
90
+ REPO_ID = "emmas96/hyperpcm"
91
+ checkpoint_path = hf_hub_download(REPO_ID, MOLBERT_MODEL_DIR)
92
+ molbert_model = MolBertFeaturizer(checkpoint_path, max_seq_len=500, embedding_type='average-1-cat-pooled')
93
+ embedding = molbert_model.transform([smiles])
94
+ else:
95
+ st.write('No pre-trained version of HyperPCM is available for the chosen encoder.')
96
+ embedding = None
97
+ if embedding is not None:
98
+ st.write(f'{selected_encoder} embedding')
99
+ st.write(embedding)
100
 
101
  with col2:
102
  st.markdown('### Target')
103
+
104
+ prot_col1, prot_col2 = st.columns(2)
105
+
106
+ with prot_col1:
107
+ sequence = st.text_input('Enter the amino-acid sequence of the query protein target', value='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA', placeholder='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA')
108
 
109
+ if sequence:
110
+ st.markdown('\n\n\n\n Plot of protein to be added soon. \n\n\n\n')
111
+
112
+ with prot_col2:
113
  selected_encoder = st.selectbox(
114
  'Select encoder for protein target',('None', 'SeqVec', 'UniRep', 'ESM-1b', 'ProtT5')
115
  )
116
+ st.image('protein_encoder.png')
117
+ if sequence:
118
+ if selected_encoder == 'SeqVec':
119
+ from bio_embeddings.embed import SeqVecEmbedder
120
+ encoder = SeqVecEmbedder()
121
+ embeddings = encoder.embed_batch([sequence])
122
+ for emb in embeddings:
123
+ embedding = encoder.reduce_per_protein(emb)
124
+ break
125
+ elif selected_encoder == 'UniRep':
126
+ from jax_unirep.utils import load_params
127
+ params = load_params()
128
+ from jax_unirep.featurize import get_reps
129
+ embedding, h_final, c_final = get_reps([sequence])
130
+ embedding = embedding.mean(axis=0)
131
+ elif selected_encoder == 'ESM-1b':
132
+ from bio_embeddings.embed import ESM1bEmbedder
133
+ encoder = ESM1bEmbedder()
134
+ embeddings = encoder.embed_batch([sequence])
135
+ for emb in embeddings:
136
+ embedding = encoder.reduce_per_protein(emb)
137
+ break
138
+ elif selected_encoder == 'ProtT5':
139
+ from bio_embeddings.embed import ProtTransT5XLU50Embedder
140
+ encoder = ProtTransT5XLU50Embedder()
141
+ embeddings = encoder.embed_batch([sequence])
142
+ for emb in embeddings:
143
+ embedding = encoder.reduce_per_protein(emb)
144
+ break
145
+ else:
146
+ st.write('No pre-trained version of HyperPCM is available for the chosen encoder.')
147
+ embedding = None
148
+ if embedding is not None:
149
+ st.write(f'{selected_encoder} embedding')
150
+ st.write(embedding)
151
 
152
  st.write('TODO run inference with HyperPCM on the given drug compound and protein target.')
153
 
 
161
  sequence = st.text_input('Enter the amino-acid sequence of the query protein target', value='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA', placeholder='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA')
162
 
163
  if sequence:
164
+ col1, col2 = st.columns(2)
165
  with col1:
166
  st.markdown('\n\n\n\n Plot of protein to be added soon. \n\n\n\n')
167
 
 
175
  for emb in embeddings:
176
  embedding = encoder.reduce_per_protein(emb)
177
  break
178
+ st.write('SeqVec embedding')
 
179
  st.write(embedding)
180
  st.write(np.transpose(embedding))
181