update illustrations of respective encoders
Browse files
app.py
CHANGED
@@ -56,85 +56,98 @@ def predict_dti():
|
|
56 |
|
57 |
with col1:
|
58 |
st.markdown('### Drug')
|
59 |
-
smiles = st.text_input('Enter the SMILES of the query drug compound', value='CC(=O)OC1=CC=CC=C1C(=O)O', placeholder='CC(=O)OC1=CC=CC=C1C(=O)O')
|
60 |
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
st.
|
65 |
-
|
|
|
|
|
|
|
|
|
|
|
66 |
selected_encoder = st.selectbox(
|
67 |
'Select encoder for drug compound',('None', 'CDDD', 'MolBERT')
|
68 |
)
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
|
|
|
|
94 |
|
95 |
with col2:
|
96 |
st.markdown('### Target')
|
97 |
-
|
|
|
|
|
|
|
|
|
98 |
|
99 |
-
|
100 |
-
|
101 |
-
|
|
|
102 |
selected_encoder = st.selectbox(
|
103 |
'Select encoder for protein target',('None', 'SeqVec', 'UniRep', 'ESM-1b', 'ProtT5')
|
104 |
)
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
|
|
|
|
138 |
|
139 |
st.write('TODO run inference with HyperPCM on the given drug compound and protein target.')
|
140 |
|
@@ -148,7 +161,7 @@ def retrieval():
|
|
148 |
sequence = st.text_input('Enter the amino-acid sequence of the query protein target', value='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA', placeholder='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA')
|
149 |
|
150 |
if sequence:
|
151 |
-
col1, col2
|
152 |
with col1:
|
153 |
st.markdown('\n\n\n\n Plot of protein to be added soon. \n\n\n\n')
|
154 |
|
@@ -162,8 +175,7 @@ def retrieval():
|
|
162 |
for emb in embeddings:
|
163 |
embedding = encoder.reduce_per_protein(emb)
|
164 |
break
|
165 |
-
|
166 |
-
st.write(f'SeqVec embedding')
|
167 |
st.write(embedding)
|
168 |
st.write(np.transpose(embedding))
|
169 |
|
|
|
56 |
|
57 |
with col1:
|
58 |
st.markdown('### Drug')
|
|
|
59 |
|
60 |
+
mol_col1, mol_col2 = st.columns(2)
|
61 |
+
|
62 |
+
with mol_col1:
|
63 |
+
smiles = st.text_input('Enter the SMILES of the query drug compound', value='CC(=O)OC1=CC=CC=C1C(=O)O', placeholder='CC(=O)OC1=CC=CC=C1C(=O)O')
|
64 |
+
if smiles:
|
65 |
+
mol = Chem.MolFromSmiles(smiles)
|
66 |
+
mol_img = Chem.Draw.MolToImage(mol)
|
67 |
+
st.image(mol_img) #, width = 140)
|
68 |
+
|
69 |
+
with mol_col2:
|
70 |
selected_encoder = st.selectbox(
|
71 |
'Select encoder for drug compound',('None', 'CDDD', 'MolBERT')
|
72 |
)
|
73 |
+
st.image('molecule_encoder.png')
|
74 |
+
if smiles:
|
75 |
+
if selected_encoder == 'CDDD':
|
76 |
+
from cddd.inference import InferenceModel
|
77 |
+
CDDD_MODEL_DIR = 'src/encoders/cddd'
|
78 |
+
cddd_model = InferenceModel(CDDD_MODEL_DIR)
|
79 |
+
embedding = cddd_model.seq_to_emb([smiles])
|
80 |
+
#from huggingface_hub import hf_hub_download
|
81 |
+
#precomputed_embs = f'{selected_encoder}_encoding.csv'
|
82 |
+
#REPO_ID = "emmas96/Lenselink"
|
83 |
+
#embs_path = hf_hub_download(REPO_ID, precomputed_embs)
|
84 |
+
#embs = pd.read_csv(embs_path)
|
85 |
+
#embedding = embs[smiles]
|
86 |
+
elif selected_encoder == 'MolBERT':
|
87 |
+
from molbert.utils.featurizer.molbert_featurizer import MolBertFeaturizer
|
88 |
+
from huggingface_hub import hf_hub_download
|
89 |
+
# Path of the MolBERT checkpoint inside the Hub repo; the name must match the one passed to hf_hub_download below.
MOLBERT_MODEL_DIR = 'encoders/molbert/last.ckpt'
|
90 |
+
REPO_ID = "emmas96/hyperpcm"
|
91 |
+
checkpoint_path = hf_hub_download(REPO_ID, MOLBERT_MODEL_DIR)
|
92 |
+
molbert_model = MolBertFeaturizer(checkpoint_path, max_seq_len=500, embedding_type='average-1-cat-pooled')
|
93 |
+
embedding = molbert_model.transform([smiles])
|
94 |
+
else:
|
95 |
+
st.write('No pre-trained version of HyperPCM is available for the chosen encoder.')
|
96 |
+
embedding = None
|
97 |
+
if embedding is not None:
|
98 |
+
st.write(f'{selected_encoder} embedding')
|
99 |
+
st.write(embedding)
|
100 |
|
101 |
with col2:
|
102 |
st.markdown('### Target')
|
103 |
+
|
104 |
+
prot_col1, prot_col2 = st.columns(2)
|
105 |
+
|
106 |
+
with prot_col1:
|
107 |
+
sequence = st.text_input('Enter the amino-acid sequence of the query protein target', value='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA', placeholder='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA')
|
108 |
|
109 |
+
if sequence:
|
110 |
+
st.markdown('\n\n\n\n Plot of protein to be added soon. \n\n\n\n')
|
111 |
+
|
112 |
+
with prot_col2:
|
113 |
selected_encoder = st.selectbox(
|
114 |
'Select encoder for protein target',('None', 'SeqVec', 'UniRep', 'ESM-1b', 'ProtT5')
|
115 |
)
|
116 |
+
st.image('protein_encoder.png')
|
117 |
+
if sequence:
|
118 |
+
if selected_encoder == 'SeqVec':
|
119 |
+
from bio_embeddings.embed import SeqVecEmbedder
|
120 |
+
encoder = SeqVecEmbedder()
|
121 |
+
embeddings = encoder.embed_batch([sequence])
|
122 |
+
for emb in embeddings:
|
123 |
+
embedding = encoder.reduce_per_protein(emb)
|
124 |
+
break
|
125 |
+
elif selected_encoder == 'UniRep':
|
126 |
+
from jax_unirep.utils import load_params
|
127 |
+
params = load_params()
|
128 |
+
from jax_unirep.featurize import get_reps
|
129 |
+
embedding, h_final, c_final = get_reps([sequence])
|
130 |
+
embedding = embedding.mean(axis=0)
|
131 |
+
elif selected_encoder == 'ESM-1b':
|
132 |
+
from bio_embeddings.embed import ESM1bEmbedder
|
133 |
+
encoder = ESM1bEmbedder()
|
134 |
+
embeddings = encoder.embed_batch([sequence])
|
135 |
+
for emb in embeddings:
|
136 |
+
embedding = encoder.reduce_per_protein(emb)
|
137 |
+
break
|
138 |
+
elif selected_encoder == 'ProtT5':
|
139 |
+
from bio_embeddings.embed import ProtTransT5XLU50Embedder
|
140 |
+
encoder = ProtTransT5XLU50Embedder()
|
141 |
+
embeddings = encoder.embed_batch([sequence])
|
142 |
+
for emb in embeddings:
|
143 |
+
embedding = encoder.reduce_per_protein(emb)
|
144 |
+
break
|
145 |
+
else:
|
146 |
+
st.write('No pre-trained version of HyperPCM is available for the chosen encoder.')
|
147 |
+
embedding = None
|
148 |
+
if embedding is not None:
|
149 |
+
st.write(f'{selected_encoder} embedding')
|
150 |
+
st.write(embedding)
|
151 |
|
152 |
st.write('TODO run inference with HyperPCM on the given drug compound and protein target.')
|
153 |
|
|
|
161 |
sequence = st.text_input('Enter the amino-acid sequence of the query protein target', value='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA', placeholder='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA')
|
162 |
|
163 |
if sequence:
|
164 |
+
col1, col2 = st.columns(2)
|
165 |
with col1:
|
166 |
st.markdown('\n\n\n\n Plot of protein to be added soon. \n\n\n\n')
|
167 |
|
|
|
175 |
for emb in embeddings:
|
176 |
embedding = encoder.reduce_per_protein(emb)
|
177 |
break
|
178 |
+
st.write('SeqVec embedding')
|
|
|
179 |
st.write(embedding)
|
180 |
st.write(np.transpose(embedding))
|
181 |
|