Spaces:

emmas96
/

hyper-dti

Running

App Files Files Community

emmas96 committed on May 24, 2023

Commit

94a9538

1 Parent(s): 3c1ebe4

update interface to choose target in retrieval

Browse files

Files changed (1) hide show

app.py +24 -42

app.py CHANGED Viewed

@@ -144,28 +144,28 @@ def retrieval():
     st.write('In the furute this page will retrieve the top-k drug compounds that are predicted to have the highest activity toward the given protein target from either the Lenselink or Davis datasets.')
-    st.markdown('### Target')
     sequence = st.text_input('Enter the amino-acid sequence of the query protein target', value='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA', placeholder='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA')
     if sequence:
-        st.markdown('\n\n\n\n Plot of protein to be added soon. \n\n\n\n')
-        selected_encoder = st.selectbox(
-            'Select encoder for protein target',('None', 'SeqVec')
-        )
-        if selected_encoder == 'SeqVec':
             from bio_embeddings.embed import SeqVecEmbedder
             encoder = SeqVecEmbedder()
             embeddings = encoder.embed_batch([sequence])
             for emb in embeddings:
                 embedding = encoder.reduce_per_protein(emb)
                 break
-        else:
-            st.write('Only SeqVec is currently available to encode protein structures.')
-            embedding = None
-        if embedding is not None:
-            st.write(f'{selected_encoder} embedding')
             st.write(embedding)
     st.markdown('### Retrieval')
     st.write('TODO HyperPCM predicts the QSAR model for the given protein target.')
@@ -181,37 +181,19 @@ def retrieval():
             )
     st.write(f'The top-{selected_k} most active drug coupounds from {selected_dataset} predicted by HyperPCM are: ')
-    col1, col2, col3, col4, col5 = st.columns(5)
-    with col1:
-        smiles = 'CC(=O)OC1=CC=CC=C1C(=O)O'
-        mol = Chem.MolFromSmiles(smiles)
-        mol_img = Chem.Draw.MolToImage(mol)
-        st.image(mol_img)
-    with col2:
-        smiles = 'COc1cc(C=O)ccc1O'
-        mol = Chem.MolFromSmiles(smiles)
-        mol_img = Chem.Draw.MolToImage(mol)
-        st.image(mol_img)
-    with col3:
-        smiles = 'CC(=O)Nc1ccc(O)cc1'
-        mol = Chem.MolFromSmiles(smiles)
-        mol_img = Chem.Draw.MolToImage(mol)
-        st.image(mol_img)
-    with col4:
-        smiles = 'CC(=O)Nc1ccc(OS(=O)(=O)O)cc1'
-        mol = Chem.MolFromSmiles(smiles)
-        mol_img = Chem.Draw.MolToImage(mol)
-        st.image(mol_img)
-    with col5:
-        smiles = 'CC(=O)Nc1ccc(O[C@@H]2O[C@H](C(=O)O)[C@@H](O)[C@H](O)[C@H]2O)cc1'
-        mol = Chem.MolFromSmiles(smiles)
-        mol_img = Chem.Draw.MolToImage(mol)
-        st.image(mol_img)
 def display_protein():
     st.markdown('## Display protein')

     st.write('In the furute this page will retrieve the top-k drug compounds that are predicted to have the highest activity toward the given protein target from either the Lenselink or Davis datasets.')
+    st.markdown('### Choose protein target')
     sequence = st.text_input('Enter the amino-acid sequence of the query protein target', value='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA', placeholder='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA')
     if sequence:
+        col1, col2, col3 = st.columns(3)
+        with col1:
+            st.markdown('\n\n\n\n Plot of protein to be added soon. \n\n\n\n')
+        with col2:
+            st.write('Encoding with SecVec')
+            st.image('protein_encoder.png')
             from bio_embeddings.embed import SeqVecEmbedder
             encoder = SeqVecEmbedder()
             embeddings = encoder.embed_batch([sequence])
             for emb in embeddings:
                 embedding = encoder.reduce_per_protein(emb)
                 break
+        with col3:
+            st.write(f'SeqVec embedding')
             st.write(embedding)
+            st.write(np.transpose(embedding))
     st.markdown('### Retrieval')
     st.write('TODO HyperPCM predicts the QSAR model for the given protein target.')
             )
     st.write(f'The top-{selected_k} most active drug coupounds from {selected_dataset} predicted by HyperPCM are: ')
+    dummy_smiles = [['CC(=O)OC1=CC=CC=C1C(=O)O', 'COc1cc(C=O)ccc1O', 'CC(=O)Nc1ccc(O)cc1', 'CC(=O)Nc1ccc(OS(=O)(=O)O)cc1',
+                    'CC(=O)Nc1ccc(O[C@@H]2O[C@H](C(=O)O)[C@@H](O)[C@H](O)[C@H]2O)cc1'], ['CC(=O)OC1=CC=CC=C1C(=O)O', 'COc1cc(C=O)ccc1O', 'CC(=O)Nc1ccc(O)cc1',
+                    'CC(=O)Nc1ccc(OS(=O)(=O)O)cc1', 'CC(=O)Nc1ccc(O[C@@H]2O[C@H](C(=O)O)[C@@H](O)[C@H](O)[C@H]2O)cc1'], ['CC(=O)OC1=CC=CC=C1C(=O)O',
+                    'COc1cc(C=O)ccc1O', 'CC(=O)Nc1ccc(O)cc1', 'CC(=O)Nc1ccc(OS(=O)(=O)O)cc1', 'CC(=O)Nc1ccc(O[C@@H]2O[C@H](C(=O)O)[C@@H](O)[C@H](O)[C@H]2O)cc1'],
+                    ['CC(=O)OC1=CC=CC=C1C(=O)O', 'COc1cc(C=O)ccc1O', 'CC(=O)Nc1ccc(O)cc1', 'CC(=O)Nc1ccc(OS(=O)(=O)O)cc1',
+                    'CC(=O)Nc1ccc(O[C@@H]2O[C@H](C(=O)O)[C@@H](O)[C@H](O)[C@H]2O)cc1']]
+    cols = st.columns(5)
+    for j, col in enumerate(cols):
+        with cols:
+            for i in range(int(selected_k/5)):
+                mol = Chem.MolFromSmiles(dummy_smiles[i,j])
+                mol_img = Chem.Draw.MolToImage(mol)
+                st.image(mol_img)
 def display_protein():
     st.markdown('## Display protein')