Spaces:

emmas96
/

hyper-dti

Running

App Files Community

emmas96 committed on May 25, 2023

Commit

4f1ea03

1 Parent(s): 52833bb

include example protein structure without prediction

Browse files

Files changed (1) hide show

app.py +58 -43

app.py CHANGED Viewed

@@ -110,7 +110,9 @@ def predict_dti():
         with prot_col1:
             sequence = st.text_input('Enter query amino-acid sequence', value='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA', placeholder='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA')
-            if sequence:
                 st.error('Visualization comming soon...')
         with prot_col2:
@@ -186,53 +188,63 @@ def retrieval():
     col1, col2, col3, col4 = st.columns(4)
     with col2:
         sequence = st.text_input('Enter query amino-acid sequence', value='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA', placeholder='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA')
-        if sequence:
             st.error('Visualization coming soon...')
     with col3:
         if sequence:
-            st.image('figures/protein_encoder_done.png')
-            with st.spinner('Encoding in progress...'):
-                from bio_embeddings.embed import SeqVecEmbedder
-                encoder = SeqVecEmbedder()
-                embeddings = encoder.embed_batch([sequence])
-                for emb in embeddings:
-                    embedding = encoder.reduce_per_protein(emb)
-                    break
-            st.success('Encoding complete.')
-    st.markdown('### Inference')
-    import time
-    progress_text = "HyperPCM predicts the QSAR model for the query protein target. Please wait."
-    my_bar = st.progress(0, text=progress_text)
-    for i in range(100):
-        time.sleep(0.1)
-        my_bar.progress(i + 1, text=progress_text)
-    my_bar.progress(100, text="HyperPCM predicts the QSAR model for the query protein target. Done.")
-    st.markdown('### Retrieval')
-    col1, col2 = st.columns(2)
-    with col1:
-        selected_dataset = st.selectbox(
-                'Select dataset from which the drug compounds should be retrieved',('Lenselink', 'Davis')
-            )
-    with col2:
-        selected_k = st.selectbox(
-                'Select the top-k number of drug compounds to retrieve',(5, 10, 15, 20)
-            )
-    st.write(f'The top-{selected_k} most active drug coupounds from {selected_dataset} predicted by HyperPCM are: ')
-    dummy_smiles = ['CC(=O)OC1=CC=CC=C1C(=O)O', 'COc1cc(C=O)ccc1O', 'CC(=O)Nc1ccc(O)cc1', 'CC(=O)Nc1ccc(OS(=O)(=O)O)cc1', 'CC(=O)Nc1ccc(O[C@@H]2O[C@H](C(=O)O)[C@@H](O)[C@H](O)[C@H]2O)cc1']
-    cols = st.columns(5)
-    for j, col in enumerate(cols):
-        with col:
-            for i in range(int(selected_k/5)):
-                mol = Chem.MolFromSmiles(dummy_smiles[j])
-                mol_img = Chem.Draw.MolToImage(mol)
-                st.image(mol_img)
 def display_protein():
     st.markdown('## Display protein structure')
@@ -242,6 +254,9 @@ def display_protein():
     sequence = st.text_input('Enter the amino-acid sequence of the query protein target', value='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA', placeholder='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA')
     if sequence:
         model = esm.pretrained.esmfold_v1()
         model = model.eval().cuda()

         with prot_col1:
             sequence = st.text_input('Enter query amino-acid sequence', value='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA', placeholder='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA')
+            if sequence == 'HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA':
+                st.image('figures/ex_protein.jpeg')
+            elif sequence:
                 st.error('Visualization comming soon...')
         with prot_col2:
     col1, col2, col3, col4 = st.columns(4)
     with col2:
         sequence = st.text_input('Enter query amino-acid sequence', value='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA', placeholder='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA')
+        if sequence == 'HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA':
+            st.image('figures/ex_protein.jpeg')
+        elif sequence:
             st.error('Visualization coming soon...')
     with col3:
+        selected_encoder = st.selectbox(
+                'Select encoder for protein target',('SeqVec', 'None')
+            )
         if sequence:
+            if selected_encoder == 'SeqVec':
+                st.image('figures/protein_encoder_done.png')
+                with st.spinner('Encoding in progress...'):
+                    from bio_embeddings.embed import SeqVecEmbedder
+                    encoder = SeqVecEmbedder()
+                    embeddings = encoder.embed_batch([sequence])
+                    for emb in embeddings:
+                        prot_embedding = encoder.reduce_per_protein(emb)
+                        break
+                st.success('Encoding complete.')
+            else:
+                prot_embedding = None
+                st.image('figures/protein_encoder.png')
+                st.warning('Choose encoder above...')
+    if prot_embedding is not None:
+        st.markdown('### Inference')
+        import time
+        progress_text = "HyperPCM predicts the QSAR model for the query protein target. Please wait."
+        my_bar = st.progress(0, text=progress_text)
+        for i in range(100):
+            time.sleep(0.1)
+            my_bar.progress(i + 1, text=progress_text)
+        my_bar.progress(100, text="HyperPCM predicts the QSAR model for the query protein target. Done.")
+        st.markdown('### Retrieval')
+        col1, col2 = st.columns(2)
+        with col1:
+            selected_dataset = st.selectbox(
+                    'Select dataset from which the drug compounds should be retrieved',('Lenselink', 'Davis')
+                )
+        with col2:
+            selected_k = st.selectbox(
+                    'Select the top-k number of drug compounds to retrieve',(5, 10, 15, 20)
+                )
+        st.write(f'The top-{selected_k} most active drug coupounds from {selected_dataset} predicted by HyperPCM are: ')
+        dummy_smiles = ['CC(=O)OC1=CC=CC=C1C(=O)O', 'COc1cc(C=O)ccc1O', 'CC(=O)Nc1ccc(O)cc1', 'CC(=O)Nc1ccc(OS(=O)(=O)O)cc1', 'CC(=O)Nc1ccc(O[C@@H]2O[C@H](C(=O)O)[C@@H](O)[C@H](O)[C@H]2O)cc1']
+        cols = st.columns(5)
+        for j, col in enumerate(cols):
+            with col:
+                for i in range(int(selected_k/5)):
+                    mol = Chem.MolFromSmiles(dummy_smiles[j])
+                    mol_img = Chem.Draw.MolToImage(mol)
+                    st.image(mol_img)
 def display_protein():
     st.markdown('## Display protein structure')
     sequence = st.text_input('Enter the amino-acid sequence of the query protein target', value='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA', placeholder='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA')
     if sequence:
+        st.image('figures/ex_protein.jpeg')
         model = esm.pretrained.esmfold_v1()
         model = model.eval().cuda()