Spaces:

emmas96
/

hyper-dti

Running

App Files Files Community

emmas96 committed on Dec 8, 2023

Commit

35aa55d

1 Parent(s): 139050f

test seqvec encoder

Browse files

Files changed (1) hide show

app.py +19 -11

app.py CHANGED Viewed

@@ -23,15 +23,13 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 st.set_page_config(layout="wide")
-st.title('HyperDTI: Robust Task-Conditioned Modeling of Drug-Target Interactions.\n')
 st.markdown('')
 st.markdown(
     """
-    🧬 Github: [ml-jku/hyper-dti](https://https://github.com/ml-jku/hyper-dti)    📝 NeurIPS 2022 AI4Science workshop paper: [OpenReview](https://openreview.net/forum?id=dIX34JWnIAL)\n
     """
 )
-#st.error('WARNING! This app is currently under development and should not be used!')
-#st.divider()
 def about_page():
     st.markdown(
@@ -57,9 +55,9 @@ def about_page():
 def retrieval():
-    st.markdown('## Retrieve top-k most active drug compounds')
-    st.write('In the furute this page will retrieve the top-k drug compounds that are predicted to have the highest activity toward the given protein target from either the Lenselink or Davis datasets.')
     col1, col2 = st.columns(2)
     with col1:
@@ -71,7 +69,7 @@ def retrieval():
     with col1:
         ex_target = 'YTKMKTATNIYIFNLALADALATSTLPFQSVNYLMGTWPFGTILCKIVISIDYYNMFTSIFTLCTMSVDRYIAVCHPVKALDFRTPRNAKTVNVCNWI'
         sequence = st.text_input('Enter amino-acid sequence', value=ex_target, placeholder=ex_target)
-        if sequence == 'HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA' or sequence == ex_target:
             st.image('figures/ex_protein.jpeg', use_column_width='always')
         elif sequence:
             st.error('Visualization coming soon...')
@@ -84,12 +82,22 @@ def retrieval():
             if selected_encoder == 'SeqVec':
                 st.image('figures/protein_encoder_done.png')
                 with st.spinner('Encoding in progress...'):
-                    # TODO make SeqVec embedding on the spot
                     with open(os.path.join(data_path, f'Lenselink/processed/SeqVec_encoding_test.pickle'), 'rb') as handle:
                         test_set = pickle.load(handle)
-                    # TODO handle case if sequence not in test set
-                    query_embedding = test_set[sequence]
                 st.success('Encoding complete.')
             else:
                 query_embedding = None

 st.set_page_config(layout="wide")
+st.title('HyperDTI: Robust Task-Conditioned Modeling of Drug-Target Interactions\n')
 st.markdown('')
 st.markdown(
     """
+    🧬 Github: [ml-jku/hyper-dti](https://https://github.com/ml-jku/hyper-dti)    📝 NeurIPS 2022 AI4Science workshop paper: [OpenReview](https://openreview.net/forum?id=dIX34JWnIAL) TBA Journal of Chemical Information and Modeling. \n
     """
 )
 def about_page():
     st.markdown(
 def retrieval():
+    st.markdown('## Retrieval of most active drug compounds')
+    st.write('Use HyperPCM to generate a QSAR model for a selected query protein target and retrieve the top-k drug compounds predicted to have the highest activity toward the given protein target from the Lenselink datasets.')
     col1, col2 = st.columns(2)
     with col1:
     with col1:
         ex_target = 'YTKMKTATNIYIFNLALADALATSTLPFQSVNYLMGTWPFGTILCKIVISIDYYNMFTSIFTLCTMSVDRYIAVCHPVKALDFRTPRNAKTVNVCNWI'
         sequence = st.text_input('Enter amino-acid sequence', value=ex_target, placeholder=ex_target)
+        if sequence == 'HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA': # or sequence == ex_target:
             st.image('figures/ex_protein.jpeg', use_column_width='always')
         elif sequence:
             st.error('Visualization coming soon...')
             if selected_encoder == 'SeqVec':
                 st.image('figures/protein_encoder_done.png')
                 with st.spinner('Encoding in progress...'):
                     with open(os.path.join(data_path, f'Lenselink/processed/SeqVec_encoding_test.pickle'), 'rb') as handle:
                         test_set = pickle.load(handle)
+                    print(sequence in test_set.keys())
+                    print(sequence in test_set.keys().values)
+                    if sequence in test_set.keys():
+                        query_embedding = test_set[sequence]
+                    else:
+                        from bio_embeddings.embed import SeqVecEmbedder
+                        encoder = SeqVecEmbedder()
+                        embeddings = encoder.embed_batch([sequence])
+                        for emb in embeddings:
+                            query_embedding = encoder.reduce_per_protein(emb)
+                            break
                 st.success('Encoding complete.')
             else:
                 query_embedding = None