emmas96 committed on
Commit
35aa55d
·
1 Parent(s): 139050f

test seqvec encoder

Browse files
Files changed (1) hide show
  1. app.py +19 -11
app.py CHANGED
@@ -23,15 +23,13 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
23
 
24
  st.set_page_config(layout="wide")
25
 
26
- st.title('HyperDTI: Robust Task-Conditioned Modeling of Drug-Target Interactions.\n')
27
  st.markdown('')
28
  st.markdown(
29
  """
30
- 🧬 Github: [ml-jku/hyper-dti](https://https://github.com/ml-jku/hyper-dti) 📝 NeurIPS 2022 AI4Science workshop paper: [OpenReview](https://openreview.net/forum?id=dIX34JWnIAL)\n
31
  """
32
  )
33
- #st.error('WARNING! This app is currently under development and should not be used!')
34
- #st.divider()
35
 
36
  def about_page():
37
  st.markdown(
@@ -57,9 +55,9 @@ def about_page():
57
 
58
 
59
  def retrieval():
60
- st.markdown('## Retrieve top-k most active drug compounds')
61
 
62
- st.write('In the furute this page will retrieve the top-k drug compounds that are predicted to have the highest activity toward the given protein target from either the Lenselink or Davis datasets.')
63
 
64
  col1, col2 = st.columns(2)
65
  with col1:
@@ -71,7 +69,7 @@ def retrieval():
71
  with col1:
72
  ex_target = 'YTKMKTATNIYIFNLALADALATSTLPFQSVNYLMGTWPFGTILCKIVISIDYYNMFTSIFTLCTMSVDRYIAVCHPVKALDFRTPRNAKTVNVCNWI'
73
  sequence = st.text_input('Enter amino-acid sequence', value=ex_target, placeholder=ex_target)
74
- if sequence == 'HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA' or sequence == ex_target:
75
  st.image('figures/ex_protein.jpeg', use_column_width='always')
76
  elif sequence:
77
  st.error('Visualization coming soon...')
@@ -84,12 +82,22 @@ def retrieval():
84
  if selected_encoder == 'SeqVec':
85
  st.image('figures/protein_encoder_done.png')
86
  with st.spinner('Encoding in progress...'):
87
- # TODO make SeqVec embedding on the spot
88
-
89
  with open(os.path.join(data_path, f'Lenselink/processed/SeqVec_encoding_test.pickle'), 'rb') as handle:
90
  test_set = pickle.load(handle)
91
- # TODO handle case if sequence not in test set
92
- query_embedding = test_set[sequence]
 
 
 
 
 
 
 
 
 
 
 
93
  st.success('Encoding complete.')
94
  else:
95
  query_embedding = None
 
23
 
24
  st.set_page_config(layout="wide")
25
 
26
+ st.title('HyperDTI: Robust Task-Conditioned Modeling of Drug-Target Interactions\n')
27
  st.markdown('')
28
  st.markdown(
29
  """
30
+ 🧬 Github: [ml-jku/hyper-dti](https://github.com/ml-jku/hyper-dti) 📝 NeurIPS 2022 AI4Science workshop paper: [OpenReview](https://openreview.net/forum?id=dIX34JWnIAL) TBA Journal of Chemical Information and Modeling. \n
31
  """
32
  )
 
 
33
 
34
  def about_page():
35
  st.markdown(
 
55
 
56
 
57
  def retrieval():
58
+ st.markdown('## Retrieval of most active drug compounds')
59
 
60
+ st.write('Use HyperPCM to generate a QSAR model for a selected query protein target and retrieve the top-k drug compounds predicted to have the highest activity toward the given protein target from the Lenselink datasets.')
61
 
62
  col1, col2 = st.columns(2)
63
  with col1:
 
69
  with col1:
70
  ex_target = 'YTKMKTATNIYIFNLALADALATSTLPFQSVNYLMGTWPFGTILCKIVISIDYYNMFTSIFTLCTMSVDRYIAVCHPVKALDFRTPRNAKTVNVCNWI'
71
  sequence = st.text_input('Enter amino-acid sequence', value=ex_target, placeholder=ex_target)
72
+ if sequence == 'HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA': # or sequence == ex_target:
73
  st.image('figures/ex_protein.jpeg', use_column_width='always')
74
  elif sequence:
75
  st.error('Visualization coming soon...')
 
82
  if selected_encoder == 'SeqVec':
83
  st.image('figures/protein_encoder_done.png')
84
  with st.spinner('Encoding in progress...'):
85
+
 
86
  with open(os.path.join(data_path, f'Lenselink/processed/SeqVec_encoding_test.pickle'), 'rb') as handle:
87
  test_set = pickle.load(handle)
88
+
89
+ print(sequence in test_set.keys())
90
+ print(sequence in test_set.keys().values)
91
+ if sequence in test_set.keys():
92
+ query_embedding = test_set[sequence]
93
+ else:
94
+ from bio_embeddings.embed import SeqVecEmbedder
95
+ encoder = SeqVecEmbedder()
96
+ embeddings = encoder.embed_batch([sequence])
97
+ for emb in embeddings:
98
+ query_embedding = encoder.reduce_per_protein(emb)
99
+ break
100
+
101
  st.success('Encoding complete.')
102
  else:
103
  query_embedding = None