Accelernate committed on
Commit
854a59f
·
verified ·
1 Parent(s): 9d19be8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -34
app.py CHANGED
@@ -1,4 +1,4 @@
1
- import streamlit as st
2
  from stmol import showmol
3
  import py3Dmol
4
  import requests
@@ -14,9 +14,39 @@ import urllib.parse
14
 
15
  urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
16
 
 
17
  st.set_page_config(layout='wide')
18
- st.sidebar.title('🔮 GenPro2 Protein Generator, Structure Predictor, and Analysis Tool')
19
- st.sidebar.write('GenPro2 is an end-to-end protein sequence generator, structure predictor, and analysis tool based [*ESMFold*](https://esmatlas.com/about) and the ESM-2 language model.')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  def generate_sequence_from_words(words, length):
22
  seed = ' '.join(words).encode('utf-8')
@@ -64,11 +94,10 @@ def perform_blast_analysis(sequence):
64
  st.write(f"**Organism Code:** {organism}")
65
  st.write(f"**Sequence Identity:** {identity_percentage:.2f}%")
66
 
67
-
68
  # Fetch protein function (if available)
69
  if hasattr(alignment, 'description') and alignment.description:
70
  st.write(f"**Potential Function:** {alignment.description}")
71
-
72
  st.write("No significant matches found. This might be a unique protein sequence!")
73
  except Exception as e:
74
  st.error(f"An error occurred during protein analysis: {str(e)}")
@@ -109,7 +138,7 @@ def update(sequence, word1, word2, word3, sequence_length):
109
  st.write("Please try again later or contact support if the issue persists.")
110
 
111
  def share_on_twitter(word1, word2, word3, length, plddt):
112
- tweet_text = f"I just generated a new protein using #GenPro2 from the seed-words '{word1}', '{word2}', and '{word3}' + sequence length {length}! It has a predicted protein score of: {plddt}%."
113
  tweet_url = f"https://twitter.com/intent/tweet?text={urllib.parse.quote(tweet_text)}"
114
  return tweet_url
115
 
@@ -121,25 +150,29 @@ if 'show_analyze_button' not in st.session_state:
121
  if 'structure_info' not in st.session_state:
122
  st.session_state.structure_info = None
123
 
124
- st.title("Word-Seeded Protein Sequence Generator, Structure Predictor, and Analysis Tool")
 
 
 
 
 
 
 
 
125
 
126
- st.sidebar.subheader("Generate Sequence from Words")
127
- word1 = st.sidebar.text_input("Word 1")
128
- word2 = st.sidebar.text_input("Word 2")
129
- word3 = st.sidebar.text_input("Word 3")
130
- sequence_length = st.sidebar.number_input("Sequence Length", min_value=50, max_value=400, value=100, step=10)
131
 
132
- if st.sidebar.button('Generate and Predict'):
133
  if word1 and word2 and word3:
134
  sequence = generate_sequence_from_words([word1, word2, word3], sequence_length)
135
  st.session_state.sequence = sequence
136
- st.sidebar.text_area("Generated Sequence", sequence, height=100)
137
- st.sidebar.info("Note: The same words and sequence length will always produce the same sequence.")
138
 
139
  with st.spinner("Predicting protein structure... This may take a few minutes."):
140
  update(sequence, word1, word2, word3, sequence_length)
141
  else:
142
- st.sidebar.warning("Please enter all three words to generate a sequence.")
143
 
144
  # Display structure information if available
145
  if st.session_state.structure_info:
@@ -148,14 +181,14 @@ if st.session_state.structure_info:
148
  render_mol(info['pdb_string'])
149
 
150
  st.subheader('plDDT Confidence Score')
151
- st.write('plDDT is a bench mark for scoring the confidence level in protein folding prediction based on a scale from 0-100%. 70% or more is really good!')
152
  plddt_score = int(info["b_value"] * 100)
153
- st.info(f'Average plDDT: {plddt_score}%')
154
 
155
  st.subheader("Share your unique protein on X(Twitter)")
156
 
157
  st.markdown("""
158
- <div style='background-color: #e6f2ff; padding: 10px; border-radius: 5px; font-size: 0.8em;'>
159
  <ol>
160
  <li>Take a screenshot of the protein structure above.</li>
161
  <li>Click the 'Share on X' button below to open a pre-filled post with your protein seed-words and score.</li>
@@ -180,18 +213,3 @@ if st.session_state.structure_info:
180
  **Remember, this folding is based on randomly generated sequences. Interpret the results with caution.
181
  Enjoy exploring the world of protein sequences!
182
  """)
183
-
184
-
185
- col1, col2 = st.columns(2)
186
- with col1:
187
- if st.button('Analyze Protein'):
188
- perform_blast_analysis(st.session_state.sequence)
189
-
190
- with col2:
191
- st.download_button(
192
- label="Download PDB",
193
- data=info['pdb_string'],
194
- file_name='predicted.pdb',
195
- mime='text/plain',
196
- )
197
-
 
1
+ import streamlit as st
2
  from stmol import showmol
3
  import py3Dmol
4
  import requests
 
14
 
15
  urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
16
 
17
+ # Set page config and apply dark theme
18
  st.set_page_config(layout='wide')
19
+ st.markdown("""
20
+ <style>
21
+ body {
22
+ color: #fff;
23
+ background-color: #0e1117;
24
+ }
25
+ .stApp {
26
+ background-color: #0e1117;
27
+ }
28
+ .stTextInput > div > div > input {
29
+ color: #fff;
30
+ background-color: #262730;
31
+ }
32
+ .stNumberInput > div > div > input {
33
+ color: #fff;
34
+ background-color: #262730;
35
+ }
36
+ .stTextArea > div > div > textarea {
37
+ color: #fff;
38
+ background-color: #262730;
39
+ }
40
+ .stButton > button {
41
+ color: #fff;
42
+ background-color: #0e1117;
43
+ border: 1px solid #fff;
44
+ }
45
+ </style>
46
+ """, unsafe_allow_html=True)
47
+
48
+ st.title('🔮 GenPro2 Protein Generator, Structure Predictor, and Analysis Tool')
49
+ st.write('GenPro2 is an end-to-end protein sequence generator, structure predictor, and analysis tool based [*ESMFold*](https://esmatlas.com/about) and the ESM-2 language model.')
50
 
51
  def generate_sequence_from_words(words, length):
52
  seed = ' '.join(words).encode('utf-8')
 
94
  st.write(f"**Organism Code:** {organism}")
95
  st.write(f"**Sequence Identity:** {identity_percentage:.2f}%")
96
 
 
97
  # Fetch protein function (if available)
98
  if hasattr(alignment, 'description') and alignment.description:
99
  st.write(f"**Potential Function:** {alignment.description}")
100
+ else:
101
  st.write("No significant matches found. This might be a unique protein sequence!")
102
  except Exception as e:
103
  st.error(f"An error occurred during protein analysis: {str(e)}")
 
138
  st.write("Please try again later or contact support if the issue persists.")
139
 
140
  def share_on_twitter(word1, word2, word3, length, plddt):
141
+ tweet_text = f"I just generated a new protein using #GenPro2 from the seed-words '{word1}', '{word2}', and '{word3}' + sequence length of {length}! It's plDDT Score: {plddt}%."
142
  tweet_url = f"https://twitter.com/intent/tweet?text={urllib.parse.quote(tweet_text)}"
143
  return tweet_url
144
 
 
150
  if 'structure_info' not in st.session_state:
151
  st.session_state.structure_info = None
152
 
153
+ # Main layout
154
+ st.subheader("Generate Sequence from Words")
155
+ col1, col2, col3 = st.columns(3)
156
+ with col1:
157
+ word1 = st.text_input("Word 1")
158
+ with col2:
159
+ word2 = st.text_input("Word 2")
160
+ with col3:
161
+ word3 = st.text_input("Word 3")
162
 
163
+ sequence_length = st.number_input("Sequence Length", min_value=50, max_value=400, value=100, step=10)
 
 
 
 
164
 
165
+ if st.button('Generate and Predict'):
166
  if word1 and word2 and word3:
167
  sequence = generate_sequence_from_words([word1, word2, word3], sequence_length)
168
  st.session_state.sequence = sequence
169
+ st.text_area("Generated Sequence", sequence, height=100)
170
+ st.info("Note: The same words and sequence length will always produce the same sequence.")
171
 
172
  with st.spinner("Predicting protein structure... This may take a few minutes."):
173
  update(sequence, word1, word2, word3, sequence_length)
174
  else:
175
+ st.warning("Please enter all three words to generate a sequence.")
176
 
177
  # Display structure information if available
178
  if st.session_state.structure_info:
 
181
  render_mol(info['pdb_string'])
182
 
183
  st.subheader('plDDT Confidence Score')
184
+ st.write('plDDT is a benchmark for scoring the confidence level in protein folding predictions based on a scale from 0-100%. 70% or more is good!')
185
  plddt_score = int(info["b_value"] * 100)
186
+ st.info(f'Your plDDT score is: {plddt_score}%')
187
 
188
  st.subheader("Share your unique protein on X(Twitter)")
189
 
190
  st.markdown("""
191
+ <div style='background-color: #262730; padding: 10px; border-radius: 5px; font-size: 0.8em;'>
192
  <ol>
193
  <li>Take a screenshot of the protein structure above.</li>
194
  <li>Click the 'Share on X' button below to open a pre-filled post with your protein seed-words and score.</li>
 
213
  **Remember, this folding is based on randomly generated sequences. Interpret the results with caution.
214
  Enjoy exploring the world of protein sequences!
215
  """)