Update app.py
Browse files
app.py
CHANGED
@@ -15,8 +15,8 @@ import urllib.parse
|
|
15 |
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
16 |
|
17 |
st.set_page_config(layout='wide')
|
18 |
-
st.sidebar.title('🔮 GenPro2 Protein Generator
|
19 |
-
st.sidebar.write('GenPro2 is an end-to-end
|
20 |
|
21 |
def generate_sequence_from_words(words, length):
|
22 |
seed = ' '.join(words).encode('utf-8')
|
@@ -36,7 +36,7 @@ def render_mol(pdb):
|
|
36 |
|
37 |
def perform_blast_analysis(sequence):
|
38 |
st.subheader('Protein Analysis')
|
39 |
-
with st.spinner("Analyzing generated protein... This may take a
|
40 |
progress_bar = st.progress(0)
|
41 |
for i in range(100):
|
42 |
progress_bar.progress(i + 1)
|
@@ -61,18 +61,14 @@ def perform_blast_analysis(sequence):
|
|
61 |
identity_percentage = (hsp.identities / alignment.length) * 100
|
62 |
|
63 |
st.write(f"**Top Match:** {protein_name}")
|
64 |
-
st.write(f"**Organism:** {organism}")
|
65 |
st.write(f"**Sequence Identity:** {identity_percentage:.2f}%")
|
66 |
-
|
67 |
|
68 |
# Fetch protein function (if available)
|
69 |
if hasattr(alignment, 'description') and alignment.description:
|
70 |
st.write(f"**Potential Function:** {alignment.description}")
|
71 |
|
72 |
-
# Link to BLAST results
|
73 |
-
blast_link = f"https://blast.ncbi.nlm.nih.gov/Blast.cgi?PROGRAM=blastp&PAGE_TYPE=BlastSearch&LINK_LOC=blasthome"
|
74 |
-
st.markdown(f"[View full BLAST results (may require re-running the search)]({blast_link})")
|
75 |
-
else:
|
76 |
st.write("No significant matches found. This might be a unique protein sequence!")
|
77 |
except Exception as e:
|
78 |
st.error(f"An error occurred during protein analysis: {str(e)}")
|
@@ -113,7 +109,7 @@ def update(sequence, word1, word2, word3, sequence_length):
|
|
113 |
st.write("Please try again later or contact support if the issue persists.")
|
114 |
|
115 |
def share_on_twitter(word1, word2, word3, length, plddt):
|
116 |
-
tweet_text = f"I generated a new protein using GenPro2
|
117 |
tweet_url = f"https://twitter.com/intent/tweet?text={urllib.parse.quote(tweet_text)}"
|
118 |
return tweet_url
|
119 |
|
@@ -151,8 +147,8 @@ if st.session_state.structure_info:
|
|
151 |
st.subheader(f'Predicted protein structure using seed: {info["word1"]}, {info["word2"]}, and {info["word3"]} + length {info["sequence_length"]}')
|
152 |
render_mol(info['pdb_string'])
|
153 |
|
154 |
-
st.subheader('plDDT Score')
|
155 |
-
st.write('plDDT is
|
156 |
plddt_score = int(info["b_value"] * 100)
|
157 |
st.info(f'Average plDDT: {plddt_score}%')
|
158 |
|
@@ -162,32 +158,27 @@ if st.session_state.structure_info:
|
|
162 |
<div style='background-color: #e6f2ff; padding: 10px; border-radius: 5px; font-size: 0.8em;'>
|
163 |
<ol>
|
164 |
<li>Take a screenshot of the protein structure above.</li>
|
165 |
-
<li>Click the 'Share on X' button below to open a pre-filled
|
166 |
-
<li>Attach your screenshot to
|
167 |
</ol>
|
168 |
</div>
|
169 |
""", unsafe_allow_html=True)
|
170 |
|
171 |
-
st.write("1. Take a screenshot of the protein structure above.")
|
172 |
-
st.write("2. Click the 'Share Results' button below to open a pre-filled tweet.")
|
173 |
-
st.write("3. Attach your protein screenshot to the post.")
|
174 |
-
|
175 |
tweet_url = share_on_twitter(info["word1"], info["word2"], info["word3"], info["sequence_length"], plddt_score)
|
176 |
st.markdown(f"[Share Results]({tweet_url})")
|
177 |
|
178 |
st.markdown("""
|
179 |
-
##
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
5. Click the "Analyze Protein" button to get more information about your generated protein.
|
186 |
**Remember, this folding is based on randomly generated sequences. Interpret the results with caution.
|
187 |
-
Enjoy exploring the world of protein sequences!
|
188 |
""")
|
189 |
|
190 |
-
|
191 |
col1, col2 = st.columns(2)
|
192 |
with col1:
|
193 |
if st.button('Analyze Protein'):
|
@@ -200,14 +191,4 @@ if st.session_state.structure_info:
|
|
200 |
file_name='predicted.pdb',
|
201 |
mime='text/plain',
|
202 |
)
|
203 |
-
|
204 |
-
## What to do next:
|
205 |
-
If you find interesting results from the sequence folding, you can explore further:
|
206 |
-
1. Learn more about protein structures and sequences.
|
207 |
-
2. Visit the [Protein Data Bank (PDB)](https://www.rcsb.org/) for known protein structures.
|
208 |
-
3. Compare your folded structure with known functional proteins by downloading your results.
|
209 |
-
4. Read about similar proteins to gain insights into potential functions.
|
210 |
-
5. Click the "Analyze Protein" button to get more information about your generated protein.
|
211 |
-
**Remember, this folding is based on randomly generated sequences. Interpret the results with caution.
|
212 |
-
Enjoy exploring the world of protein sequences! Share your high-confidence protein images with us on X [*@WandsAI*](https://x.com/wandsai)!
|
213 |
-
""")
|
|
|
15 |
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
16 |
|
17 |
st.set_page_config(layout='wide')
|
18 |
+
st.sidebar.title('🔮 GenPro2 Protein Generator, Structure Predictor, and Analysis Tool')
|
19 |
+
st.sidebar.write('GenPro2 is an end-to-end protein sequence generator, structure predictor, and analysis tool based on [*ESMFold*](https://esmatlas.com/about), the ESM-2 language model, and known proteins.')
|
20 |
|
21 |
def generate_sequence_from_words(words, length):
|
22 |
seed = ' '.join(words).encode('utf-8')
|
|
|
36 |
|
37 |
def perform_blast_analysis(sequence):
|
38 |
st.subheader('Protein Analysis')
|
39 |
+
with st.spinner("Analyzing generated protein... This may take several minutes. Stay tuned!"):
|
40 |
progress_bar = st.progress(0)
|
41 |
for i in range(100):
|
42 |
progress_bar.progress(i + 1)
|
|
|
61 |
identity_percentage = (hsp.identities / alignment.length) * 100
|
62 |
|
63 |
st.write(f"**Top Match:** {protein_name}")
|
64 |
+
st.write(f"**Organism Code:** {organism}")
|
65 |
st.write(f"**Sequence Identity:** {identity_percentage:.2f}%")
|
66 |
+
|
67 |
|
68 |
# Fetch protein function (if available)
|
69 |
if hasattr(alignment, 'description') and alignment.description:
|
70 |
st.write(f"**Potential Function:** {alignment.description}")
|
71 |
|
|
|
|
|
|
|
|
|
72 |
st.write("No significant matches found. This might be a unique protein sequence!")
|
73 |
except Exception as e:
|
74 |
st.error(f"An error occurred during protein analysis: {str(e)}")
|
|
|
109 |
st.write("Please try again later or contact support if the issue persists.")
|
110 |
|
111 |
def share_on_twitter(word1, word2, word3, length, plddt):
|
112 |
+
tweet_text = f"I just generated a new protein using #GenPro2 from the seed-words '{word1}', '{word2}', and '{word3}' + sequence length {length}! Its Predictive Protein Score is: {plddt}%. -- made by @WandsAI"
|
113 |
tweet_url = f"https://twitter.com/intent/tweet?text={urllib.parse.quote(tweet_text)}"
|
114 |
return tweet_url
|
115 |
|
|
|
147 |
st.subheader(f'Predicted protein structure using seed: {info["word1"]}, {info["word2"]}, and {info["word3"]} + length {info["sequence_length"]}')
|
148 |
render_mol(info['pdb_string'])
|
149 |
|
150 |
+
st.subheader('plDDT Confidence Score')
|
151 |
+
st.write('plDDT is a benchmark for scoring the confidence of a prediction on a scale from 0-100%. 70% or more is really good!')
|
152 |
plddt_score = int(info["b_value"] * 100)
|
153 |
st.info(f'Average plDDT: {plddt_score}%')
|
154 |
|
|
|
158 |
<div style='background-color: #e6f2ff; padding: 10px; border-radius: 5px; font-size: 0.8em;'>
|
159 |
<ol>
|
160 |
<li>Take a screenshot of the protein structure above.</li>
|
161 |
+
<li>Click the 'Share Results' link below to open a post pre-filled with your protein's seed-words and score.</li>
|
162 |
+
<li>Attach your screenshot to your post before posting.</li>
|
163 |
</ol>
|
164 |
</div>
|
165 |
""", unsafe_allow_html=True)
|
166 |
|
|
|
|
|
|
|
|
|
167 |
tweet_url = share_on_twitter(info["word1"], info["word2"], info["word3"], info["sequence_length"], plddt_score)
|
168 |
st.markdown(f"[Share Results]({tweet_url})")
|
169 |
|
170 |
st.markdown("""
|
171 |
+
## Think you might have discovered a useful and unique protein? Here is what to do next:
|
172 |
+
1. Analyze your protein using [BLAST](https://blast.ncbi.nlm.nih.gov/Blast.cgi?PROGRAM=blastp&PAGE_TYPE=BlastSearch&LINK_LOC=blasthome).
|
173 |
+
2. Download protein data and visit the [Protein Data Bank (PDB)](https://www.rcsb.org/) for known protein structures.
|
174 |
+
3. Compare your folded protein structure and data with known functional proteins.
|
175 |
+
|
176 |
+
|
|
|
177 |
**Remember, this folding is based on randomly generated sequences. Interpret the results with caution.
|
178 |
+
Enjoy exploring the world of protein sequences!
|
179 |
""")
|
180 |
|
181 |
+
|
182 |
col1, col2 = st.columns(2)
|
183 |
with col1:
|
184 |
if st.button('Analyze Protein'):
|
|
|
191 |
file_name='predicted.pdb',
|
192 |
mime='text/plain',
|
193 |
)
|
194 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|