|
import hashlib
import os
import random
import tempfile

import biotite.structure.io as bsio
import py3Dmol
import requests
import streamlit as st
import urllib3
from stmol import showmol
|
|
|
# Silence urllib3's InsecureRequestWarning; the API call in update() is made
# with verify=False, which would otherwise emit this warning on every request.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Page-level configuration and the sidebar header / introduction text.
st.set_page_config(layout='wide')
st.sidebar.title('π ESMFold Protein Structure Predictor')
st.sidebar.write(
    '[*ESMFold*](https://esmatlas.com/about) is an end-to-end single sequence '
    'protein structure predictor based on the ESM-2 language model. '
    'For more information, read the '
    '[research article](https://www.biorxiv.org/content/10.1101/2022.07.20.500902v2) '
    'and the [news article](https://www.nature.com/articles/d41586-022-03539-1) '
    'published in *Nature*.'
)
|
|
|
|
|
def generate_sequence_from_words(words, length):
    """Generate a reproducible pseudo-random amino-acid sequence from seed words.

    The words are joined with single spaces, UTF-8 encoded and hashed with
    MD5; the hex digest seeds a private ``random.Random`` instance. The same
    words and length therefore always give the same sequence, and the state
    of the module-level ``random`` generator is left untouched.

    Args:
        words: Iterable of strings used as the seed.
        length: Number of residues to generate.

    Returns:
        A string of ``length`` one-letter codes drawn from
        ``ACDEFGHIKLMNPQRSTVWY``.
    """
    seed = ' '.join(words).encode('utf-8')
    # MD5 only derives a stable seed here; it is not used for security.
    rng = random.Random(hashlib.md5(seed).hexdigest())
    amino_acids = "ACDEFGHIKLMNPQRSTVWY"
    return ''.join(rng.choice(amino_acids) for _ in range(length))
|
|
|
|
|
def render_mol(pdb):
    """Show a PDB structure as a spinning cartoon model in the Streamlit page.

    Args:
        pdb: PDB-format text handed to the py3Dmol viewer.
    """
    viewer = py3Dmol.view()
    viewer.addModel(pdb, 'pdb')

    # Cartoon representation with the 'spectrum' colouring on a white background.
    cartoon_style = {'cartoon': {'color': 'spectrum'}}
    viewer.setStyle(cartoon_style)
    viewer.setBackgroundColor('white')

    # Fit the model in view, then zoom by a factor of 2
    # (800 is presumably the animation duration in ms -- confirm against 3Dmol docs).
    viewer.zoomTo()
    viewer.zoom(2, 800)
    viewer.spin(True)

    showmol(viewer, height=500, width=800)
|
|
|
|
|
def update(sequence, word1, word2, word3, sequence_length):
    """Fold ``sequence`` via the ESM Atlas API and display the prediction.

    Posts the sequence to the ESMFold endpoint, renders the returned PDB
    text, reports the mean B-factor of the structure as the average plDDT,
    and offers the PDB text as a download. Request failures are shown to the
    user with ``st.error`` instead of being raised.

    Args:
        sequence: Amino-acid sequence sent as the request body.
        word1: First seed word (used only in the result heading).
        word2: Second seed word (used only in the result heading).
        word3: Third seed word (used only in the result heading).
        sequence_length: Requested length (used only in the result heading).

    Returns:
        None.
    """
    headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
    }

    # Keep the try body limited to the network call that the handler covers.
    try:
        # NOTE(review): verify=False disables TLS certificate verification.
        # It looks deliberate (the module also silences InsecureRequestWarning),
        # but it allows man-in-the-middle tampering -- confirm it is still
        # needed for this endpoint.
        response = requests.post(
            'https://api.esmatlas.com/foldSequence/v1/pdb/',
            headers=headers,
            data=sequence,
            verify=False,
            timeout=300,
        )
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        st.error(f"An error occurred while calling the API: {str(e)}")
        st.write("Please try again later or contact support if the issue persists.")
        return

    pdb_string = response.content.decode('utf-8')

    # load_structure is given a file path, so the PDB text goes through a
    # file. A per-call temporary directory keeps concurrent sessions from
    # overwriting each other's file and avoids leaving it in the working
    # directory.
    with tempfile.TemporaryDirectory() as tmp_dir:
        pdb_path = os.path.join(tmp_dir, 'predicted.pdb')
        with open(pdb_path, 'w', encoding='utf-8') as f:
            f.write(pdb_string)
        struct = bsio.load_structure(pdb_path, extra_fields=["b_factor"])

    # The mean of the B-factor column is what gets reported as average plDDT.
    b_value = round(struct.b_factor.mean(), 2)

    st.subheader(f'Predicted protein structure using seed: {word1}, {word2}, and {word3} + length ({sequence_length})')
    render_mol(pdb_string)

    st.subheader('plDDT Score')
    st.write('plDDT is a per-residue estimate of the confidence in prediction on a scale from 0-100.')
    st.info(f'Average plDDT: {b_value}%')

    st.download_button(
        label="Download PDB",
        data=pdb_string,
        file_name='predicted.pdb',
        mime='text/plain',
    )
|
|
|
|
|
st.title("Word-Seeded Protein Sequence Generator and Structure Predictor")

# Sidebar inputs: three seed words plus the desired sequence length.
st.sidebar.subheader("Generate Sequence from Words")
word1, word2, word3 = (st.sidebar.text_input(f"Word {i}") for i in (1, 2, 3))
sequence_length = st.sidebar.number_input(
    "Sequence Length",
    min_value=50,
    max_value=400,
    value=100,
    step=10,
)

# On click: require all three words, then generate the sequence and fold it.
if st.sidebar.button('Generate and Predict'):
    if not (word1 and word2 and word3):
        st.sidebar.warning("Please enter all three words to generate a sequence.")
    else:
        sequence = generate_sequence_from_words(
            [word1, word2, word3],
            sequence_length,
        )
        st.sidebar.text_area("Generated Sequence", sequence, height=100)
        st.sidebar.info("Note: The same words and length will always produce the same sequence.")

        with st.spinner("Predicting protein structure... This may take a few minutes."):
            update(sequence, word1, word2, word3, sequence_length)
|
|
|
|
|
# Static usage instructions shown at the bottom of the sidebar.
st.sidebar.markdown("\n".join((
    "",
    "## What to do next:",
    "1. Enter three words and a sequence length.",
    "2. Click 'Generate and Predict' to generate the sequence, visualize the protein, and get its plDDT score.",
    "3. Explore the 3D structure and download the PDB file if desired.",
    "4. Experiment with different words or sequence lengths to see how they affect the predicted structure.",
    "",
    "Remember, these predictions are based on AI models and should be interpreted with caution.",
    "",
)))