# GenPro2: word-seeded protein sequence generator and structure viewer.
#
# Streamlit script that turns three user-supplied words into a deterministic
# pseudo-random amino-acid sequence, posts it to the ESM Atlas folding
# endpoint and renders the returned PDB text in the page.

import hashlib
import random
import tempfile
from pathlib import Path

import biotite.structure.io as bsio
import py3Dmol
import requests
import streamlit as st
import urllib3
from stmol import showmol

# The folding request below is sent with certificate verification disabled;
# silence the warning urllib3 would otherwise emit for each such request.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# One-letter codes the generator draws from.
AMINO_ACIDS = "ACDEFGHIKLMNPQRSTVWY"
ESMFOLD_URL = 'https://api.esmatlas.com/foldSequence/v1/pdb/'
REQUEST_TIMEOUT_SECONDS = 300

st.set_page_config(layout='wide')
st.sidebar.title('🔮 GenPro2 Protein Generator & Structure Predictor')
st.sidebar.write('GenPro2 is an end-to-end single sequence protein generator and structure predictor based [*ESMFold*](https://esmatlas.com/about) and the ESM-2 language model.')


# Function to generate protein sequence from random words
def generate_sequence_from_words(words, length):
    """Return a deterministic pseudo-random amino-acid sequence.

    The words are joined with single spaces, hashed with MD5, and the hex
    digest seeds the generator, so the same words and length always yield
    the same sequence.

    Args:
        words: Iterable of strings forming the seed phrase.
        length: Number of residues to generate.

    Returns:
        A string of ``length`` characters drawn from ``AMINO_ACIDS``.
    """
    seed = ' '.join(words).encode('utf-8')
    # A private generator yields the same stream as seeding the module-level
    # one, without resetting global random state for the rest of the process.
    rng = random.Random(hashlib.md5(seed).hexdigest())
    return ''.join(rng.choice(AMINO_ACIDS) for _ in range(length))


# stmol
def render_mol(pdb):
    """Render PDB text as a spinning, spectrum-coloured cartoon in the page.

    Args:
        pdb: Structure in PDB format, as a string.
    """
    pdbview = py3Dmol.view()
    pdbview.addModel(pdb, 'pdb')
    pdbview.setStyle({'cartoon': {'color': 'spectrum'}})
    pdbview.setBackgroundColor('white')
    pdbview.zoomTo()
    pdbview.zoom(2, 800)
    pdbview.spin(True)
    showmol(pdbview, height=500, width=800)


def _mean_b_factor(pdb_string):
    """Return the mean B-factor of the structure in *pdb_string*, rounded to 2 places."""
    # Parse from a per-call temporary directory rather than a fixed file in
    # the working directory, so concurrent sessions cannot overwrite each
    # other's prediction and no stray file is left behind.
    with tempfile.TemporaryDirectory() as tmp_dir:
        pdb_path = Path(tmp_dir) / 'predicted.pdb'
        pdb_path.write_text(pdb_string, encoding='utf-8')
        struct = bsio.load_structure(str(pdb_path), extra_fields=["b_factor"])
    return round(float(struct.b_factor.mean()), 2)


def _plddt_percent(b_value):
    """Scale *b_value* by 100 and round it to the nearest whole percent."""
    # round() instead of int(): 0.57 * 100 is 56.99999999999999 in binary
    # floating point, which int() would truncate to 56.
    return round(b_value * 100)


# ESMfold
def update(sequence, word1, word2, word3, sequence_length):
    """Fold *sequence* via the ESM Atlas API and display the result.

    On success, shows the 3D structure, the average plDDT (read from the
    B-factor column of the returned PDB) and a download button. On any
    ``requests`` failure, shows an error message instead.

    Args:
        sequence: Amino-acid sequence sent as the request body.
        word1: First seed word (used only in the heading).
        word2: Second seed word (used only in the heading).
        word3: Third seed word (used only in the heading).
        sequence_length: Sequence length (used only in the heading).
    """
    headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
    }
    try:
        # NOTE(review): verify=False disables TLS certificate validation.
        # Kept because the original opts out deliberately; re-enable once the
        # endpoint's certificate chain validates.
        response = requests.post(
            ESMFOLD_URL,
            headers=headers,
            data=sequence,
            verify=False,  # Disable SSL verification
            timeout=REQUEST_TIMEOUT_SECONDS,  # Set a longer timeout
        )
        response.raise_for_status()  # Raise an exception for bad status codes
    except requests.exceptions.RequestException as e:
        st.error(f"An error occurred while calling the API: {str(e)}")
        st.write("Please try again later or contact support if the issue persists.")
        return

    pdb_string = response.content.decode('utf-8')
    b_value = _mean_b_factor(pdb_string)

    # Display protein structure
    st.subheader(f'Predicted protein structure using seed: {word1}, {word2}, and {word3} + length {sequence_length}')
    render_mol(pdb_string)

    # plDDT value is stored in the B-factor field
    st.subheader('plDDT Score')
    st.write('plDDT is a per-residue estimate of the confidence in prediction on a scale from 0-100%.')
    st.info(f'Average plDDT: {_plddt_percent(b_value)}%')

    # NOTE(review): this button is only drawn while the "Generate and
    # Predict" branch runs; presumably a click reruns the script and the
    # result disappears -- confirm, and consider st.session_state.
    st.download_button(
        label="Download PDB",
        data=pdb_string,
        file_name='predicted.pdb',
        mime='text/plain',
    )


# Streamlit app
st.title("Word-Seeded Protein Sequence Generator and Structure Predictor")

# Input for word-seeded sequence generation
st.sidebar.subheader("Generate Sequence from Words")
word1 = st.sidebar.text_input("Word 1")
word2 = st.sidebar.text_input("Word 2")
word3 = st.sidebar.text_input("Word 3")
sequence_length = st.sidebar.number_input("Sequence Length", min_value=50, max_value=400, value=100, step=10)

# Generate and predict button
if st.sidebar.button('Generate and Predict'):
    if word1 and word2 and word3:
        sequence = generate_sequence_from_words([word1, word2, word3], sequence_length)
        st.sidebar.text_area("Generated Sequence", sequence, height=100)
        st.sidebar.info("Note: The same words and sequence length will always produce the same sequence.")

        with st.spinner("Predicting protein structure... This may take a few minutes."):
            update(sequence, word1, word2, word3, sequence_length)
    else:
        st.sidebar.warning("Please enter all three words to generate a sequence.")

# Information display
st.sidebar.markdown("""
## What to do next:
If you find interesting results from the sequence folding, you can explore further:
1. Learn more about protein structures and sequences.
2. Visit the [Protein Data Bank (PDB)](https://www.rcsb.org/) for known protein structures.
3. Compare your folded structure with known functional proteins by downloading your results.
4. Read about similar proteins to gain insights into potential functions.

**Remember, this folding is based on randomly generated sequences. Interpret the results with caution.**

Enjoy exploring the world of protein sequences! Share your high-confidence protein images with us on X [*@WandsAI*](https://x.com/wandsai)!
""")