File size: 3,092 Bytes
f8517eb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c3a875f
 
 
 
 
f8517eb
c3a875f
f8517eb
c3a875f
f8517eb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c3a875f
f8517eb
 
 
 
 
 
 
 
 
 
 
 
c3a875f
 
f8517eb
 
 
 
 
c3a875f
 
f8517eb
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import streamlit as st
import random
import hashlib
import py3Dmol
import requests
import io
from Bio import PDB

def generate_sequence_from_words(words, length):
    """Build a deterministic pseudo-random amino-acid sequence from seed words.

    The words are joined with single spaces, UTF-8 encoded and hashed with
    MD5; the hex digest seeds a private random generator, so the same words
    and length always produce the same sequence.

    Args:
        words: Iterable of strings used as the seed.
        length: Number of residues to generate.

    Returns:
        A string of ``length`` characters drawn from the 20 one-letter codes
        in ``amino_acids``.
    """
    seed = ' '.join(words).encode('utf-8')
    # Use a dedicated Random instance instead of reseeding the module-level
    # generator: reseeding the shared generator alters global state for every
    # other caller, and concurrent callers could interleave draws and break
    # determinism. Random(x) seeds exactly like random.seed(x), so the
    # generated sequence is the same as before.
    rng = random.Random(hashlib.md5(seed).hexdigest())
    amino_acids = "ACDEFGHIKLMNPQRSTVWY"
    return ''.join(rng.choice(amino_acids) for _ in range(length))

def predict_structure(sequence):
    """Request a predicted structure for ``sequence`` from the ESM Atlas fold API.

    Args:
        sequence: Amino-acid sequence string to fold.

    Returns:
        The response body text on an HTTP 200 reply, or ``None`` on any
        failure (non-200 status, timeout, connection error). Failures are
        reported to the user through ``st.error``.
    """
    url = "https://api.esmatlas.com/foldSequence/v1/pdb/"
    headers = {"Content-Type": "application/x-www-form-urlencoded"}
    # NOTE(review): passing a dict makes requests form-encode the body as
    # "sequence=<...>"; confirm the endpoint expects that rather than the
    # bare sequence string as the request body.
    data = {"sequence": sequence}

    try:
        response = requests.post(url, headers=headers, data=data, timeout=300)
    except requests.RequestException as exc:
        # Timeouts and connection problems raise instead of returning a
        # response; report them and honour the "None on failure" contract
        # the caller relies on, rather than surfacing a traceback.
        st.error(f"Error in structure prediction: {exc}")
        return None

    if response.status_code == 200:
        return response.text

    st.error(f"Error in structure prediction: {response.status_code} - {response.text}")
    return None

def visualize_protein(pdb_string):
    """Create a py3Dmol viewer displaying the given PDB text as a cartoon.

    Args:
        pdb_string: Structure data passed to the viewer with format 'pdb'.

    Returns:
        The 800x400 ``py3Dmol.view`` with the model added, the cartoon style
        with 'spectrum' colouring applied, and the camera zoomed to fit.
    """
    cartoon_style = {'cartoon': {'color': 'spectrum'}}
    viewer = py3Dmol.view(width=800, height=400)
    viewer.addModel(pdb_string, 'pdb')
    viewer.setStyle(cartoon_style)
    viewer.zoomTo()
    return viewer

# Streamlit page: collect three seed words and a length, generate a
# deterministic sequence from them, request a structure prediction and render it.
st.title("Protein Sequence Generator and Structure Predictor")

st.write("Enter three random words to seed your protein sequence:")
word1 = st.text_input("Word 1")
word2 = st.text_input("Word 2")
word3 = st.text_input("Word 3")

sequence_length = st.number_input("Enter desired sequence length", 
                                  min_value=50, 
                                  max_value=400, 
                                  value=100, 
                                  step=10)

if st.button("Generate Sequence and Predict Structure"):
    # All three words are required; empty inputs are falsy.
    if word1 and word2 and word3:
        words = [word1, word2, word3]
        sequence = generate_sequence_from_words(words, sequence_length)
        st.write(f"Generated sequence inspired by '{word1}', '{word2}', and '{word3}' with length '{sequence_length}':")
        st.code(sequence)

        st.header("Protein Structure Prediction")
        with st.spinner("Predicting protein structure... This may take a few minutes."):
            pdb_string = predict_structure(sequence)
            if pdb_string:
                view = visualize_protein(pdb_string)

                # py3Dmol exposes no module-level show3d(); the previous call
                # would raise AttributeError. Embed the viewer's own
                # self-contained HTML (container div plus script) instead of
                # only the opening JavaScript fragment.
                st.components.v1.html(view._make_html(), width=800, height=400)

                st.success("Structure prediction complete!")
                st.write("Note: This is a computational prediction and may not represent the actual biological structure.")
            else:
                st.error("Failed to predict structure. Please try again.")
    else:
        st.error("Please enter all three words.")

st.markdown("""
## What to do next:
1. Experiment with different seed words and sequence lengths.
2. Learn about how protein sequences relate to their predicted structures.
3. Remember that these are computational predictions and may not represent the actual biological structure.
4. For real protein structures, visit the [Protein Data Bank (PDB)](https://www.rcsb.org/).
Enjoy exploring the world of protein sequences and predicted structures!
""")