Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import random
|
3 |
+
import hashlib
|
4 |
+
import py3Dmol
|
5 |
+
import requests
|
6 |
+
import io
|
7 |
+
from Bio import PDB
|
8 |
+
|
9 |
+
def generate_sequence_from_words(words, length):
    """Return a deterministic pseudo-random amino-acid string seeded by *words*.

    The words are joined with single spaces, UTF-8 encoded and hashed with
    MD5; the hex digest seeds the generator, so the same words and length
    always yield the same sequence.

    Args:
        words: Iterable of strings used to derive the seed.
        length: Number of residues to generate (passed to ``range``).

    Returns:
        A string of ``length`` characters drawn from the 20 one-letter
        amino-acid codes ``ACDEFGHIKLMNPQRSTVWY``.
    """
    seed = ' '.join(words).encode('utf-8')
    # Use a private generator instead of random.seed(): identical output for
    # a given seed, but the process-wide RNG state is left untouched.
    rng = random.Random(hashlib.md5(seed).hexdigest())
    amino_acids = "ACDEFGHIKLMNPQRSTVWY"
    return ''.join(rng.choice(amino_acids) for _ in range(length))
|
14 |
+
|
15 |
+
def predict_structure(sequence):
    """POST *sequence* to the ColabFold batch URL and return the decoded JSON.

    The request asks for a single model with no relaxation and no templates.
    Any failure (network error, non-200 status, or a body that is not valid
    JSON) is reported through ``st.error`` and yields ``None``.

    Args:
        sequence: Amino-acid sequence string to submit as the single query.

    Returns:
        The parsed JSON response on HTTP 200, otherwise ``None``.
    """
    # NOTE(review): whether this endpoint accepts this payload shape cannot
    # be confirmed from this file - verify against the service documentation.
    url = "https://api.colabfold.com/batch"
    data = {
        "queries": [["query", sequence]],
        "num_relax": 0,
        "use_templates": False,
        "num_models": 1,
    }

    try:
        # (connect, read) timeouts: without them requests waits indefinitely
        # on an unresponsive server. The read budget is generous because the
        # UI tells the user a prediction may take a few minutes.
        response = requests.post(url, json=data, timeout=(10, 600))
    except requests.RequestException as exc:
        st.error(f"Error in structure prediction: {exc}")
        return None

    if response.status_code != 200:
        st.error(f"Error in structure prediction: {response.text}")
        return None

    try:
        return response.json()
    except ValueError:
        # requests raises a ValueError subclass when the body is not JSON.
        st.error("Error in structure prediction: response was not valid JSON")
        return None
|
29 |
+
|
30 |
+
def visualize_protein(pdb_string):
    """Build an 800x400 py3Dmol view of *pdb_string* drawn as a cartoon.

    Args:
        pdb_string: Structure text, loaded into the viewer with format 'pdb'.

    Returns:
        The configured ``py3Dmol.view`` object, zoomed to the loaded model.
    """
    cartoon_style = {'cartoon': {'color': 'spectrum'}}

    viewer = py3Dmol.view(width=800, height=400)
    viewer.addModel(pdb_string, 'pdb')
    viewer.setStyle(cartoon_style)
    viewer.zoomTo()
    return viewer
|
36 |
+
|
37 |
+
# --- Page header and user inputs -------------------------------------------
st.title("Protein Sequence Generator and Structure Predictor")

st.write("Enter three random words to seed your protein sequence:")
# Three text boxes labelled "Word 1" .. "Word 3", created in that order.
word1, word2, word3 = [st.text_input(f"Word {slot}") for slot in (1, 2, 3)]

# Integer selector: 50 to 200 in steps of 10, defaulting to 100.
sequence_length = st.number_input(
    "Enter desired sequence length",
    min_value=50,
    max_value=200,
    value=100,
    step=10,
)
|
49 |
+
|
50 |
+
# --- Button handler: generate the sequence, request a prediction, render ----
if st.button("Generate Sequence and Predict Structure"):
    if word1 and word2 and word3:
        words = [word1, word2, word3]
        sequence = generate_sequence_from_words(words, sequence_length)
        st.write(f"Generated sequence inspired by '{word1}', '{word2}', and '{word3}' with length '{sequence_length}':")
        st.code(sequence)

        st.header("Protein Structure Prediction")
        with st.spinner("Predicting protein structure... This may take a few minutes."):
            prediction = predict_structure(sequence)

        # The code below reads the first entry of the response as a dict with
        # 'pdb_string' and optional 'plddt' keys. Validate that shape first so
        # an unexpected payload (dict, empty list, ...) falls through to the
        # error message instead of raising KeyError/IndexError/TypeError.
        first_model = None
        if isinstance(prediction, list) and prediction and isinstance(prediction[0], dict):
            first_model = prediction[0]

        if first_model is not None and 'pdb_string' in first_model:
            pdb_string = first_model['pdb_string']
            view = visualize_protein(pdb_string)

            # py3Dmol exposes no show3d() helper, and view.startjs is only the
            # opening fragment of the viewer markup. Embed the view's complete
            # HTML/JS so the structure actually renders in the component.
            st.components.v1.html(view._make_html(), width=800, height=400)

            # Display confidence scores
            plddt_scores = first_model.get('plddt', [])
            if plddt_scores:
                avg_plddt = sum(plddt_scores) / len(plddt_scores)
                st.write(f"Average pLDDT score: {avg_plddt:.2f}")
                st.write("pLDDT > 90: Very high confidence")
                st.write("90 > pLDDT > 70: Confident")
                st.write("70 > pLDDT > 50: Low confidence")
                st.write("pLDDT < 50: Very low confidence")
        else:
            st.error("Failed to predict structure. Please try again.")
    else:
        st.error("Please enter all three words.")
|
80 |
+
|
81 |
+
# Closing guidance shown under the app on every run.
# The blank line before the final sentence is deliberate: without it Markdown
# treats that sentence as a continuation of list item 4 rather than as its
# own paragraph.
st.markdown("""
## What to do next:
1. Experiment with different seed words and sequence lengths.
2. Learn about how protein sequences relate to their predicted structures.
3. Remember that these are computational predictions and may not represent the actual biological structure.
4. For real protein structures, visit the [Protein Data Bank (PDB)](https://www.rcsb.org/).

Enjoy exploring the world of protein sequences and predicted structures!
""")
|