Accelernate committed on
Commit
f8517eb
·
verified ·
1 Parent(s): 1dbd0a0

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +88 -0
app.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import random
3
+ import hashlib
4
+ import py3Dmol
5
+ import requests
6
+ import io
7
+ from Bio import PDB
8
+
9
def generate_sequence_from_words(words, length):
    """Build a reproducible pseudo-random amino-acid sequence from seed words.

    The words are joined with single spaces and hashed with MD5; the hex
    digest seeds a random generator, so the same words and length always
    produce the same sequence.

    Args:
        words: Iterable of strings used to derive the seed.
        length: Number of residues to generate.

    Returns:
        A string of ``length`` characters drawn from the 20 one-letter
        amino-acid codes in ``ACDEFGHIKLMNPQRSTVWY``.
    """
    seed = ' '.join(words).encode('utf-8')
    # A dedicated Random instance produces the same stream as seeding the
    # module-level generator, but does not reset the RNG state shared by the
    # rest of the process. MD5 is used only as a seed mixer, not for security.
    rng = random.Random(hashlib.md5(seed).hexdigest())
    amino_acids = "ACDEFGHIKLMNPQRSTVWY"
    return ''.join(rng.choice(amino_acids) for _ in range(length))
14
+
15
def predict_structure(sequence):
    """Request a structure prediction for ``sequence`` from the remote API.

    Posts a single-query, single-model job (no relaxation, no templates) to
    the prediction endpoint and returns the decoded JSON body.

    Args:
        sequence: Amino-acid sequence in one-letter codes.

    Returns:
        The decoded JSON response when the server answers HTTP 200 with a
        JSON body, otherwise ``None``. Every failure is also reported in the
        UI through ``st.error``.
    """
    # NOTE(review): the endpoint and its response schema are taken as-is from
    # the original code and could not be verified here — confirm.
    url = "https://api.colabfold.com/batch"
    data = {
        "queries": [["query", sequence]],
        "num_relax": 0,
        "use_templates": False,
        "num_models": 1,
    }
    try:
        # (connect, read) timeouts: without them a stalled server would block
        # the app forever. The read timeout is generous because the UI tells
        # the user a prediction may take a few minutes.
        response = requests.post(url, json=data, timeout=(10, 600))
    except requests.RequestException as exc:
        # DNS failures, refused connections, timeouts, etc. follow the same
        # "report and return None" path as a non-200 status.
        st.error(f"Error in structure prediction: {exc}")
        return None

    if response.status_code != 200:
        st.error(f"Error in structure prediction: {response.text}")
        return None

    try:
        return response.json()
    except ValueError:
        # A 200 response whose body is not JSON (e.g. an HTML error page).
        st.error("Error in structure prediction: the server returned a response that is not valid JSON.")
        return None
29
+
30
def visualize_protein(pdb_string):
    """Create a py3Dmol viewer showing ``pdb_string`` as a cartoon.

    Args:
        pdb_string: Structure data in PDB text format.

    Returns:
        An 800x400 ``py3Dmol.view`` with the model loaded, styled as a
        spectrum-coloured cartoon, and zoomed to fit.
    """
    cartoon_style = {'cartoon': {'color': 'spectrum'}}
    viewer = py3Dmol.view(width=800, height=400)
    viewer.addModel(pdb_string, 'pdb')
    viewer.setStyle(cartoon_style)
    viewer.zoomTo()
    return viewer
36
+
37
# ---------------------------------------------------------------------------
# Streamlit page: collect three seed words and a length, generate a sequence,
# request a structure prediction, and render the result.
# ---------------------------------------------------------------------------
st.title("Protein Sequence Generator and Structure Predictor")

st.write("Enter three random words to seed your protein sequence:")
word1 = st.text_input("Word 1")
word2 = st.text_input("Word 2")
word3 = st.text_input("Word 3")

sequence_length = st.number_input(
    "Enter desired sequence length",
    min_value=50,
    max_value=200,
    value=100,
    step=10,
)

if st.button("Generate Sequence and Predict Structure"):
    words = [word1, word2, word3]
    # Reject empty and whitespace-only entries; the words themselves are
    # passed on unchanged so existing seeds keep producing the same sequence.
    if all(word.strip() for word in words):
        sequence = generate_sequence_from_words(words, sequence_length)
        st.write(f"Generated sequence inspired by '{word1}', '{word2}', and '{word3}' with length '{sequence_length}':")
        st.code(sequence)

        st.header("Protein Structure Prediction")
        with st.spinner("Predicting protein structure... This may take a few minutes."):
            prediction = predict_structure(sequence)

        # The code below expects a non-empty list whose first entry is a dict
        # holding 'pdb_string' (and optionally 'plddt'). Any other JSON shape
        # is treated as a failed prediction instead of raising on indexing.
        first_model = None
        if isinstance(prediction, list) and prediction and isinstance(prediction[0], dict):
            first_model = prediction[0]

        if first_model is not None and 'pdb_string' in first_model:
            pdb_string = first_model['pdb_string']
            view = visualize_protein(pdb_string)

            # py3Dmol exposes no show3d() helper; embed the viewer's own
            # generated HTML in a Streamlit HTML component instead.
            st.components.v1.html(view._make_html(), width=800, height=400)

            # Display confidence scores
            plddt_scores = first_model.get('plddt', [])
            if plddt_scores:
                avg_plddt = sum(plddt_scores) / len(plddt_scores)
                st.write(f"Average pLDDT score: {avg_plddt:.2f}")
                st.write("pLDDT > 90: Very high confidence")
                st.write("90 > pLDDT > 70: Confident")
                st.write("70 > pLDDT > 50: Low confidence")
                st.write("pLDDT < 50: Very low confidence")
        else:
            st.error("Failed to predict structure. Please try again.")
    else:
        st.error("Please enter all three words.")

st.markdown("""
## What to do next:
1. Experiment with different seed words and sequence lengths.
2. Learn about how protein sequences relate to their predicted structures.
3. Remember that these are computational predictions and may not represent the actual biological structure.
4. For real protein structures, visit the [Protein Data Bank (PDB)](https://www.rcsb.org/).
Enjoy exploring the world of protein sequences and predicted structures!
""")