Accelernate committed on
Commit
3af0361
·
verified ·
1 Parent(s): 5708f34

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -38
app.py CHANGED
@@ -17,14 +17,12 @@ st.set_page_config(layout='wide')
17
  st.sidebar.title('🔮 GenPro2 Protein Generator & Structure Predictor')
18
  st.sidebar.write('GenPro2 is an end-to-end single sequence protein generator and structure predictor based [*ESMFold*](https://esmatlas.com/about) and the ESM-2 language model.')
19
 
20
- # Function to generate protein sequence from random words
21
  def generate_sequence_from_words(words, length):
22
  seed = ' '.join(words).encode('utf-8')
23
  random.seed(hashlib.md5(seed).hexdigest())
24
  amino_acids = "ACDEFGHIKLMNPQRSTVWY"
25
  return ''.join(random.choice(amino_acids) for _ in range(length))
26
 
27
- # stmol
28
  def render_mol(pdb):
29
  pdbview = py3Dmol.view()
30
  pdbview.addModel(pdb,'pdb')
@@ -35,7 +33,6 @@ def render_mol(pdb):
35
  pdbview.spin(True)
36
  showmol(pdbview, height = 500,width=800)
37
 
38
- # BLAST analysis function
39
  def perform_blast_analysis(sequence):
40
  st.subheader('Protein Analysis')
41
  with st.spinner("Analyzing generated protein... This may take a few minutes."):
@@ -53,17 +50,13 @@ def perform_blast_analysis(sequence):
53
  if blast_record.alignments:
54
  alignment = blast_record.alignments[0] # Get the top hit
55
 
56
- # Extract organism
57
  organism = alignment.title.split('OS=')[-1].split('OX=')[0].strip()
58
-
59
- # Simplify organism name if it's too complex
60
  organism = organism.split()[0] if len(organism.split()) > 1 else organism
61
 
62
  st.write(f"**Estimated Organism:** This protein sequence shares similarities with proteins found in {organism}.")
63
 
64
- # Fetch protein function (if available)
65
  if hasattr(alignment, 'description') and alignment.description:
66
- function = alignment.description.split('[')[0].strip() # Remove organism info in brackets
67
  st.write(f"**Potential Function:** This protein might be involved in {function.lower()}.")
68
  else:
69
  st.write("**Potential Function:** Unable to determine a specific function for this protein sequence.")
@@ -74,7 +67,6 @@ def perform_blast_analysis(sequence):
74
  except Exception as e:
75
  st.error("An error occurred during protein analysis. Please try again later.")
76
 
77
- # ESMfold
78
  def update(sequence, word1, word2, word3, sequence_length):
79
  headers = {
80
  'Content-Type': 'application/x-www-form-urlencoded',
@@ -83,9 +75,9 @@ def update(sequence, word1, word2, word3, sequence_length):
83
  response = requests.post('https://api.esmatlas.com/foldSequence/v1/pdb/',
84
  headers=headers,
85
  data=sequence,
86
- verify=False, # Disable SSL verification
87
- timeout=300) # Set a longer timeout
88
- response.raise_for_status() # Raise an exception for bad status codes
89
  pdb_string = response.content.decode('utf-8')
90
 
91
  with open('predicted.pdb', 'w') as f:
@@ -94,11 +86,9 @@ def update(sequence, word1, word2, word3, sequence_length):
94
  struct = bsio.load_structure('predicted.pdb', extra_fields=["b_factor"])
95
  b_value = round(struct.b_factor.mean(), 2)
96
 
97
- # Display protein structure
98
  st.subheader(f'Predicted protein structure using seed: {word1}, {word2}, and {word3} + length {sequence_length}')
99
  render_mol(pdb_string)
100
 
101
- # plDDT value is stored in the B-factor field
102
  st.subheader('plDDT Score')
103
  st.write('plDDT is a per-residue estimate of the confidence in prediction on a scale from 0-100%.')
104
  st.info(f'Average plDDT: {int(b_value * 100)}%')
@@ -110,48 +100,41 @@ def update(sequence, word1, word2, word3, sequence_length):
110
  mime='text/plain',
111
  )
112
 
113
- # Perform BLAST analysis
114
- perform_blast_analysis(sequence)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  except requests.exceptions.RequestException as e:
116
  st.error(f"An error occurred while calling the API: {str(e)}")
117
  st.write("Please try again later or contact support if the issue persists.")
118
 
119
- # Streamlit app
120
  st.title("Word-Seeded Protein Sequence Generator and Structure Predictor")
121
 
122
- # Input for word-seeded sequence generation
123
  st.sidebar.subheader("Generate Sequence from Words")
124
  word1 = st.sidebar.text_input("Word 1")
125
  word2 = st.sidebar.text_input("Word 2")
126
  word3 = st.sidebar.text_input("Word 3")
127
  sequence_length = st.sidebar.number_input("Sequence Length", min_value=50, max_value=400, value=100, step=10)
128
 
129
- # Generate and predict button
130
  if st.sidebar.button('Generate and Predict'):
131
  if word1 and word2 and word3:
132
  sequence = generate_sequence_from_words([word1, word2, word3], sequence_length)
133
- st.session_state.sequence = sequence # Store the sequence in session state
134
  st.sidebar.text_area("Generated Sequence", sequence, height=100)
135
  st.sidebar.info("Note: The same words and sequence length will always produce the same sequence.")
136
 
137
  with st.spinner("Predicting protein structure... This may take a few minutes."):
138
  update(sequence, word1, word2, word3, sequence_length)
139
-
140
- # Add Analyze Protein button
141
- if st.button('Analyze Protein'):
142
- perform_blast_analysis(st.session_state.sequence)
143
  else:
144
- st.sidebar.warning("Please enter all three words to generate a sequence.")
145
-
146
-
147
- # Information display
148
- st.sidebar.markdown("""
149
- ## What to do next:
150
- If you find interesting results from the sequence folding, you can explore further:
151
- 1. Learn more about protein structures and sequences.
152
- 2. Visit the [Protein Data Bank (PDB)](https://www.rcsb.org/) for known protein structures.
153
- 3. Compare your folded structure with known functional proteins by downloading your results.
154
- 4. Read about similar proteins to gain insights into potential functions.
155
- **Remember, this folding is based on randomly generated sequences. Interpret the results with caution.
156
- Enjoy exploring the world of protein sequences! Share your high-confidence protein images with us on X [*@WandsAI*](https://x.com/wandsai)!
157
- """)
 
17
  st.sidebar.title('🔮 GenPro2 Protein Generator & Structure Predictor')
18
  st.sidebar.write('GenPro2 is an end-to-end single sequence protein generator and structure predictor based [*ESMFold*](https://esmatlas.com/about) and the ESM-2 language model.')
19
 
 
20
  def generate_sequence_from_words(words, length):
21
  seed = ' '.join(words).encode('utf-8')
22
  random.seed(hashlib.md5(seed).hexdigest())
23
  amino_acids = "ACDEFGHIKLMNPQRSTVWY"
24
  return ''.join(random.choice(amino_acids) for _ in range(length))
25
 
 
26
  def render_mol(pdb):
27
  pdbview = py3Dmol.view()
28
  pdbview.addModel(pdb,'pdb')
 
33
  pdbview.spin(True)
34
  showmol(pdbview, height = 500,width=800)
35
 
 
36
  def perform_blast_analysis(sequence):
37
  st.subheader('Protein Analysis')
38
  with st.spinner("Analyzing generated protein... This may take a few minutes."):
 
50
  if blast_record.alignments:
51
  alignment = blast_record.alignments[0] # Get the top hit
52
 
 
53
  organism = alignment.title.split('OS=')[-1].split('OX=')[0].strip()
 
 
54
  organism = organism.split()[0] if len(organism.split()) > 1 else organism
55
 
56
  st.write(f"**Estimated Organism:** This protein sequence shares similarities with proteins found in {organism}.")
57
 
 
58
  if hasattr(alignment, 'description') and alignment.description:
59
+ function = alignment.description.split('[')[0].strip()
60
  st.write(f"**Potential Function:** This protein might be involved in {function.lower()}.")
61
  else:
62
  st.write("**Potential Function:** Unable to determine a specific function for this protein sequence.")
 
67
  except Exception as e:
68
  st.error("An error occurred during protein analysis. Please try again later.")
69
 
 
70
  def update(sequence, word1, word2, word3, sequence_length):
71
  headers = {
72
  'Content-Type': 'application/x-www-form-urlencoded',
 
75
  response = requests.post('https://api.esmatlas.com/foldSequence/v1/pdb/',
76
  headers=headers,
77
  data=sequence,
78
+ verify=False,
79
+ timeout=300)
80
+ response.raise_for_status()
81
  pdb_string = response.content.decode('utf-8')
82
 
83
  with open('predicted.pdb', 'w') as f:
 
86
  struct = bsio.load_structure('predicted.pdb', extra_fields=["b_factor"])
87
  b_value = round(struct.b_factor.mean(), 2)
88
 
 
89
  st.subheader(f'Predicted protein structure using seed: {word1}, {word2}, and {word3} + length {sequence_length}')
90
  render_mol(pdb_string)
91
 
 
92
  st.subheader('plDDT Score')
93
  st.write('plDDT is a per-residue estimate of the confidence in prediction on a scale from 0-100%.')
94
  st.info(f'Average plDDT: {int(b_value * 100)}%')
 
100
  mime='text/plain',
101
  )
102
 
103
+ st.markdown("""
104
+ ## What to do next:
105
+ If you find interesting results from the sequence folding, you can explore further:
106
+ 1. Learn more about protein structures and sequences.
107
+ 2. Visit the [Protein Data Bank (PDB)](https://www.rcsb.org/) for known protein structures.
108
+ 3. Compare your folded structure with known functional proteins by downloading your results.
109
+ 4. Read about similar proteins to gain insights into potential functions.
110
+ 5. Click the "Analyze Protein" button below to get more information about your generated protein.
111
+
112
+ **Remember, this folding is based on randomly generated sequences. Interpret the results with caution.
113
+ Enjoy exploring the world of protein sequences! Share your high-confidence protein images with us on X [*@WandsAI*](https://x.com/wandsai)!
114
+ """)
115
+
116
+ if st.button('Analyze Protein'):
117
+ perform_blast_analysis(sequence)
118
+
119
  except requests.exceptions.RequestException as e:
120
  st.error(f"An error occurred while calling the API: {str(e)}")
121
  st.write("Please try again later or contact support if the issue persists.")
122
 
 
123
  st.title("Word-Seeded Protein Sequence Generator and Structure Predictor")
124
 
 
125
  st.sidebar.subheader("Generate Sequence from Words")
126
  word1 = st.sidebar.text_input("Word 1")
127
  word2 = st.sidebar.text_input("Word 2")
128
  word3 = st.sidebar.text_input("Word 3")
129
  sequence_length = st.sidebar.number_input("Sequence Length", min_value=50, max_value=400, value=100, step=10)
130
 
 
131
  if st.sidebar.button('Generate and Predict'):
132
  if word1 and word2 and word3:
133
  sequence = generate_sequence_from_words([word1, word2, word3], sequence_length)
 
134
  st.sidebar.text_area("Generated Sequence", sequence, height=100)
135
  st.sidebar.info("Note: The same words and sequence length will always produce the same sequence.")
136
 
137
  with st.spinner("Predicting protein structure... This may take a few minutes."):
138
  update(sequence, word1, word2, word3, sequence_length)
 
 
 
 
139
  else:
140
+ st.sidebar.warning("Please enter all three words to generate a sequence.")