Accelernate committed on
Commit
9512b53
·
verified ·
1 Parent(s): af9e04c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -40
app.py CHANGED
@@ -49,27 +49,33 @@ def perform_blast_analysis(sequence):
49
 
50
  if blast_record.alignments:
51
  alignment = blast_record.alignments[0] # Get the top hit
 
52
 
53
- organism = alignment.title.split('OS=')[-1].split('OX=')[0].strip()
54
- organism = organism.split()[0] if len(organism.split()) > 1 else organism
 
 
55
 
56
- st.write(f"**Estimated Organism:** This protein sequence shares similarities with proteins found in {organism}.")
 
57
 
 
 
 
 
 
 
58
  if hasattr(alignment, 'description') and alignment.description:
59
- function = alignment.description.split('[')[0].strip()
60
- st.write(f"**Potential Function:** This protein might be involved in {function.lower()}.")
61
- else:
62
- st.write("**Potential Function:** Unable to determine a specific function for this protein sequence.")
63
 
64
- st.markdown("[Learn more about protein functions](https://www.nature.com/scitable/topicpage/protein-function-14123348/)")
 
 
65
  else:
66
- st.write("No close matches found. This might be a unique protein sequence!")
67
  except Exception as e:
68
- st.error("An error occurred during protein analysis. Please try again later.")
69
-
70
- import streamlit as st
71
-
72
- # ... (keep all previous imports and functions)
73
 
74
  def update(sequence, word1, word2, word3, sequence_length):
75
  headers = {
@@ -90,33 +96,15 @@ def update(sequence, word1, word2, word3, sequence_length):
90
  struct = bsio.load_structure('predicted.pdb', extra_fields=["b_factor"])
91
  b_value = round(struct.b_factor.mean(), 2)
92
 
93
- st.subheader(f'Predicted protein structure using seed: {word1}, {word2}, and {word3} + length {sequence_length}')
94
- render_mol(pdb_string)
95
-
96
- st.subheader('plDDT Score')
97
- st.write('plDDT is a per-residue estimate of the confidence in prediction on a scale from 0-100%.')
98
- st.info(f'Average plDDT: {int(b_value * 100)}%')
 
 
99
 
100
- st.download_button(
101
- label="Download PDB",
102
- data=pdb_string,
103
- file_name='predicted.pdb',
104
- mime='text/plain',
105
- )
106
-
107
- st.markdown("""
108
- ## What to do next:
109
- If you find interesting results from the sequence folding, you can explore further:
110
- 1. Learn more about protein structures and sequences.
111
- 2. Visit the [Protein Data Bank (PDB)](https://www.rcsb.org/) for known protein structures.
112
- 3. Compare your folded structure with known functional proteins by downloading your results.
113
- 4. Read about similar proteins to gain insights into potential functions.
114
- 5. Click the "Analyze Protein" button below to get more information about your generated protein.
115
-
116
- **Remember, this folding is based on randomly generated sequences. Interpret the results with caution.
117
- Enjoy exploring the world of protein sequences! Share your high-confidence protein images with us on X [*@WandsAI*](https://x.com/wandsai)!
118
- """)
119
-
120
  st.session_state.show_analyze_button = True
121
 
122
  except requests.exceptions.RequestException as e:
@@ -128,6 +116,8 @@ if 'sequence' not in st.session_state:
128
  st.session_state.sequence = None
129
  if 'show_analyze_button' not in st.session_state:
130
  st.session_state.show_analyze_button = False
 
 
131
 
132
  st.title("Word-Seeded Protein Sequence Generator and Structure Predictor")
133
 
@@ -149,6 +139,36 @@ if st.sidebar.button('Generate and Predict'):
149
  else:
150
  st.sidebar.warning("Please enter all three words to generate a sequence.")
151
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  # Show the Analyze Protein button if a sequence has been generated
153
  if st.session_state.show_analyze_button:
154
  if st.button('Analyze Protein'):
 
49
 
50
  if blast_record.alignments:
51
  alignment = blast_record.alignments[0] # Get the top hit
52
+ hsp = alignment.hsps[0] # Get the first (best) HSP
53
 
54
+ # Extract protein name and organism
55
+ title_parts = alignment.title.split('|')
56
+ protein_name = title_parts[-1].strip()
57
+ organism = title_parts[-2].split('OS=')[-1].split('OX=')[0].strip()
58
 
59
+ # Calculate identity percentage
60
+ identity_percentage = (hsp.identities / alignment.length) * 100
61
 
62
+ st.write(f"**Top Match:** {protein_name}")
63
+ st.write(f"**Organism:** {organism}")
64
+ st.write(f"**Sequence Identity:** {identity_percentage:.2f}%")
65
+ st.write(f"**E-value:** {hsp.expect:.2e}")
66
+
67
+ # Fetch protein function (if available)
68
  if hasattr(alignment, 'description') and alignment.description:
69
+ st.write(f"**Potential Function:** {alignment.description}")
 
 
 
70
 
71
+ # Link to BLAST results
72
+ blast_link = f"https://blast.ncbi.nlm.nih.gov/Blast.cgi?PROGRAM=blastp&PAGE_TYPE=BlastSearch&LINK_LOC=blasthome"
73
+ st.markdown(f"[View full BLAST results (may require re-running the search)]({blast_link})")
74
  else:
75
+ st.write("No significant matches found. This might be a unique protein sequence!")
76
  except Exception as e:
77
+ st.error(f"An error occurred during protein analysis: {str(e)}")
78
+ st.write("Please try again later or contact support if the issue persists.")
 
 
 
79
 
80
  def update(sequence, word1, word2, word3, sequence_length):
81
  headers = {
 
96
  struct = bsio.load_structure('predicted.pdb', extra_fields=["b_factor"])
97
  b_value = round(struct.b_factor.mean(), 2)
98
 
99
+ st.session_state.structure_info = {
100
+ 'pdb_string': pdb_string,
101
+ 'b_value': b_value,
102
+ 'word1': word1,
103
+ 'word2': word2,
104
+ 'word3': word3,
105
+ 'sequence_length': sequence_length
106
+ }
107
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  st.session_state.show_analyze_button = True
109
 
110
  except requests.exceptions.RequestException as e:
 
116
  st.session_state.sequence = None
117
  if 'show_analyze_button' not in st.session_state:
118
  st.session_state.show_analyze_button = False
119
+ if 'structure_info' not in st.session_state:
120
+ st.session_state.structure_info = None
121
 
122
  st.title("Word-Seeded Protein Sequence Generator and Structure Predictor")
123
 
 
139
  else:
140
  st.sidebar.warning("Please enter all three words to generate a sequence.")
141
 
142
+ # Display structure information if available
143
+ if st.session_state.structure_info:
144
+ info = st.session_state.structure_info
145
+ st.subheader(f'Predicted protein structure using seed: {info["word1"]}, {info["word2"]}, and {info["word3"]} + length {info["sequence_length"]}')
146
+ render_mol(info['pdb_string'])
147
+
148
+ st.subheader('plDDT Score')
149
+ st.write('plDDT is a per-residue estimate of the confidence in prediction on a scale from 0-100%.')
150
+ st.info(f'Average plDDT: {int(info["b_value"] * 100)}%')
151
+
152
+ st.download_button(
153
+ label="Download PDB",
154
+ data=info['pdb_string'],
155
+ file_name='predicted.pdb',
156
+ mime='text/plain',
157
+ )
158
+
159
+ st.markdown("""
160
+ ## What to do next:
161
+ If you find interesting results from the sequence folding, you can explore further:
162
+ 1. Learn more about protein structures and sequences.
163
+ 2. Visit the [Protein Data Bank (PDB)](https://www.rcsb.org/) for known protein structures.
164
+ 3. Compare your folded structure with known functional proteins by downloading your results.
165
+ 4. Read about similar proteins to gain insights into potential functions.
166
+ 5. Click the "Analyze Protein" button below to get more information about your generated protein.
167
+
168
+ **Remember, this folding is based on randomly generated sequences. Interpret the results with caution.
169
+ Enjoy exploring the world of protein sequences! Share your high-confidence protein images with us on X [*@WandsAI*](https://x.com/wandsai)!
170
+ """)
171
+
172
  # Show the Analyze Protein button if a sequence has been generated
173
  if st.session_state.show_analyze_button:
174
  if st.button('Analyze Protein'):