Accelernate committed on
Commit
5708f34
·
verified ·
1 Parent(s): 9a00137

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -21
app.py CHANGED
@@ -37,13 +37,12 @@ def render_mol(pdb):
37
 
38
  # BLAST analysis function
39
  def perform_blast_analysis(sequence):
40
- st.subheader('BLAST Analysis')
41
  with st.spinner("Analyzing generated protein... This may take a few minutes."):
42
  progress_bar = st.progress(0)
43
  for i in range(100):
44
  progress_bar.progress(i + 1)
45
- if i == 99: # Simulate longer process at the end
46
- time.sleep(2)
47
 
48
  try:
49
  record = SeqRecord(Seq(sequence), id='random_protein')
@@ -51,35 +50,29 @@ def perform_blast_analysis(sequence):
51
 
52
  blast_record = NCBIXML.read(result_handle)
53
 
54
- st.write('Top BLAST Match:')
55
  if blast_record.alignments:
56
  alignment = blast_record.alignments[0] # Get the top hit
57
- hsp = alignment.hsps[0] # Get the first (best) HSP
58
 
59
- # Extract protein name and organism
60
- title_parts = alignment.title.split('|')
61
- protein_name = title_parts[-1].strip()
62
- organism = title_parts[-2].split('OS=')[-1].split('OX=')[0].strip()
63
 
64
- # Calculate identity percentage
65
- identity_percentage = (hsp.identities / alignment.length) * 100
66
 
67
- st.write(f"**Protein:** {protein_name}")
68
- st.write(f"**Organism:** {organism}")
69
- st.write(f"**Sequence Identity:** {identity_percentage:.2f}%")
70
 
71
  # Fetch protein function (if available)
72
  if hasattr(alignment, 'description') and alignment.description:
73
- st.write(f"**Possible Function:** {alignment.description}")
 
 
 
74
 
75
- # Link to BLAST
76
- blast_link = f"https://blast.ncbi.nlm.nih.gov/Blast.cgi?PROGRAM=blastp&PAGE_TYPE=BlastSearch&LINK_LOC=blasthome"
77
- st.markdown(f"[View full BLAST results]({blast_link})")
78
  else:
79
- st.write("No significant matches found.")
80
  except Exception as e:
81
- st.error(f"An error occurred during BLAST analysis: {str(e)}")
82
- st.write("Please try again later or contact support if the issue persists.")
83
 
84
  # ESMfold
85
  def update(sequence, word1, word2, word3, sequence_length):
@@ -137,14 +130,20 @@ sequence_length = st.sidebar.number_input("Sequence Length", min_value=50, max_v
137
  if st.sidebar.button('Generate and Predict'):
138
  if word1 and word2 and word3:
139
  sequence = generate_sequence_from_words([word1, word2, word3], sequence_length)
 
140
  st.sidebar.text_area("Generated Sequence", sequence, height=100)
141
  st.sidebar.info("Note: The same words and sequence length will always produce the same sequence.")
142
 
143
  with st.spinner("Predicting protein structure... This may take a few minutes."):
144
  update(sequence, word1, word2, word3, sequence_length)
 
 
 
 
145
  else:
146
  st.sidebar.warning("Please enter all three words to generate a sequence.")
147
 
 
148
  # Information display
149
  st.sidebar.markdown("""
150
  ## What to do next:
 
37
 
38
  # BLAST analysis function
39
  def perform_blast_analysis(sequence):
40
+ st.subheader('Protein Analysis')
41
  with st.spinner("Analyzing generated protein... This may take a few minutes."):
42
  progress_bar = st.progress(0)
43
  for i in range(100):
44
  progress_bar.progress(i + 1)
45
+ time.sleep(0.1) # Simulate analysis time
 
46
 
47
  try:
48
  record = SeqRecord(Seq(sequence), id='random_protein')
 
50
 
51
  blast_record = NCBIXML.read(result_handle)
52
 
 
53
  if blast_record.alignments:
54
  alignment = blast_record.alignments[0] # Get the top hit
 
55
 
56
+ # Extract organism
57
+ organism = alignment.title.split('OS=')[-1].split('OX=')[0].strip()
 
 
58
 
59
+ # Simplify organism name if it's too complex
60
+ organism = organism.split()[0] if len(organism.split()) > 1 else organism
61
 
62
+ st.write(f"**Estimated Organism:** This protein sequence shares similarities with proteins found in {organism}.")
 
 
63
 
64
  # Fetch protein function (if available)
65
  if hasattr(alignment, 'description') and alignment.description:
66
+ function = alignment.description.split('[')[0].strip() # Remove organism info in brackets
67
+ st.write(f"**Potential Function:** This protein might be involved in {function.lower()}.")
68
+ else:
69
+ st.write("**Potential Function:** Unable to determine a specific function for this protein sequence.")
70
 
71
+ st.markdown("[Learn more about protein functions](https://www.nature.com/scitable/topicpage/protein-function-14123348/)")
 
 
72
  else:
73
+ st.write("No close matches found. This might be a unique protein sequence!")
74
  except Exception as e:
75
+ st.error("An error occurred during protein analysis. Please try again later.")
 
76
 
77
  # ESMfold
78
  def update(sequence, word1, word2, word3, sequence_length):
 
130
  if st.sidebar.button('Generate and Predict'):
131
  if word1 and word2 and word3:
132
  sequence = generate_sequence_from_words([word1, word2, word3], sequence_length)
133
+ st.session_state.sequence = sequence # Store the sequence in session state
134
  st.sidebar.text_area("Generated Sequence", sequence, height=100)
135
  st.sidebar.info("Note: The same words and sequence length will always produce the same sequence.")
136
 
137
  with st.spinner("Predicting protein structure... This may take a few minutes."):
138
  update(sequence, word1, word2, word3, sequence_length)
139
+
140
+ # Add Analyze Protein button
141
+ if st.button('Analyze Protein'):
142
+ perform_blast_analysis(st.session_state.sequence)
143
  else:
144
  st.sidebar.warning("Please enter all three words to generate a sequence.")
145
 
146
+
147
  # Information display
148
  st.sidebar.markdown("""
149
  ## What to do next: