|
import streamlit as st |
|
import random |
|
import hashlib |
|
import py3Dmol |
|
import requests |
|
import io |
|
from Bio import PDB |
|
|
|
def generate_sequence_from_words(words, length):
    """Return a reproducible pseudo-random amino-acid sequence.

    The words are joined with single spaces, UTF-8 encoded and hashed with
    MD5; the hex digest seeds the generator, so the same words and length
    always give the same sequence.

    Args:
        words: Iterable of strings used to derive the seed.
        length: Number of residues to generate. Zero or a negative value
            yields an empty string.

    Returns:
        A string of ``length`` one-letter codes drawn from the 20-letter
        alphabet ``ACDEFGHIKLMNPQRSTVWY``.
    """
    amino_acids = "ACDEFGHIKLMNPQRSTVWY"
    digest = hashlib.md5(' '.join(words).encode('utf-8')).hexdigest()

    # Use a private generator rather than random.seed(): the output is the
    # same as seeding the module-level generator with this digest, but the
    # process-wide RNG state is no longer overwritten on every call.
    rng = random.Random(digest)
    return ''.join(rng.choice(amino_acids) for _ in range(length))
|
|
|
def predict_structure(sequence):
    """Request a predicted structure for ``sequence`` from the ESM Atlas API.

    Args:
        sequence: Amino-acid sequence as a string of one-letter codes.

    Returns:
        The response body (PDB text) when the service answers with HTTP 200.
        ``None`` on any failure; the failure is reported to the page with
        ``st.error`` before returning.
    """
    url = "https://api.esmatlas.com/foldSequence/v1/pdb/"
    headers = {"Content-Type": "application/x-www-form-urlencoded"}

    try:
        # The fold endpoint is documented as taking the bare sequence as the
        # request body (curl --data "<SEQUENCE>"), so send the string itself
        # rather than a "sequence=<...>" form field.
        response = requests.post(url, headers=headers, data=sequence, timeout=300)
    except requests.RequestException as exc:
        # Timeouts, DNS failures and dropped connections raise instead of
        # returning a response; report them the same way as an HTTP error so
        # the caller only ever has to handle None.
        st.error(f"Error in structure prediction: {exc}")
        return None

    if response.status_code == 200:
        return response.text

    st.error(f"Error in structure prediction: {response.status_code} - {response.text}")
    return None
|
|
|
def visualize_protein(pdb_string):
    """Build a py3Dmol viewer that displays ``pdb_string`` as a cartoon.

    Args:
        pdb_string: Structure text, loaded into the viewer as 'pdb' format.

    Returns:
        The configured ``py3Dmol.view`` object (800 x 400), with the
        spectrum-colored cartoon style applied and ``zoomTo()`` called.
    """
    cartoon_style = {'cartoon': {'color': 'spectrum'}}

    viewer = py3Dmol.view(width=800, height=400)
    viewer.addModel(pdb_string, 'pdb')
    viewer.setStyle(cartoon_style)
    viewer.zoomTo()
    return viewer
|
|
|
# Page header followed by the inputs that drive sequence generation.
st.title("Protein Sequence Generator and Structure Predictor")

st.write("Enter three random words to seed your protein sequence:")
# The three text boxes are created in label order, one per seed word.
word1, word2, word3 = [
    st.text_input(label) for label in ("Word 1", "Word 2", "Word 3")
]

# Requested sequence length: 50 to 400, default 100, in steps of 10.
sequence_length = st.number_input(
    "Enter desired sequence length",
    min_value=50,
    max_value=400,
    value=100,
    step=10,
)
|
|
|
# Main action: generate a sequence from the seed words, request a predicted
# structure for it, and embed the 3D viewer in the page.
if st.button("Generate Sequence and Predict Structure"):
    words = [word1, word2, word3]
    # Whitespace-only entries count as missing. The words themselves are
    # passed on unmodified, so the seed is derived from exactly what was typed.
    if all(word.strip() for word in words):
        sequence = generate_sequence_from_words(words, sequence_length)
        st.write(f"Generated sequence inspired by '{word1}', '{word2}', and '{word3}' with length '{sequence_length}':")
        st.code(sequence)

        st.header("Protein Structure Prediction")
        # Keep the spinner around the slow network call only.
        with st.spinner("Predicting protein structure... This may take a few minutes."):
            pdb_string = predict_structure(sequence)

        if pdb_string:
            view = visualize_protein(pdb_string)

            # py3Dmol has no show3d() helper, and a viewer's startjs alone is
            # only the opening part of its script. Embed the complete HTML
            # the viewer generates for itself instead.
            # NOTE(review): _make_html() is a private py3Dmol method; confirm
            # it exists in the pinned py3Dmol version.
            st.components.v1.html(view._make_html(), width=800, height=400)

            st.success("Structure prediction complete!")
            st.write("Note: This is a computational prediction and may not represent the actual biological structure.")
        else:
            st.error("Failed to predict structure. Please try again.")
    else:
        st.error("Please enter all three words.")
|
|
|
# Static footer with suggested next steps; rendered on every run of the script.
next_steps_markdown = """

## What to do next:

1. Experiment with different seed words and sequence lengths.

2. Learn about how protein sequences relate to their predicted structures.

3. Remember that these are computational predictions and may not represent the actual biological structure.

4. For real protein structures, visit the [Protein Data Bank (PDB)](https://www.rcsb.org/).

Enjoy exploring the world of protein sequences and predicted structures!

"""
st.markdown(next_steps_markdown)