Spaces:
Running
Running
File size: 6,094 Bytes
02970c0 db541e4 02970c0 4d5beeb f73076c 4d5beeb 02970c0 2ec65d5 02970c0 db541e4 4d5beeb c72f5fe f73076c c72f5fe 7f5c48e 4d5beeb 7f5c48e affd796 2ec65d5 7f5c48e 02970c0 4d5beeb 7f5c48e 4d5beeb 7f5c48e d92a3e6 4d5beeb affd796 2f14da2 f73076c 4d5beeb 2f14da2 affd796 4d5beeb db541e4 affd796 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
import re
from pathlib import Path
import gradio as gr
from evodiff.pretrained import OA_DM_38M, D3PM_UNIFORM_38M, MSA_OA_DM_MAXSUB
from evodiff.generate import generate_oaardm, generate_d3pm
from evodiff.generate_msa import generate_query_oadm_msa_simple
import py3Dmol
from colabfold.download import download_alphafold_params, default_data_dir
from colabfold.utils import setup_logging
from colabfold.batch import get_queries, run, set_model_type
from colabfold.plot import plot_msa_v2
def a3m_file(file):
    """Return the fixed A3M file name ``"tmp.a3m"``.

    The ``file`` argument is accepted but not used.
    """
    a3m_name = "tmp.a3m"
    return a3m_name
def predict_protein(sequence):
    """Run a ColabFold prediction for ``sequence`` and return the PDB path.

    Calls ``download_alphafold_params`` for the ``alphafold2_ptm`` model with
    the current directory as target, then calls ColabFold's ``run`` for a
    single query named ``evodiff_protein`` (one model, one recycle, no
    templates, no relaxation), writing results to ``evodiff_protein/``.

    Returns:
        The path string where the rank-1 unrelaxed model is expected. The
        path is built from fixed names; its existence is not checked here.
    """
    job_name = "evodiff_protein"
    model_type = "alphafold2_ptm"
    work_dir = Path(".")

    download_alphafold_params(model_type, work_dir)

    run_options = dict(
        queries=[(job_name, sequence, None)],
        result_dir=job_name,
        use_templates=False,
        num_relax=0,
        msa_mode="mmseqs2_uniref_env",
        model_type=model_type,
        num_models=1,
        num_recycles=1,
        model_order=[1],
        is_complex=False,
        data_dir=work_dir,
        keep_existing_results=False,
        rank_by="auto",
        stop_at_score=100.0,
        zip_results=False,
        user_agent="colabfold/google-colab-main",
    )
    # The return value of run() is not needed; only the files it writes are.
    run(**run_options)

    pdb_name = f"{job_name}_unrelaxed_rank_001_{model_type}_model_1_seed_000.pdb"
    return f"{job_name}/{pdb_name}"
def display_pdb(path_to_pdb):
    """Render a PDB file as an embeddable py3Dmol viewer.

    SOURCE: https://huggingface.co/spaces/merle/PROTEIN_GENERATOR/blob/main/app.py

    Args:
        path_to_pdb: Path of the PDB file to read.

    Returns:
        An ``<iframe>`` HTML string whose ``srcdoc`` attribute carries a
        minimal HTML page wrapping the py3Dmol viewer markup.

    Raises:
        OSError: If ``path_to_pdb`` cannot be opened.
    """
    # Use a context manager so the file handle is closed deterministically.
    with open(path_to_pdb, "r") as pdb_file:
        pdb = pdb_file.read()

    view = py3Dmol.view(width=500, height=500)
    view.addModel(pdb, "pdb")
    # Cartoon representation coloured by atom property 'b' with the 'roygb'
    # gradient over the range [0, 1].
    view.setStyle(
        {'model': -1},
        {"cartoon": {'colorscheme': {'prop': 'b', 'gradient': 'roygb', 'min': 0, 'max': 1}}},
    )
    view.zoomTo()

    # The iframe's srcdoc attribute below is delimited by single quotes, so
    # every "'" in the viewer markup is swapped for '"' to avoid ending the
    # attribute early.
    output = view._make_html().replace("'", '"')
    x = f"""<!DOCTYPE html><html><center> {output} </center></html>"""  # do not use ' in this input
    return f"""<iframe height="500px" width="100%" name="result" allow="midi; geolocation; microphone; camera;
display-capture; encrypted-media;" sandbox="allow-modals allow-forms
allow-scripts allow-same-origin allow-popups
allow-top-navigation-by-user-activation allow-downloads" allowfullscreen=""
allowpaymentrequest="" frameborder="0" srcdoc='{x}'></iframe>"""
def make_uncond_seq(seq_len, model_type):
    """Generate a sequence unconditionally, fold it, and build a viewer.

    Args:
        seq_len: Requested sequence length. Coerced to ``int`` because UI
            sliders may deliver a float.
        model_type: ``"EvoDiff-Seq-OADM 38M"`` or
            ``"EvoDiff-D3PM-Uniform 38M"``.

    Returns:
        A ``(generated_sequence, molhtml)`` tuple: the generator's output and
        the HTML produced by ``display_pdb`` for the predicted structure.

    Raises:
        ValueError: If ``model_type`` is not one of the supported names
            (including ``None`` when nothing was selected).
    """
    seq_len = int(seq_len)

    if model_type == "EvoDiff-Seq-OADM 38M":
        model, _collater, tokenizer, _scheme = OA_DM_38M()
        _tokenized_sample, generated_sequence = generate_oaardm(
            model, tokenizer, seq_len, batch_size=1, device='cpu'
        )
    elif model_type == "EvoDiff-D3PM-Uniform 38M":
        model, _collater, tokenizer, _scheme, timestep, Q_bar, Q = D3PM_UNIFORM_38M(return_all=True)
        _tokenized_sample, generated_sequence = generate_d3pm(
            model, tokenizer, Q, Q_bar, timestep, seq_len, batch_size=1, device='cpu'
        )
    else:
        # Previously this fell through to an UnboundLocalError on
        # generated_sequence; fail early with an explicit message instead.
        raise ValueError(f"Unsupported model type: {model_type!r}")

    path_to_pdb = predict_protein(generated_sequence)
    molhtml = display_pdb(path_to_pdb)
    return generated_sequence, molhtml
def make_cond_seq(seq_len, msa_file, model_type):
    """Generate a sequence conditioned on an MSA, fold it, and build a viewer.

    Args:
        seq_len: Requested sequence length. Coerced to ``int`` because UI
            sliders may deliver a float.
        msa_file: The uploaded MSA file, either an object exposing the file
            path as ``.name`` or a plain path string.
        model_type: Must be ``"EvoDiff-MSA"``.

    Returns:
        A ``(generated_sequence, molhtml)`` tuple: the generator's output and
        the HTML produced by ``display_pdb`` for the predicted structure.

    Raises:
        ValueError: If ``model_type`` is unsupported or no MSA file was given.
    """
    if model_type != "EvoDiff-MSA":
        # Previously this fell through to an UnboundLocalError on
        # generated_sequence; fail early with an explicit message instead.
        raise ValueError(f"Unsupported model type: {model_type!r}")
    if msa_file is None:
        raise ValueError("An MSA (.a3m) file is required for conditional generation.")

    # Accept both file-like upload objects (path in .name) and path strings.
    msa_path = getattr(msa_file, "name", msa_file)
    seq_len = int(seq_len)

    model, _collater, tokenizer, _scheme = MSA_OA_DM_MAXSUB()
    _tokenized_sample, generated_sequence = generate_query_oadm_msa_simple(
        msa_path,
        model,
        tokenizer,
        n_sequences=64,
        seq_length=seq_len,
        device='cpu',
        selection_type='random',
    )

    path_to_pdb = predict_protein(generated_sequence)
    molhtml = display_pdb(path_to_pdb)
    return generated_sequence, molhtml
# Unconditional generation tab: sequence length + model choice in,
# generated sequence text + structure viewer HTML out.
usg_app = gr.Interface(
    fn=make_uncond_seq,
    inputs=[
        # step=1 keeps the slider on whole numbers; a sequence length must be
        # an integer.
        gr.Slider(10, 100, step=1, label="Sequence Length"),
        gr.Dropdown(
            ["EvoDiff-Seq-OADM 38M", "EvoDiff-D3PM-Uniform 38M"],
            type="value",
            label="Model",
        ),
    ],
    outputs=[
        "text",
        gr.HTML(),
    ],
    title="Unconditional sequence generation",
    description="Generate a sequence with `EvoDiff-Seq-OADM 38M` (smaller/faster) or `EvoDiff-D3PM-Uniform 38M` (larger/slower) models.",
)
# Conditional generation tab: sequence length + MSA upload + model choice in,
# generated sequence text + structure viewer HTML out.
csg_app = gr.Interface(
    fn=make_cond_seq,
    inputs=[
        # step=1 keeps the slider on whole numbers; a sequence length must be
        # an integer.
        gr.Slider(10, 100, step=1, label="Sequence Length"),
        # File extensions are given with a leading dot.
        gr.File(file_types=[".a3m"], label="MSA File"),
        gr.Dropdown(["EvoDiff-MSA"], type="value", label="Model"),
    ],
    outputs=[
        "text",
        gr.HTML(),
    ],
    # examples=[["https://github.com/microsoft/evodiff/raw/main/examples/example_files/bfd_uniclust_hits.a3m"]],
    title="Conditional sequence generation",
    description="Evolutionary guided sequence generation with the `EvoDiff-MSA` model.",
)
# Top-level page: a Markdown header row followed by a row holding both
# generation interfaces as tabs.
with gr.Blocks() as edapp:
    with gr.Row():
        # The Markdown text is kept at column 0 so no line picks up leading
        # indentation inside the string literal.
        gr.Markdown(
            """
# EvoDiff
## Generation of protein sequences and evolutionary alignments via discrete diffusion models
Created By: Microsoft Research [Sarah Alamdari, Nitya Thakkar, Rianne van den Berg, Alex X. Lu, Nicolo Fusi, Ava P. Amini, and Kevin K. Yang]
Spaces App By: [Colby T. Ford](https://github.com/colbyford)
"""
        )
    with gr.Row():
        gr.TabbedInterface(
            [usg_app, csg_app],
            ["Unconditional sequence generation", "Conditional generation"],
        )

if __name__ == "__main__":
    edapp.launch()