File size: 6,094 Bytes
02970c0
 
db541e4
02970c0
4d5beeb
f73076c
4d5beeb
02970c0
2ec65d5
02970c0
 
 
 
db541e4
4d5beeb
 
c72f5fe
f73076c
c72f5fe
7f5c48e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4d5beeb
7f5c48e
affd796
2ec65d5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7f5c48e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
02970c0
4d5beeb
 
 
 
 
 
7f5c48e
 
 
 
4d5beeb
 
 
 
 
 
 
 
 
 
 
7f5c48e
 
 
 
d92a3e6
4d5beeb
 
 
affd796
 
 
2f14da2
 
 
 
f73076c
 
 
 
4d5beeb
2f14da2
 
affd796
4d5beeb
 
 
db541e4
affd796
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import re
from pathlib import Path
import gradio as gr

from evodiff.pretrained import OA_DM_38M, D3PM_UNIFORM_38M, MSA_OA_DM_MAXSUB
from evodiff.generate import generate_oaardm, generate_d3pm
from evodiff.generate_msa import generate_query_oadm_msa_simple

import py3Dmol
from colabfold.download import download_alphafold_params, default_data_dir
from colabfold.utils import setup_logging
from colabfold.batch import get_queries, run, set_model_type
from colabfold.plot import plot_msa_v2

def a3m_file(file):
    """Return the fixed A3M filename ``"tmp.a3m"``.

    The ``file`` argument is accepted but not used.
    """
    placeholder_name = "tmp.a3m"
    return placeholder_name



def predict_protein(sequence):
    """Predict a structure for ``sequence`` with ColabFold and return the PDB path.

    Calls ``download_alphafold_params`` for the ``alphafold2_ptm`` weights with
    the current directory as target, then calls ``colabfold.batch.run`` for a
    single query using one model, one recycle, no templates and no relaxation.

    Args:
        sequence: Query sequence handed to ColabFold as the second element of
            the query tuple.

    Returns:
        str: Path where the rank-1 unrelaxed model is expected to be written.
        The path is constructed from the job settings; it is not checked for
        existence here.
    """
    # Single source of truth for the values that also make up the output
    # filename, so the returned path cannot drift from what was requested.
    job_name = "evodiff_protein"
    result_dir = "evodiff_protein"
    model_type = "alphafold2_ptm"
    data_dir = Path(".")

    download_alphafold_params(model_type, data_dir)

    # The return value of run() is not needed; only the files it writes are used.
    run(
        queries=[(job_name, sequence, None)],
        result_dir=result_dir,
        use_templates=False,
        num_relax=0,
        msa_mode="mmseqs2_uniref_env",
        model_type=model_type,
        num_models=1,
        num_recycles=1,
        model_order=[1],
        is_complex=False,
        data_dir=data_dir,
        keep_existing_results=False,
        rank_by="auto",
        stop_at_score=100.0,
        zip_results=False,
        user_agent="colabfold/google-colab-main",
    )

    # NOTE(review): filename pattern presumably follows ColabFold's naming for
    # model 1 / seed 0 — confirm against the installed ColabFold version.
    return f"{result_dir}/{job_name}_unrelaxed_rank_001_{model_type}_model_1_seed_000.pdb"

def display_pdb(path_to_pdb):
    """Build an HTML ``<iframe>`` snippet showing a PDB file in a py3Dmol viewer.

    SOURCE: https://huggingface.co/spaces/merle/PROTEIN_GENERATOR/blob/main/app.py

    Args:
        path_to_pdb: Path of the PDB file to read.

    Returns:
        str: An ``<iframe>`` element whose ``srcdoc`` attribute contains the
        viewer page produced by py3Dmol.
    """
    # Context manager guarantees the file handle is closed after reading.
    with open(path_to_pdb, "r") as pdb_file:
        pdb = pdb_file.read()

    view = py3Dmol.view(width=500, height=500)
    view.addModel(pdb, "pdb")
    # Cartoon coloured by the 'b' property on a 0..1 'roygb' gradient.
    # NOTE(review): presumably 'b' carries the per-residue confidence written
    # by the predictor — confirm that the 0..1 range matches its scale.
    view.setStyle(
        {'model': -1},
        {"cartoon": {'colorscheme': {'prop': 'b', 'gradient': 'roygb', 'min': 0, 'max': 1}}},
    )
    view.zoomTo()

    # The srcdoc attribute below is delimited by single quotes, so the
    # embedded page must not contain any.
    output = view._make_html().replace("'", '"')
    # Opening tag was previously a stray closing </center>.
    x = f"""<!DOCTYPE html><html><center> {output} </center></html>"""  # do not use ' in this input

    return f"""<iframe height="500px" width="100%"  name="result" allow="midi; geolocation; microphone; camera;
                            display-capture; encrypted-media;" sandbox="allow-modals allow-forms
                            allow-scripts allow-same-origin allow-popups
                            allow-top-navigation-by-user-activation allow-downloads" allowfullscreen=""
                            allowpaymentrequest="" frameborder="0" srcdoc='{x}'></iframe>"""

'''
    return f"""<iframe  style="width: 100%; height:700px" name="result" allow="midi; geolocation; microphone; camera; 
                            display-capture; encrypted-media;" sandbox="allow-modals allow-forms 
                            allow-scripts allow-same-origin allow-popups 
                            allow-top-navigation-by-user-activation allow-downloads" allowfullscreen="" 
                            allowpaymentrequest="" frameborder="0" srcdoc='{x}'></iframe>"""
'''

def make_uncond_seq(seq_len, model_type):
    """Generate a sequence unconditionally, fold it, and build its viewer HTML.

    Args:
        seq_len: Requested sequence length; coerced to ``int`` before use.
        model_type: ``"EvoDiff-Seq-OADM 38M"`` or ``"EvoDiff-D3PM-Uniform 38M"``.

    Returns:
        tuple: ``(generated_sequence, molhtml)`` where ``generated_sequence``
        is the second value returned by the EvoDiff generator and ``molhtml``
        is the HTML produced by ``display_pdb``.

    Raises:
        ValueError: If ``model_type`` is not one of the supported names
            (including ``None``, e.g. when nothing was selected).
    """
    # NOTE(review): UI sliders may deliver floats; the generators presumably
    # need an integer length — confirm. int() is a no-op for int input.
    seq_len = int(seq_len)

    if model_type == "EvoDiff-Seq-OADM 38M":
        model, _collater, tokenizer, _scheme = OA_DM_38M()
        _tokenized_sample, generated_sequence = generate_oaardm(
            model, tokenizer, seq_len, batch_size=1, device='cpu'
        )
    elif model_type == "EvoDiff-D3PM-Uniform 38M":
        model, _collater, tokenizer, _scheme, timestep, Q_bar, Q = D3PM_UNIFORM_38M(return_all=True)
        _tokenized_sample, generated_sequence = generate_d3pm(
            model, tokenizer, Q, Q_bar, timestep, seq_len, batch_size=1, device='cpu'
        )
    else:
        # Previously an unknown model fell through both branches and crashed
        # later with an UnboundLocalError on generated_sequence.
        raise ValueError(
            f"Unsupported model_type {model_type!r}; expected "
            "'EvoDiff-Seq-OADM 38M' or 'EvoDiff-D3PM-Uniform 38M'."
        )

    path_to_pdb = predict_protein(generated_sequence)
    molhtml = display_pdb(path_to_pdb)

    return generated_sequence, molhtml

def make_cond_seq(seq_len, msa_file, model_type):
    """Generate a sequence conditioned on an MSA, fold it, and build its viewer HTML.

    Args:
        seq_len: Requested sequence length; coerced to ``int`` before use.
        msa_file: Uploaded file object; its ``.name`` attribute is passed to
            the generator as the MSA path.
        model_type: Must be ``"EvoDiff-MSA"``.

    Returns:
        tuple: ``(generated_sequence, molhtml)`` where ``generated_sequence``
        is the second value returned by the EvoDiff generator and ``molhtml``
        is the HTML produced by ``display_pdb``.

    Raises:
        ValueError: If ``model_type`` is not ``"EvoDiff-MSA"`` or no MSA file
            was provided.
    """
    # Validate up front: an unknown model used to surface as an
    # UnboundLocalError, and a missing upload as an AttributeError on .name.
    if model_type != "EvoDiff-MSA":
        raise ValueError(
            f"Unsupported model_type {model_type!r}; expected 'EvoDiff-MSA'."
        )
    if msa_file is None:
        raise ValueError("An MSA (.a3m) file is required for conditional generation.")

    # NOTE(review): UI sliders may deliver floats; the generator presumably
    # needs an integer length — confirm. int() is a no-op for int input.
    seq_len = int(seq_len)

    model, _collater, tokenizer, _scheme = MSA_OA_DM_MAXSUB()
    _tokenized_sample, generated_sequence = generate_query_oadm_msa_simple(
        msa_file.name,
        model,
        tokenizer,
        n_sequences=64,
        seq_length=seq_len,
        device='cpu',
        selection_type='random',
    )

    path_to_pdb = predict_protein(generated_sequence)
    molhtml = display_pdb(path_to_pdb)

    return generated_sequence, molhtml

# Tab 1: unconditional generation. Inputs map positionally onto
# make_uncond_seq(seq_len, model_type); outputs are the sequence text and the
# structure-viewer HTML.
usg_app = gr.Interface(
    fn=make_uncond_seq,
    inputs=[
        # step=1 keeps the slider on whole numbers; without an explicit step
        # the component may choose a fractional one for this range.
        gr.Slider(10, 100, step=1, label="Sequence Length"),
        gr.Dropdown(
            ["EvoDiff-Seq-OADM 38M", "EvoDiff-D3PM-Uniform 38M"],
            type="value",
            label="Model",
        ),
    ],
    outputs=[
        "text",
        gr.HTML(),
    ],
    title="Unconditional sequence generation",
    description="Generate a sequence with `EvoDiff-Seq-OADM 38M` (smaller/faster) or `EvoDiff-D3PM-Uniform 38M` (larger/slower) models.",
)

# Tab 2: MSA-conditioned generation. Inputs map positionally onto
# make_cond_seq(seq_len, msa_file, model_type); outputs are the sequence text
# and the structure-viewer HTML.
csg_app = gr.Interface(
    fn=make_cond_seq,
    inputs=[
        # step=1 keeps the slider on whole numbers; without an explicit step
        # the component may choose a fractional one for this range.
        gr.Slider(10, 100, step=1, label="Sequence Length"),
        # File extensions need a leading dot; a bare word is treated as a
        # file category rather than an extension.
        gr.File(file_types=[".a3m"], label="MSA File"),
        gr.Dropdown(["EvoDiff-MSA"], type="value", label="Model"),
    ],
    outputs=[
        "text",
        gr.HTML(),
    ],
    # examples=[["https://github.com/microsoft/evodiff/raw/main/examples/example_files/bfd_uniclust_hits.a3m"]],
    title="Conditional sequence generation",
    description="Evolutionary guided sequence generation with the `EvoDiff-MSA` model.",
)


# Top-level page: a Markdown header row followed by a row holding the two
# generation interfaces as tabs.
with gr.Blocks() as edapp:
    with gr.Row():
        gr.Markdown(
            """
            # EvoDiff
            ## Generation of protein sequences and evolutionary alignments via discrete diffusion models

            Created By: Microsoft Research [Sarah Alamdari, Nitya Thakkar, Rianne van den Berg, Alex X. Lu, Nicolo Fusi, Ava P. Amini, and Kevin K. Yang]
            
            Spaces App By: [Colby T. Ford](https://github.com/colbyford)
            """
        )
    with gr.Row():
        gr.TabbedInterface([usg_app, csg_app], ["Unconditional sequence generation", "Conditional generation"])



# Start the Gradio app only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    edapp.launch()