Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import torch | |
| from fuse.data.tokenizers.modular_tokenizer.op import ModularTokenizerOp | |
| from mammal.examples.dti_bindingdb_kd.task import DtiBindingdbKdTask | |
| from mammal.keys import * | |
| from mammal.model import Mammal | |
| from abc import ABC, abstractmethod | |
class MammalObjectBroker:
    """Lazily load and cache a Mammal model and its tokenizer op.

    Nothing is loaded at construction time; the model and the tokenizer are
    fetched from ``model_path`` on first access and reused afterwards.

    Attributes:
        model_path: Location passed to ``from_pretrained`` for both the model
            and the tokenizer op.
        name: Display name of the model; defaults to ``model_path``.
        tasks: Names of the tasks this model supports (empty if none given).
    """

    def __init__(self, model_path: str, name: str = None, task_list: list[str] = None) -> None:
        """Record where the model lives without loading it.

        Args:
            model_path: Identifier/path handed to ``from_pretrained``.
            name: Optional display name; ``model_path`` is used when omitted.
            task_list: Optional list of supported task names.
        """
        self.model_path = model_path
        self.name = model_path if name is None else name
        # Always expose ``tasks`` so callers can test membership even when no
        # task list was supplied.
        self.tasks = task_list if task_list is not None else []
        self._model = None
        self._tokenizer_op = None

    @property
    def model(self) -> "Mammal":
        """The Mammal model, loaded on first access and switched to eval mode."""
        if self._model is None:
            self._model = Mammal.from_pretrained(self.model_path)
            self._model.eval()
        return self._model

    @property
    def tokenizer_op(self):
        """The ModularTokenizerOp for ``model_path``, loaded on first access."""
        if self._tokenizer_op is None:
            self._tokenizer_op = ModularTokenizerOp.from_pretrained(self.model_path)
        return self._tokenizer_op
class MammalTask(ABC):
    """Base class for a task that can be demonstrated with a Mammal model.

    Subclasses override the hook methods below; each hook raises
    ``NotImplementedError`` here. ``demo`` builds the Gradio demo once via
    ``create_demo`` and caches it.

    Attributes:
        name: Short task name, used as the key in the task registry.
        description: Human-readable description; ``None`` until a subclass
            sets it.
    """

    def __init__(self, name: str) -> None:
        self.name = name
        self.description = None
        self._demo = None

    def _missing(self, method_name: str) -> NotImplementedError:
        """Build the error raised by hooks a subclass did not override."""
        return NotImplementedError(
            f"{type(self).__name__} must implement {method_name}()"
        )

    def generate_prompt(self, **kwargs) -> str:
        """Format the task inputs into a prompt string for the model.

        Args:
            **kwargs: Task-specific inputs (defined by the subclass).

        Returns:
            str: The prompt.

        Raises:
            NotImplementedError: Always, unless overridden.
        """
        raise self._missing("generate_prompt")

    def crate_sample_dict(self, prompt: str, **kwargs) -> dict:
        """Turn a prompt into the sample dict that is fed to the model.

        Args:
            prompt (str): Prompt produced by ``generate_prompt``.
            **kwargs: Task-specific extras (defined by the subclass).

        Returns:
            dict: sample_dict for feeding into the model.

        Raises:
            NotImplementedError: Always, unless overridden.
        """
        raise self._missing("crate_sample_dict")

    def run_model(self, sample_dict, model: "Mammal"):
        """Run ``model`` on ``sample_dict`` and return its raw output.

        Raises:
            NotImplementedError: Always, unless overridden.
        """
        raise self._missing("run_model")

    def create_demo(self, model_name_dropdown):
        """Create the Gradio demo group for this task.

        Args:
            model_name_dropdown: Component used to pick the model by name.

        Returns:
            The Gradio container holding the task's demo.

        Raises:
            NotImplementedError: Always, unless overridden.
        """
        raise self._missing("create_demo")

    def demo(self, model_name_dropdown=None):
        """Return the task's demo, building it on the first call only.

        ``model_name_dropdown`` is used only when the demo is first built;
        later calls return the cached demo and ignore the argument.
        """
        if self._demo is None:
            self._demo = self.create_demo(model_name_dropdown=model_name_dropdown)
        return self._demo

    def decode_output(self, batch_dict, model: "Mammal"):
        """Convert the raw model output into values to display.

        NOTE(review): the PPI subclass in this file names the second
        parameter ``model_holder`` and passes a broker, not a model --
        confirm which signature is intended.

        Raises:
            NotImplementedError: Always, unless overridden.
        """
        raise self._missing("decode_output")
# Module-level registries, filled in below and read by the UI:
# task name -> task object, and model name -> model broker.
all_tasks = {}
all_models = {}
class PpiTask(MammalTask):
    """Protein-Protein Interaction (PPI) task and its Gradio demo.

    Builds a prompt from two protein sequences, tokenizes it, runs the model
    and reports the decoded output together with the score of the ``<1>``
    (positive) token.
    """

    def __init__(self):
        super().__init__(name="PPI")
        self.description = "Protein-Protein Interaction (PPI)"
        # Default sequences pre-filled in the two input boxes.
        self.examples = {
            "protein_calmodulin": "MADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDMISELDQDGFIDKEDLHDGDGKISFEEFLNLVNKEMTADVDGDGQVNYEEFVTMMTSK",
            "protein_calcineurin": "MSSKLLLAGLDIERVLAEKNFYKEWDTWIIEAMNVGDEEVDRIKEFKEDEIFEEAKTLGTAEMQEYKKQKLEEAIEGAFDIFDKDGNGYISAAELRHVMTNLGEKLTDEEVDEMIRQMWDQNGDWDRIKELKFGEIKKLSAKDTRGTIFIKVFENLGTGVDSEYEDVSKYMLKHQ",
        }
        # f-string so that the description is actually interpolated into the
        # heading instead of showing the literal "{self.description}".
        self.markup_text = (
            f"\n# Mammal based {self.description} demonstration\n"
            "Given two protein sequences, estimate if the proteins interact or not."
        )

    def positive_token_id(self, model_holder: MammalObjectBroker):
        """Return the id of the token that marks positive binding.

        Args:
            model_holder (MammalObjectBroker): broker holding the tokenizer

        Returns:
            int: id of the ``<1>`` token
        """
        return model_holder.tokenizer_op.get_token_id("<1>")

    def generate_prompt(self, prot1, prot2):
        """Format the two sequences into a prompt matching pre-training syntax.

        Args:
            prot1 (str): sequence of protein number 1
            prot2 (str): sequence of protein number 2

        Returns:
            str: prompt
        """
        prompt = (
            "<@TOKENIZER-TYPE=AA><BINDING_AFFINITY_CLASS><SENTINEL_ID_0>"
            "<MOLECULAR_ENTITY><MOLECULAR_ENTITY_GENERAL_PROTEIN>"
            f"<SEQUENCE_NATURAL_START>{prot1}<SEQUENCE_NATURAL_END>"
            "<MOLECULAR_ENTITY><MOLECULAR_ENTITY_GENERAL_PROTEIN>"
            f"<SEQUENCE_NATURAL_START>{prot2}<SEQUENCE_NATURAL_END><EOS>"
        )
        return prompt

    def crate_sample_dict(self, prompt: str, model_holder: MammalObjectBroker):
        """Tokenize ``prompt`` into a sample dict ready for the model.

        Args:
            prompt (str): prompt produced by ``generate_prompt``
            model_holder (MammalObjectBroker): broker providing the tokenizer op

        Returns:
            dict: sample dict whose encoder token ids and attention mask are
            torch tensors
        """
        sample_dict = {ENCODER_INPUTS_STR: prompt}

        # Tokenize: the op writes the ids and the attention mask under the
        # given output keys.
        sample_dict = model_holder.tokenizer_op(
            sample_dict=sample_dict,
            key_in=ENCODER_INPUTS_STR,
            key_out_tokens_ids=ENCODER_INPUTS_TOKENS,
            key_out_attention_mask=ENCODER_INPUTS_ATTENTION_MASK,
        )

        # Convert the tokenizer outputs to tensors.
        for key in (ENCODER_INPUTS_TOKENS, ENCODER_INPUTS_ATTENTION_MASK):
            sample_dict[key] = torch.tensor(sample_dict[key])
        return sample_dict

    def run_model(self, sample_dict, model: Mammal):
        """Generate a prediction for a single sample.

        Args:
            sample_dict (dict): output of ``crate_sample_dict``
            model (Mammal): model to run

        Returns:
            The batch dict returned by ``model.generate`` (scores included).
        """
        batch_dict = model.generate(
            [sample_dict],
            output_scores=True,
            return_dict_in_generate=True,
            max_new_tokens=5,
        )
        return batch_dict

    def decode_output(self, batch_dict, model_holder):
        """Extract the decoded text and the positive-class score.

        Args:
            batch_dict: output of ``run_model``
            model_holder (MammalObjectBroker): broker providing the tokenizer op

        Returns:
            tuple: ``(generated_output, score)`` where ``score`` is a Python
            number taken from the scores at index ``[0][1]`` for the ``<1>``
            token (presumably first sample, second generated position --
            TODO confirm).
        """
        generated_output = model_holder.tokenizer_op._tokenizer.decode(
            batch_dict[CLS_PRED][0]
        )
        positive_id = self.positive_token_id(model_holder)
        score = batch_dict["model.out.scores"][0][1][positive_id].item()
        return generated_output, score

    def create_and_run_prompt(self, model_name, protein1, protein2):
        """Run the full pipeline for the model registered as ``model_name``.

        Args:
            model_name (str): key into the module-level ``all_models`` registry
            protein1 (str): sequence of protein number 1
            protein2 (str): sequence of protein number 2

        Returns:
            tuple: ``(prompt, generated_output, score)``
        """
        model_holder = all_models[model_name]
        prompt = self.generate_prompt(protein1, protein2)
        sample_dict = self.crate_sample_dict(prompt=prompt, model_holder=model_holder)
        batch_dict = self.run_model(sample_dict=sample_dict, model=model_holder.model)
        generated_output, score = self.decode_output(batch_dict, model_holder=model_holder)
        return prompt, generated_output, score

    def create_demo(self, model_name_dropdown):
        """Build the Gradio group for the PPI demo.

        Args:
            model_name_dropdown: component whose value selects the model

        Returns:
            gr.Group: the group holding all PPI widgets
        """
        with gr.Group() as demo:
            gr.Markdown(self.markup_text)
            with gr.Row():
                prot1 = gr.Textbox(
                    label="Protein 1 sequence",
                    interactive=True,
                    lines=3,
                    value=self.examples["protein_calmodulin"],
                )
                prot2 = gr.Textbox(
                    label="Protein 2 sequence",
                    interactive=True,
                    lines=3,
                    value=self.examples["protein_calcineurin"],
                )
            with gr.Row():
                run_mammal = gr.Button(
                    "Run Mammal prompt for Protein-Protein Interaction", variant="primary"
                )
            with gr.Row():
                prompt_box = gr.Textbox(label="Mammal prompt", lines=5)
            with gr.Row():
                decoded = gr.Textbox(label="Mammal output")
                score_box = gr.Number(label="PPI score")
                # The three outputs line up with the tuple returned by
                # create_and_run_prompt: prompt, decoded text, score.
                run_mammal.click(
                    fn=self.create_and_run_prompt,
                    inputs=[model_name_dropdown, prot1, prot2],
                    outputs=[prompt_box, decoded, score_box],
                )
            with gr.Row():
                gr.Markdown(
                    "```<SENTINEL_ID_0>``` contains the binding affinity class, which is ```<1>``` for interacting and ```<0>``` for non-interacting"
                )
        demo.visible = True
        return demo
# Register the PPI task under its own name.
ppi_task = PpiTask()
all_tasks[ppi_task.name] = ppi_task

# Register the model that serves the PPI task; with no explicit name it is
# keyed by its model path.
ppi_model = MammalObjectBroker(
    model_path="ibm/biomed.omics.bl.sm.ma-ted-458m",
    task_list=["PPI"],
)
all_models[ppi_model.name] = ppi_model

# tdi_model = MammalTrainedModel(model_path="ibm/biomed.omics.bl.sm.ma-ted-458m.dti_bindingdb_pkd") TODO: ## task list still empty
# all_models.append(tdi_model)
def create_application():
    """Assemble the Gradio application.

    The page has a task dropdown, a model dropdown restricted to the models
    that list the selected task, and the PPI demo group.

    Returns:
        gr.Blocks: the assembled (not yet launched) application.
    """

    def models_for_task(task_name):
        """Names of the registered models that list ``task_name``."""
        return [
            model_name
            for model_name, model in all_models.items()
            if task_name in model.tasks
        ]

    def task_change(value):
        """Refresh the model dropdown after the task selection changed."""
        choices = models_for_task(value)
        if choices:
            return gr.update(choices=choices, value=choices[0])
        # No model supports this task: clear the dropdown explicitly so the
        # previous task's models are not left selectable.
        return gr.update(choices=[], value=None)

    with gr.Blocks() as demo:
        task_dropdown = gr.Dropdown(choices=["select demo"] + list(all_tasks))
        task_dropdown.interactive = True
        model_name_dropdown = gr.Dropdown(
            choices=models_for_task(task_dropdown.value),
            interactive=True,
        )
        task_dropdown.change(
            task_change, inputs=[task_dropdown], outputs=[model_name_dropdown]
        )

        ppi_demo = all_tasks["PPI"].demo(model_name_dropdown=model_name_dropdown)
        ppi_demo.visible = True

        def set_ppi_vis(main_text):
            """Keep the PPI group visible whatever task is selected."""
            print(f"main text is {main_text}")
            return gr.Group(visible=True)
            # return gr.Group(visible=(main_text == "PPI"))
            # , gr.Group( visible=(main_text == "DTI") )

        task_dropdown.change(set_ppi_vis, inputs=task_dropdown, outputs=[ppi_demo])
    return demo
# Holds the application once main() has built it.
full_demo = None


def main():
    """Build the application, keep it in ``full_demo`` and launch it."""
    global full_demo
    app = create_application()
    full_demo = app
    app.launch(show_error=True, share=False)


if __name__ == "__main__":
    main()