import gradio as gr
import torch
from abc import ABC, abstractmethod

from fuse.data.tokenizers.modular_tokenizer.op import ModularTokenizerOp
from mammal.keys import (
    CLS_PRED,
    ENCODER_INPUTS_ATTENTION_MASK,
    ENCODER_INPUTS_STR,
    ENCODER_INPUTS_TOKENS,
)
from mammal.model import Mammal


class MammalObjectBroker:
    """Holds a MAMMAL checkpoint path and lazily loads its model and tokenizer."""

    def __init__(
        self,
        model_path: str,
        name: str | None = None,
        task_list: list[str] | None = None,
    ) -> None:
        self.model_path = model_path
        self.name = name if name is not None else model_path
        self.tasks = task_list if task_list is not None else []
        self._model = None
        self._tokenizer_op = None

    @property
    def model(self) -> Mammal:
        if self._model is None:
            self._model = Mammal.from_pretrained(self.model_path)
            self._model.eval()
        return self._model

    @property
    def tokenizer_op(self) -> ModularTokenizerOp:
        if self._tokenizer_op is None:
            self._tokenizer_op = ModularTokenizerOp.from_pretrained(self.model_path)
        return self._tokenizer_op
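# Usage sketch (illustrative only, not called anywhere in this app): the broker
# defers loading, so constructing it is cheap and the checkpoint download happens
# on first attribute access.
#
#   broker = MammalObjectBroker(model_path="ibm/biomed.omics.bl.sm.ma-ted-458m")
#   model = broker.model              # loads and eval()s the model on first access
#   tokenizer = broker.tokenizer_op   # loads the tokenizer on first access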
class MammalTask(ABC):
    """Abstract base class for a MAMMAL demo task."""

    def __init__(self, name: str) -> None:
        self.name = name
        self.description = None
        self._demo = None

    @abstractmethod
    def generate_prompt(self, **kwargs) -> str:
        """Format the task inputs into a prompt matching the pre-training syntax."""
        raise NotImplementedError()

    @abstractmethod
    def create_sample_dict(self, prompt: str, **kwargs) -> dict:
        """Tokenize the prompt into a sample_dict for feeding into the model."""
        raise NotImplementedError()

    def run_model(self, sample_dict, model: Mammal):
        """Run the model on a tokenized sample; concrete tasks override this."""
        raise NotImplementedError()

    @abstractmethod
    def create_demo(self, model_name_dropdown):
        """Create a gradio demo group for this task."""
        raise NotImplementedError()

    def demo(self, model_name_dropdown=None):
        """Return the cached gradio demo group, creating it on first use."""
        if self._demo is None:
            self._demo = self.create_demo(model_name_dropdown=model_name_dropdown)
        return self._demo

    @abstractmethod
    def decode_output(self, batch_dict, model_holder: MammalObjectBroker):
        """Decode the raw model output into human-readable results."""
        raise NotImplementedError()


all_tasks: dict[str, MammalTask] = dict()
all_models: dict[str, MammalObjectBroker] = dict()


class PpiTask(MammalTask):
    def __init__(self):
        super().__init__(name="PPI")
        self.description = "Protein-Protein Interaction (PPI)"
        self.examples = {
            "protein_calmodulin": "MADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDMISELDQDGFIDKEDLHDGDGKISFEEFLNLVNKEMTADVDGDGQVNYEEFVTMMTSK",
            "protein_calcineurin": "MSSKLLLAGLDIERVLAEKNFYKEWDTWIIEAMNVGDEEVDRIKEFKEDEIFEEAKTLGTAEMQEYKKQKLEEAIEGAFDIFDKDGNGYISAAELRHVMTNLGEKLTDEEVDEMIRQMWDQNGDWDRIKELKFGEIKKLSAKDTRGTIFIKVFENLGTGVDSEYEDVSKYMLKHQ",
        }
        self.markup_text = f"""
# Mammal based {self.description} demonstration

Given two protein sequences, estimate if the proteins interact or not."""

    @staticmethod
    def positive_token_id(model_holder: MammalObjectBroker) -> int:
        """Token id of the positive ("interacting") binding class.

        Args:
            model_holder (MammalObjectBroker): broker holding the tokenizer

        Returns:
            int: id of the positive binding token
        """
        return model_holder.tokenizer_op.get_token_id("<1>")

    def generate_prompt(self, prot1: str, prot2: str) -> str:
        """Format the two protein sequences into a pre-training style prompt.

        Args:
            prot1 (str): sequence of protein number 1
            prot2 (str): sequence of protein number 2

        Returns:
            str: prompt
        """
        # Special tokens follow the PPI prompt syntax from the
        # biomed.omics.bl.sm.ma-ted-458m model card.
        prompt = (
            "<@TOKENIZER-TYPE=AA>"
            "<BINDING_AFFINITY_CLASS><SENTINEL_ID_0>"
            "<MOLECULAR_ENTITY><MOLECULAR_ENTITY_GENERAL_PROTEIN>"
            f"<SEQUENCE_NATURAL_START>{prot1}<SEQUENCE_NATURAL_END>"
            "<MOLECULAR_ENTITY><MOLECULAR_ENTITY_GENERAL_PROTEIN>"
            f"<SEQUENCE_NATURAL_START>{prot2}<SEQUENCE_NATURAL_END><EOS>"
        )
        return prompt

    def create_sample_dict(
        self, prompt: str, model_holder: MammalObjectBroker
    ) -> dict:
        # Create and load sample
        sample_dict = dict()
        sample_dict[ENCODER_INPUTS_STR] = prompt

        # Tokenize
        sample_dict = model_holder.tokenizer_op(
            sample_dict=sample_dict,
            key_in=ENCODER_INPUTS_STR,
            key_out_tokens_ids=ENCODER_INPUTS_TOKENS,
            key_out_attention_mask=ENCODER_INPUTS_ATTENTION_MASK,
        )
        sample_dict[ENCODER_INPUTS_TOKENS] = torch.tensor(
            sample_dict[ENCODER_INPUTS_TOKENS]
        )
        sample_dict[ENCODER_INPUTS_ATTENTION_MASK] = torch.tensor(
            sample_dict[ENCODER_INPUTS_ATTENTION_MASK]
        )
        return sample_dict

    def run_model(self, sample_dict, model: Mammal):
        # Generate prediction
        batch_dict = model.generate(
            [sample_dict],
            output_scores=True,
            return_dict_in_generate=True,
            max_new_tokens=5,
        )
        return batch_dict

    def decode_output(self, batch_dict, model_holder: MammalObjectBroker):
        # Decode the generated tokens and read off the score of the positive class.
        generated_output = model_holder.tokenizer_op._tokenizer.decode(
            batch_dict[CLS_PRED][0]
        )
        score = batch_dict["model.out.scores"][0][1][
            self.positive_token_id(model_holder)
        ].item()
        return generated_output, score

    def create_and_run_prompt(self, model_name, protein1, protein2):
        model_holder = all_models[model_name]
        prompt = self.generate_prompt(protein1, protein2)
        sample_dict = self.create_sample_dict(prompt=prompt, model_holder=model_holder)
        batch_dict = self.run_model(sample_dict=sample_dict, model=model_holder.model)
        res = prompt, *self.decode_output(batch_dict, model_holder=model_holder)
        return res

    def create_demo(self, model_name_dropdown):
        with gr.Group() as demo:
            gr.Markdown(self.markup_text)
            with gr.Row():
                prot1 = gr.Textbox(
                    label="Protein 1 sequence",
                    interactive=True,
                    lines=3,
                    value=self.examples["protein_calmodulin"],
                )
                prot2 = gr.Textbox(
                    label="Protein 2 sequence",
                    interactive=True,
                    lines=3,
                    value=self.examples["protein_calcineurin"],
                )
            with gr.Row():
                run_mammal = gr.Button(
                    "Run Mammal prompt for Protein-Protein Interaction",
                    variant="primary",
                )
            with gr.Row():
                prompt_box = gr.Textbox(label="Mammal prompt", lines=5)
            with gr.Row():
                decoded = gr.Textbox(label="Mammal output")
                run_mammal.click(
                    fn=self.create_and_run_prompt,
                    inputs=[model_name_dropdown, prot1, prot2],
                    outputs=[prompt_box, decoded, gr.Number(label="PPI score")],
                )
            with gr.Row():
                gr.Markdown(
                    "The model output contains the binding affinity class, which is"
                    " ```<1>``` for interacting and ```<0>``` for non-interacting"
                )
        demo.visible = True
        return demo


ppi_task = PpiTask()
all_tasks[ppi_task.name] = ppi_task

ppi_model = MammalObjectBroker(
    model_path="ibm/biomed.omics.bl.sm.ma-ted-458m", task_list=["PPI"]
)
all_models[ppi_model.name] = ppi_model

# TODO: register the DTI model once its task is implemented (task list still empty):
# from mammal.examples.dti_bindingdb_kd.task import DtiBindingdbKdTask
# dti_model = MammalObjectBroker(
#     model_path="ibm/biomed.omics.bl.sm.ma-ted-458m.dti_bindingdb_pkd"
# )
# all_models[dti_model.name] = dti_model


def create_application():
    def task_change(value):
        choices = [
            model_name
            for model_name, model in all_models.items()
            if value in model.tasks
        ]
        if choices:
            return gr.update(choices=choices, value=choices[0])
        # No model serves the selected task; leave the dropdown unchanged.
        return gr.update()

    with gr.Blocks() as demo:
        task_dropdown = gr.Dropdown(
            choices=["select demo"] + list(all_tasks.keys()),
            interactive=True,
        )
        model_name_dropdown = gr.Dropdown(
            choices=[
                model_name
                for model_name, model in all_models.items()
                if task_dropdown.value in model.tasks
            ],
            interactive=True,
        )
        task_dropdown.change(
            task_change, inputs=[task_dropdown], outputs=[model_name_dropdown]
        )

        ppi_demo = all_tasks["PPI"].demo(model_name_dropdown=model_name_dropdown)
        ppi_demo.visible = True
        # TODO: add the DTI demo here once its task is implemented.

        def set_ppi_vis(main_text):
            # Only the PPI demo exists for now, so it stays visible regardless of
            # the selection. Once more demos exist, switch on the task instead, e.g.
            # return gr.Group(visible=(main_text == "PPI"))
            return gr.Group(visible=True)

        task_dropdown.change(set_ppi_vis, inputs=task_dropdown, outputs=[ppi_demo])
    return demo
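# Headless usage sketch (an illustration added here, not wired into the UI): it
# composes the same steps create_and_run_prompt() performs, without Gradio.
# Calling it triggers the checkpoint download via the lazy broker above.
def run_ppi_example() -> tuple[str, float]:
    task = all_tasks["PPI"]
    model_holder = all_models["ibm/biomed.omics.bl.sm.ma-ted-458m"]
    prompt = task.generate_prompt(
        task.examples["protein_calmodulin"], task.examples["protein_calcineurin"]
    )
    sample_dict = task.create_sample_dict(prompt=prompt, model_holder=model_holder)
    batch_dict = task.run_model(sample_dict=sample_dict, model=model_holder.model)
    decoded, score = task.decode_output(batch_dict, model_holder=model_holder)
    return decoded, score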
full_demo = None


def main():
    global full_demo
    full_demo = create_application()
    full_demo.launch(show_error=True, share=False)


if __name__ == "__main__":
    main()
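# Deployment note (a sketch, not part of the original app): when serving from a
# container, bind to all interfaces, e.g.
#   full_demo.launch(show_error=True, server_name="0.0.0.0", server_port=7860)
# server_name and server_port are standard gradio launch() parameters.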