# Spaces: Runtime error
| import uuid | |
| import gradio as gr | |
| import torch | |
| import os | |
| import pandas as pd | |
| from rdkit import Chem | |
| from scripts.pla_net_inference import main | |
| from utils.args import ArgsInit | |
# Startup diagnostics: print the nvidia-smi report and whether PyTorch sees CUDA.
os.system("nvidia-smi")
print("TORCH_CUDA", torch.cuda.is_available())

# Article describing PLA-Net (same DOI as the citation shown in the UI).
PROJECT_URL = "https://www.nature.com/articles/s41598-022-12180-x"

# Base directory under which datasets, checkpoints and temp files are resolved.
DEFAULT_PATH_DOCKER = "/home/user/app"

# Target protein IDs accepted by the app, alphabetical, one initial per row.
ENABLED_MODELS = [
    "aa2ar", "abl1", "ace", "aces", "ada", "ada17", "adrb1", "adrb2",
    "akt1", "akt2", "aldr", "ampc", "andr", "aofb",
    "bace1", "braf",
    "cah2", "casp3", "cdk2", "comt", "cp2c9", "cp3a4", "csf1r", "cxcr4",
    "def", "dhi1", "dpp4", "drd3", "dyr",
    "egfr", "esr1", "esr2",
    "fa10", "fa7", "fabp4", "fak1", "fgfr1", "fkb1a", "fnta", "fpps",
    "gcr", "glcm", "gria2", "grik1",
    "hdac2", "hdac8", "hivint", "hivpr", "hivrt", "hmdh", "hs90a", "hxk4",
    "igf1r", "inha", "ital",
    "jak2",
    "kif11", "kit", "kith", "kpcb",
    "lck", "lkha4",
    "mapk2", "mcr", "met", "mk01", "mk10", "mk14", "mmp13", "mp2k1",
    "nos1", "nram",
    "pa2ga", "parp1", "pde5a", "pgh1", "pgh2", "plk1", "pnph", "ppara",
    "ppard", "pparg", "prgr", "ptn1", "pur2", "pygm", "pyrd",
    "reni", "rock1", "rxra",
    "sahh", "src",
    "tgfr1", "thb", "thrb", "try1", "tryb1", "tysy",
    "urok",
    "vgfr2",
    "wee1",
    "xiap",
]
def load_and_filter_data(protein_id, ligand_smiles):
    """Validate the request, run PLA-Net inference and return the predictions.

    Args:
        protein_id: Target protein identifier; must be in ``ENABLED_MODELS``.
        ligand_smiles: One or more SMILES strings separated by ``':'``.
            Blank entries (e.g. from a trailing ``':'``) are ignored.

    Returns:
        pandas.DataFrame read back from the CSV at ``args.output_file`` after
        ``main(args)`` has run.

    Raises:
        gr.Error: If no non-blank SMILES is supplied, if ``protein_id`` is not
            an enabled model, or if any SMILES cannot be parsed by RDKit.
    """
    # Short random id used to name this request's input/output CSV files.
    random_id = str(uuid.uuid4())[:8]
    print("Inference ID: ", random_id)

    # Split on ':' and drop blank entries *before* validating, so that the
    # list that is validated is exactly the list that is written to the CSV
    # and an input made only of separators is rejected instead of producing
    # an empty input file.
    smiles_list = [
        entry.strip()
        for entry in (ligand_smiles or "").split(':')
        if entry.strip()
    ]
    if not smiles_list:
        raise gr.Error("!SMILES string is required 💥", duration=5)

    if protein_id not in ENABLED_MODELS:
        error_msg = f"!Invalid 💥 target protein ID, the available options are: {ENABLED_MODELS}. To do inference other proteins, you can run the model locally an train the model for each target protein."
        raise gr.Error(error_msg, duration=5)

    print("Smiles to predict: ", smiles_list)
    print("Target Protein ID: ", protein_id)

    # Validate SMILES: RDKit returns None for strings it cannot parse.
    invalid_smiles = [
        smiles for smiles in smiles_list if Chem.MolFromSmiles(smiles) is None
    ]
    if invalid_smiles:
        error_msg = f"!Invalid 💥 SMILES string(s) : {', '.join(invalid_smiles)}"
        raise gr.Error(error_msg, duration=5)

    # Build each path once so the writer, the model and the reader agree.
    tmp_dir = f"{DEFAULT_PATH_DOCKER}/example/tmp"
    os.makedirs(tmp_dir, exist_ok=True)
    input_csv = f"{tmp_dir}/{random_id}_input_smiles.csv"
    output_csv = f"{tmp_dir}/{random_id}_output_predictions.csv"

    # Save SMILES to CSV
    pd.DataFrame({"smiles": smiles_list}).to_csv(input_csv, index=False)

    # Run inference
    args = ArgsInit().args
    args.nclasses = 2
    args.batch_size = 10
    args.use_prot = True
    args.freeze_molecule = True
    args.conv_encode_edge = True
    args.learn_t = True
    args.binary = True
    # Only request the GPU when CUDA is actually usable on this host.
    # NOTE(review): assumes main() runs on CPU when use_gpu is False — confirm.
    args.use_gpu = torch.cuda.is_available()
    args.target = protein_id
    args.target_list = f"{DEFAULT_PATH_DOCKER}/data/datasets/AD/Targets_Fasta.csv"
    args.target_checkpoint_path = f"{DEFAULT_PATH_DOCKER}/checkpoints/PLA-Net/BINARY_{protein_id}"
    args.input_file_smiles = input_csv
    args.output_file = output_csv
    print("Args: ", args)
    main(args)

    # Load the predictions CSV
    df = pd.read_csv(output_csv)
    print("Prediction Results output: ", df)
    return df
def load_description(fp):
    """Return the entire text content of the UTF-8 encoded file at ``fp``."""
    with open(fp, encoding='utf-8') as handle:
        return handle.read()
def run_inference(protein_id, ligand_smile):
    """Click handler for the "Run" button.

    Forwards the selected protein ID and the raw SMILES text to
    ``load_and_filter_data`` and returns its DataFrame unchanged.
    """
    return load_and_filter_data(protein_id, ligand_smile)
def create_interface():
    """Assemble the Gradio Blocks page for PLA-Net inference.

    The page shows a title loaded from ``gradio/title.md``, a target-protein
    dropdown, a SMILES textbox with example inputs, a "Run" button wired to
    ``run_inference``, a results table, and two closing Markdown sections
    (model summary and citation).

    Returns:
        The constructed ``gr.Blocks`` object; it is not launched here.
    """
    # NOTE(review): Row/Column nesting and the indentation inside the
    # triple-quoted Markdown literals are reconstructed — confirm against
    # the deployed layout.
    example_smiles = [
        "Cn4c(CCC(=O)Nc3ccc2ccn(CC[C@H](CO)n1cnc(C(N)=O)c1)c2c3)nc5ccccc45",
        "OCCCCCn1cnc2C(O)CN=CNc12",
        "Nc4nc(c1ccco1)c3ncn(C(=O)NCCc2ccccc2)c3n4"
    ]
    result_headers = ["target", "smiles", "interaction_probability", "interaction_class"]
    result_datatypes = ["str", "str", "number", "number"]

    with gr.Blocks(title="PLA-Net Web Inference") as demo:
        gr.HTML(load_description("gradio/title.md"))

        gr.Markdown("### Input")
        with gr.Row():
            # Left column: target protein selection.
            with gr.Column():
                gr.Markdown("#### Target Protein")
                protein_dropdown = gr.Dropdown(
                    choices=ENABLED_MODELS,
                    label="Target Protein ID",
                    info="Select the target protein from the dropdown menu.",
                    value="ada",
                )
                gr.Markdown(" Check the available target proteins [here](https://github.com/juliocesar-io/PLA-Net/blob/main/data/targets.md). The corresponding protein sequences are available in [here](https://github.com/juliocesar-io/PLA-Net/blob/main/data/datasets/AD/Targets_Fasta.csv).")

            # Right column: ligand input plus ready-made examples.
            with gr.Column():
                gr.Markdown("#### Ligand")
                smiles_box = gr.Textbox(
                    info="Provide SMILES input (separate multiple SMILES with ':' )",
                    placeholder="SMILES input",
                    label="SMILES string(s)",
                )
                gr.Examples(
                    examples=example_smiles,
                    inputs=smiles_box,
                    label="Example SMILES",
                )

        run_button = gr.Button("Run")

        gr.Markdown("### Output")
        results_table = gr.Dataframe(
            headers=result_headers,
            datatype=result_datatypes,
            label="Prediction Results",
        )

        # Clicking "Run" sends both inputs to run_inference and fills the table.
        run_button.click(
            fn=run_inference,
            inputs=[protein_dropdown, smiles_box],
            outputs=results_table,
        )

        gr.Markdown("""
        PLA-Net model for predicting interactions
        between small organic molecules and one of the 102 target proteins in the AD dataset. Graph representations
        of the molecule and a given target protein are generated from SMILES and FASTA sequences and are used as
        input to the Ligand Module (LM) and Protein Module (PM), respectively. Each module comprises a deep GCN
        followed by an average pooling layer, which extracts relevant features of their corresponding input graph. Both
        representations are finally concatenated and combined through a fully connected layer to predict the target–
        ligand interaction probability.
        """)
        gr.Markdown("""
        Ruiz Puentes, P., Rueda-Gensini, L., Valderrama, N. et al.
        Predicting target–ligand interactions with graph convolutional networks
        for interpretable pharmaceutical discovery. Sci Rep 12, 8434 (2022).
        [https://doi.org/10.1038/s41598-022-12180-x](https://doi.org/10.1038/s41598-022-12180-x)
        """)

    return demo
# Script entry point: build the Blocks UI and launch it when run directly.
if __name__ == "__main__":
    interface = create_interface()
    interface.launch()