Spaces:
Running
Running
File size: 4,468 Bytes
8e66b23 ec53722 8e66b23 ec53722 8e66b23 ec53722 8e66b23 ec53722 8e66b23 ec53722 8e66b23 ec53722 8e66b23 ec53722 8e66b23 ec53722 8e66b23 ec53722 8e66b23 ec53722 8e66b23 ec53722 8e66b23 ec53722 8e66b23 ec53722 8e66b23 ec53722 8e66b23 ec53722 8e66b23 ec53722 8e66b23 ec53722 8e66b23 ec53722 8e66b23 ec53722 8e66b23 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 |
import logging
import pathlib
from typing import List, Optional
from rdkit import Chem
from tqdm import tqdm
import gradio as gr
from submission import submission
import pandas as pd
from configuration import GENE_EXPRESSION_METADATA
# Module-level logger. The NullHandler means this module emits nothing unless
# the hosting application attaches its own handlers.
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())

# Short display labels for long tissue-site names. Lookups are expected to use
# ``site_mapper.get(name, name)`` so that unlisted sites pass through unchanged.
site_mapper = dict(
    central_nervous_system="CNS",
    haematopoietic_and_lymphoid_tissue="Haema_lymph",
    upper_aerodigestive_tract="digestive",
    autonomic_ganglia="ganglia",
)
def _resolve_smiles(smiles: Optional[str], smiles_path: Optional[str]) -> List[str]:
    """Return the SMILES strings to score, taken from the text field or a file.

    A non-blank ``smiles`` string wins over ``smiles_path``. Raises ``TypeError``
    when ``smiles_path`` has the wrong type or when neither input is provided.
    """
    if not isinstance(smiles_path, (str, type(None))):
        raise TypeError(
            f"SMILES file pass has to be None or str, not {type(smiles_path)}"
        )
    # An untouched text field can arrive as "" instead of None. Treat blank text
    # as "not provided" so it neither shadows an uploaded file nor gets scored.
    if isinstance(smiles, str) and not smiles.strip():
        smiles = None
    if smiles is not None:
        return [smiles]
    if smiles_path is None:
        raise TypeError("Pass either single SMILES or a file")
    # The file has no header row; pandas spells that ``header=None`` and rejects
    # ``header=False`` with a TypeError.
    smiles_table = pd.read_csv(smiles_path, sep="\t", header=None)
    return smiles_table[0].tolist()


def _export_predictions(table: pd.DataFrame) -> str:
    """Write ``table`` to a temporary CSV file and return the file's path."""
    import tempfile

    # delete=False: the file must outlive this function so it can be downloaded.
    with tempfile.NamedTemporaryFile(
        mode="w",
        suffix=".csv",
        prefix="paccmann_predictions_",
        delete=False,
        newline="",
        encoding="utf-8",
    ) as handle:
        table.to_csv(handle, index=False)
    return handle.name


def run_inference(
    smiles: Optional[str],
    smiles_path: Optional[str],
    omic_path: Optional[str],
    confidence: bool,
):
    """Run ``submission`` for every requested SMILES and tabulate the predictions.

    Args:
        smiles: A single SMILES string. ``None`` or blank text means "not given".
        smiles_path: Path to a tab-separated file without header whose first
            column holds SMILES. Only read when ``smiles`` is not given.
        omic_path: Path to a custom omics file forwarded to ``submission``, or
            ``None`` to use the bundled ``GENE_EXPRESSION_METADATA`` cell lines.
        confidence: Whether to request and report confidence estimates.

    Returns:
        A 2-tuple ``(result_df, csv_path)``: the prediction table and the path
        of a temporary CSV file holding the same table (for file-download
        outputs, which expect a path rather than a DataFrame).

    Raises:
        TypeError: If a path argument is neither ``None`` nor ``str``, or if no
            SMILES input was provided at all.
        ValueError: If RDKit cannot parse one of the SMILES.
    """
    log = logging.getLogger(__name__)

    smiles_list = _resolve_smiles(smiles, smiles_path)
    for smi in smiles_list:
        if Chem.MolFromSmiles(smi) is None:
            raise ValueError(f"Found invalid SMILES {smi}")

    # Read omics and otherwise load baseline
    if not isinstance(omic_path, (str, type(None))):
        raise TypeError(f"Omics file pass has to be None or str, not {type(omic_path)}")

    results = {}
    for smi in tqdm(smiles_list, total=len(smiles_list)):
        result = submission(
            drug={"smiles": smi},
            workspace_id="emulated_workspace_id",
            task_id="emulated_task_id",
            estimate_confidence=confidence,
            omics_file=omic_path,
        )
        # Only the log-micromolar IC50 (and optionally the confidences) are
        # reported; attention maps and other entries of ``result`` are ignored.
        results[f"IC50_{smi}"] = result["log_micromolar_IC50"].squeeze().round(3)
        if confidence:
            results[f"aleatoric_confidence_{smi}"] = (
                result["aleatoric_confidence"].squeeze().round(3)
            )
            # NOTE(review): assumes ``submission`` reports the epistemic estimate
            # under "epistemic_confidence" (previously the aleatoric values were
            # duplicated into this column) -- confirm against ``submission``.
            results[f"epistemic_confidence_{smi}"] = (
                result["epistemic_confidence"].squeeze().round(3)
            )
    log.debug("Raw predictions: %s", results)
    predicted_df = pd.DataFrame(results)

    # Prepare DF to visualize
    if omic_path is None:
        # ``drop`` without ``inplace`` returns a new frame, so the shared
        # GENE_EXPRESSION_METADATA stays intact and repeated calls keep working.
        metadata = GENE_EXPRESSION_METADATA.drop(
            columns=[
                "histology",
                "cell_line_name",
                "IC50 (min/max scaled)",
                "IC50 (log(μmol))",
            ]
        )
        metadata["site"] = metadata["site"].apply(lambda x: site_mapper.get(x, x))
        metadata["cell_line"] = metadata["cell_line"].apply(
            lambda x: x.split("_")[0]
        )
        result_df = pd.concat(
            [
                metadata["cell_line"],
                predicted_df,
                metadata.drop(["cell_line"], axis=1),
            ],
            axis=1,
        )
    else:
        # No metadata table is known for user-supplied omics, so only the
        # predictions are returned (rows follow the order produced by
        # ``submission``). TODO: attach cell-line names from the omics file once
        # its schema is fixed.
        result_df = predicted_df

    return result_df, _export_predictions(result_df)
if __name__ == "__main__":
    # Static assets for the demo page live in ``model_cards/`` next to this script.
    card_dir = pathlib.Path(__file__).parent / "model_cards"

    # Example rows are parsed up front (empty cells become ""), although the
    # ``examples=`` argument of the interface is currently switched off.
    examples = pd.read_csv(card_dir / "examples.csv", header=None, sep="|").fillna("")
    article = (card_dir / "article.md").read_text()
    description = (card_dir / "description.md").read_text()

    # Input widgets, in the positional order expected by ``run_inference``.
    smiles_box = gr.Textbox(
        label="SMILES",
        placeholder="COc1cc(O)c2c(c1)C=CCC(O)C(O)C(=O)C=CCC(C)OC2=O",
        lines=1,
    )
    smiles_upload = gr.File(
        file_types=[".smi", ".tsv"],
        label="List of SMILES (tab-separated file with SMILES in first column)",
    )
    omics_upload = gr.File(
        file_types=[".csv"],
        label="Transcriptomics data with cell lines in rows and genes in columns",
    )
    confidence_toggle = gr.Radio(
        choices=[True, False], label="Estimate confidence", value=False
    )

    # Output widgets: the rendered table and a file slot.
    output_widgets = [gr.DataFrame(label="Output"), gr.File()]

    demo = gr.Interface(
        fn=run_inference,
        title="PaccMann",
        inputs=[smiles_box, smiles_upload, omics_upload, confidence_toggle],
        outputs=output_widgets,
        article=article,
        description=description,
        # examples=examples.values.tolist(),
    )
    demo.launch(debug=True, show_error=True)
|