paccmann

Running

App Files Files Community

paccmann / app.py

jannisborn

update

8aab0ae unverified about 2 years ago

raw

history blame

4.72 kB

	import logging
	import os
	import pathlib
	import tempfile
	from typing import List, Optional

	import gradio as gr
	import pandas as pd
	from rdkit import Chem
	from tqdm import tqdm

	from configuration import GENE_EXPRESSION_METADATA
	from submission import submission

	# Module-level logger with a NullHandler attached.
	logger = logging.getLogger(__name__)
	logger.addHandler(logging.NullHandler())

	# Short labels substituted for long tissue-site names; sites that are not
	# listed here are looked up with a fallback to their original name.
	site_mapper = dict(
	    central_nervous_system="CNS",
	    haematopoietic_and_lymphoid_tissue="Haema_lymph",
	    upper_aerodigestive_tract="digestive",
	    autonomic_ganglia="ganglia",
	)


	def _upload_to_path(upload, label: str) -> Optional[str]:
	    """Return the filesystem path behind an optional file argument.

	    Accepts ``None``, a plain ``str`` path, an ``os.PathLike`` object, or any
	    object exposing the path as a string ``name`` attribute (e.g. a temporary
	    file wrapper).

	    Raises:
	        TypeError: If ``upload`` is none of the supported kinds.
	    """
	    if upload is None or isinstance(upload, str):
	        return upload
	    # Check PathLike before ``.name``: on a pathlib.Path ``.name`` is only the
	    # final path component, not the full path.
	    if isinstance(upload, os.PathLike):
	        return os.fspath(upload)
	    name = getattr(upload, "name", None)
	    if isinstance(name, str):
	        return name
	    raise TypeError(
	        f"{label} has to be None, a path or an uploaded file, not {type(upload)}"
	    )


	def run_inference(
	    smiles: Optional[str],
	    smiles_path: Optional[str],
	    omic_path: Optional[str],
	    confidence: bool,
	):
	    """Run ``submission`` for one or several SMILES and tabulate the results.

	    Args:
	        smiles: A single SMILES string. Takes precedence over ``smiles_path``.
	            A blank string is treated like ``None``.
	        smiles_path: Tab-separated, header-less file whose first column holds
	            SMILES strings (path or uploaded-file object). Only read when
	            ``smiles`` is missing.
	        omic_path: Optional omics file (path or uploaded-file object) that is
	            forwarded to ``submission``. When ``None``, the rows of
	            ``GENE_EXPRESSION_METADATA`` are attached to the predictions.
	        confidence: Whether to request and report confidence estimates.

	    Returns:
	        Tuple of the path of the CSV file holding the full result table and a
	        DataFrame with its first 25 rows.

	    Raises:
	        TypeError: If neither ``smiles`` nor ``smiles_path`` is given, or a
	            file argument has an unsupported type.
	        ValueError: If RDKit cannot parse one of the SMILES.
	    """
	    # Treat a blank string like a missing value so that an uploaded file is
	    # not shadowed by an empty text field.
	    if isinstance(smiles, str) and not smiles.strip():
	        smiles = None
	    smiles_file = _upload_to_path(smiles_path, "SMILES file")

	    # Read SMILES
	    if smiles is None and smiles_file is None:
	        raise TypeError("Pass either single SMILES or a file")
	    if smiles is not None:
	        smiles = [smiles]
	    else:
	        smiles_data = pd.read_csv(smiles_file, sep="\t", header=None)
	        smiles = smiles_data[0].tolist()
	    for smi in smiles:
	        if Chem.MolFromSmiles(smi) is None:
	            raise ValueError(f"Found invalid SMILES {smi}")

	    # Read omics and otherwise load baseline
	    omic_path = _upload_to_path(omic_path, "Omics file")

	    # ToDo: Add progress bar for multiple smiles
	    results = {}
	    for smi in tqdm(smiles, total=len(smiles)):
	        result = submission(
	            drug={"smiles": smi},
	            workspace_id="emulated_workspace_id",
	            task_id="emulated_task_id",
	            estimate_confidence=confidence,
	            omics_file=omic_path,
	        )
	        # For the moment no attention analysis
	        result.pop("gene_attention")
	        result.pop("smiles_attention", None)
	        result.pop("IC50")

	        results[f"IC50_{smi}"] = result["log_micromolar_IC50"].squeeze().round(3)
	        if confidence:
	            results[f"aleatoric_confidence_{smi}"] = (
	                result["aleatoric_confidence"].squeeze().round(3)
	            )
	            # Previously this column duplicated the aleatoric values.
	            # NOTE(review): assumes `submission` returns an
	            # "epistemic_confidence" entry - confirm.
	            results[f"epistemic_confidence_{smi}"] = (
	                result["epistemic_confidence"].squeeze().round(3)
	            )
	    predicted_df = pd.DataFrame(results)

	    # Prepare DF to visualize
	    if omic_path is None:
	        df = GENE_EXPRESSION_METADATA.copy()
	        df.drop(
	            [
	                "histology",
	                "cell_line_name",
	                "IC50 (min/max scaled)",
	                "IC50 (log(μmol))",
	            ],
	            axis=1,
	            inplace=True,
	        )
	        df["site"] = df["site"].apply(lambda x: site_mapper.get(x, x))
	        df["cell_line"] = df["cell_line"].apply(lambda x: x.split("_")[0])
	        if confidence:
	            # The metadata's own confidence columns are dropped when fresh
	            # estimates are reported per SMILES.
	            df.drop(
	                ["aleatoric_confidence", "epistemic_confidence"],
	                axis=1,
	                inplace=True,
	            )
	        result_df = pd.concat(
	            [df["cell_line"], predicted_df, df.drop(["cell_line"], axis=1)],
	            axis=1,
	        )
	    else:
	        # No metadata is available for user-supplied omics, so only the
	        # predictions are returned (previously this path hit a NameError).
	        result_df = predicted_df

	    # Save into a fresh temporary directory so that concurrent requests do
	    # not overwrite each other's result file; the file name stays the same.
	    temp_path = os.path.join(
	        tempfile.mkdtemp(prefix="paccmann_"), "paccmann_result.csv"
	    )
	    result_df.to_csv(temp_path)

	    return temp_path, result_df.head(25)


	if __name__ == "__main__":

	    # Load metadata
	    metadata_root = pathlib.Path(__file__).parent.joinpath("model_cards")

	    # Each example lists the inputs in the order of the interface below:
	    # SMILES text, SMILES file, omics file, confidence flag.
	    examples = [
	        ["COc1cc(O)c2c(c1)C=CCC(O)C(O)C(=O)C=CCC(C)OC2=O", None, None, False],
	        ["COC1=C(C=C2C(=C1)N=CN=C2NC3=CC(=C(C=C3)F)Cl)OCCCN4CCOCC4", None, None, True],
	        [None, metadata_root.joinpath("molecules.smi"), None, False],
	    ]
	    # Read the markdown explicitly as UTF-8 instead of relying on the
	    # platform-default encoding.
	    article = metadata_root.joinpath("article.md").read_text(encoding="utf-8")
	    description = metadata_root.joinpath("description.md").read_text(
	        encoding="utf-8"
	    )

	    demo = gr.Interface(
	        fn=run_inference,
	        title="PaccMann",
	        inputs=[
	            gr.Textbox(
	                label="SMILES",
	                placeholder="COc1cc(O)c2c(c1)C=CCC(O)C(O)C(=O)C=CCC(C)OC2=O",
	                lines=1,
	            ),
	            gr.File(
	                file_types=[".smi", ".tsv"],
	                label="Multiple SMILES",
	            ),
	            gr.File(
	                file_types=[".csv"],
	                label="Transcriptomics data file",
	            ),
	            gr.Radio(choices=[True, False], label="Estimate confidence", value=False),
	        ],
	        outputs=[
	            gr.File(label="Download full results"),
	            gr.DataFrame(label="Preview of results for 25 cell lines"),
	        ],
	        article=article,
	        description=description,
	        examples=examples,
	    )
	    demo.launch(debug=True, show_error=True)