Drug_Classifier / app.py
bcadkins01's picture
Create app.py
fef0be9 verified
raw
history blame
3.01 kB
import gradio as gr
from rdkit import Chem
from rdkit.Chem import Draw, Descriptors, AllChem
from rdkit.DataStructs.cDataStructs import ConvertToNumpyArray
import numpy as np
import xgboost as xgb
from PIL import Image
# Load the saved XGBoost classifier once, at import time. The model file
# "xg3.json" is given as a relative path, so it is resolved against the
# process's current working directory.
model = xgb.XGBClassifier()
model.load_model("xg3.json")
# Convert a SMILES string into the model's feature vector (Morgan fingerprint bits followed by four Lipinski descriptors)
def predict_smiles(smiles):
    """Build the classifier's feature vector for a SMILES string.

    The feature vector is the 2048-bit Morgan fingerprint (radius 2) followed
    by four Lipinski descriptors: molecular weight, logP, H-bond acceptor
    count and H-bond donor count.

    Args:
        smiles: SMILES representation of the molecule.

    Returns:
        On success, ``(features, lipinski_features)`` where ``features`` is a
        1-D array (fingerprint bits, then the four descriptors) and
        ``lipinski_features`` is a ``(1, 4)`` array holding the descriptors
        alone. For missing, blank or unparsable input, ``(None, message)``
        where ``message`` is a user-facing error string.
    """
    invalid = "Invalid SMILES string. Please enter a valid SMILES."

    # Reject blank or non-string input before handing it to RDKit: an empty
    # string parses to an empty molecule (not None), which would otherwise be
    # fingerprinted and classified as if it were a real compound.
    if not isinstance(smiles, str) or not smiles.strip():
        return None, invalid

    mol = Chem.MolFromSmiles(smiles)
    if mol is None:  # RDKit could not parse the string
        return None, invalid

    fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius=2, nBits=2048)
    # ConvertToNumpyArray resizes the destination to the fingerprint length,
    # so the initial size of this array is irrelevant.
    fp_array = np.zeros((1,), dtype=int)
    ConvertToNumpyArray(fp, fp_array)

    lipinski_features = np.array([[
        Descriptors.MolWt(mol),
        Descriptors.MolLogP(mol),
        Descriptors.NumHAcceptors(mol),
        Descriptors.NumHDonors(mol),
    ]])

    # Combine fingerprint and Lipinski features into one flat vector
    features = np.concatenate([fp_array, lipinski_features.flatten()])
    return features, lipinski_features
# Predict drug-likeness from a SMILES string and report its Lipinski features alongside a drawing of the molecule
def predict_and_visualize(smiles):
    """Classify a molecule and prepare the values shown in the UI.

    Args:
        smiles: SMILES string entered by the user.

    Returns:
        A 3-tuple ``(prediction_text, lipinski_html, image)``:

        * ``prediction_text`` is ``"drug-like"`` or ``"not drug-like"``, or an
          error message when the SMILES cannot be used.
        * ``lipinski_html`` is an HTML snippet listing molecular weight,
          AlogP, HBA and HBD (or a placeholder on invalid input).
        * ``image`` is an 800x800 PIL image of the molecule, or ``None`` on
          invalid input.
    """
    features, lipinski_or_error = predict_smiles(smiles)
    if features is None:
        # On failure the second element is the error message produced by
        # predict_smiles; surface it rather than duplicating the text here.
        placeholder_features = "<b><u>RDKit estimates</u></b><br>Not applicable due to invalid SMILES input."
        return lipinski_or_error, placeholder_features, None

    # Process valid SMILES: round the continuous descriptors for display and
    # show the H-bond counts as whole numbers. Plain Python floats keep the
    # rendered text independent of NumPy scalar formatting.
    molecular_weight, alogp, hba, hbd = lipinski_or_error.flatten()
    molecular_weight = round(float(molecular_weight), 1)
    alogp = round(float(alogp), 1)
    hba = int(hba)
    hbd = int(hbd)

    # The model expects a 2-D (samples, features) array; we pass one sample
    # and read back its single label explicitly instead of relying on the
    # truthiness of a one-element array.
    prediction = model.predict(features.reshape(1, -1))
    result = "drug-like" if prediction[0] == 1 else "not drug-like"

    # predict_smiles does not return the parsed molecule, so parse again for
    # drawing.
    mol = Chem.MolFromSmiles(smiles)
    img = Draw.MolToImage(mol)
    img = img.resize((800, 800), Image.Resampling.LANCZOS)

    features_names = ["Molecular Weight", "AlogP", "HBA", "HBD"]
    display_values = [molecular_weight, alogp, hba, hbd]
    lipinski_str = "<b><u>RDKit estimates</u></b><br>" + "<br>".join(
        f"{name}: {value}" for name, value in zip(features_names, display_values)
    )
    return result, lipinski_str, img
# Gradio interface: one SMILES text box in; prediction text, an HTML summary
# of the Lipinski features and a rendering of the molecule out.
iface = gr.Interface(
    fn=predict_and_visualize,
    inputs=gr.Textbox(lines=2, placeholder="Enter SMILES string here..."),
    outputs=[
        gr.Text(label="Prediction"),
        gr.HTML(label="Lipinski's Features"),
        gr.Image(label="Molecule Visualization"),
    ],
    title="XGBoost Drug-like Classifier",
    description=(
        "This application predicts whether a molecule is drug-like "
        "based on its SMILES representation."
    ),
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()