bcadkins01 committed on
Commit
fef0be9
·
verified ·
1 Parent(s): 5c63f46

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -0
app.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from rdkit import Chem
3
+ from rdkit.Chem import Draw, Descriptors, AllChem
4
+ from rdkit.DataStructs.cDataStructs import ConvertToNumpyArray
5
+ import numpy as np
6
+ import xgboost as xgb
7
+ from PIL import Image
8
+
9
+
10
# Create an empty XGBoost classifier, then restore its trained state from
# the "xg3.json" file located next to this script.
model = xgb.XGBClassifier()
model.load_model("xg3.json")
13
+
14
+ # Function to convert SMILES string to descriptors and Lipinski features
15
def predict_smiles(smiles):
    """Turn a SMILES string into the feature vector used by the classifier.

    Parameters
    ----------
    smiles : str
        SMILES text describing a molecule. Leading and trailing whitespace
        is ignored.

    Returns
    -------
    tuple
        On success ``(features, lipinski_features)``:

        * ``features`` is a 1-D array holding the Morgan fingerprint bits
          (radius 2, 2048 bits) followed by the four descriptor values.
        * ``lipinski_features`` is a ``(1, 4)`` array ordered as
          ``[MolWt, MolLogP, NumHAcceptors, NumHDonors]``.

        On failure ``(None, message)``, where ``message`` is a user-facing
        error string.
    """
    invalid = (None, "Invalid SMILES string. Please enter a valid SMILES.")

    # Reject missing / non-text / blank input up front: RDKit raises on
    # non-string arguments and can hand back an empty (zero-atom) molecule
    # for blank text instead of None, which would otherwise be classified.
    if not isinstance(smiles, str) or not smiles.strip():
        return invalid

    mol = Chem.MolFromSmiles(smiles.strip())
    if mol is None or mol.GetNumAtoms() == 0:
        return invalid

    fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius=2, nBits=2048)
    # The destination array is filled by ConvertToNumpyArray; the initial
    # size is only a placeholder (presumably resized to the bit-vector
    # length by RDKit -- confirm against the RDKit version in use).
    fp_array = np.zeros((1,), dtype=int)
    ConvertToNumpyArray(fp, fp_array)

    lipinski_features = np.array([[
        Descriptors.MolWt(mol),
        Descriptors.MolLogP(mol),
        Descriptors.NumHAcceptors(mol),
        Descriptors.NumHDonors(mol),
    ]])

    # Combine fingerprint and Lipinski features into one flat vector.
    features = np.concatenate([fp_array, lipinski_features.flatten()])
    return features, lipinski_features
28
+
29
+ # Updated function to predict from SMILES and visualize Lipinski's features
30
def predict_and_visualize(smiles):
    """Classify a SMILES string and build the three values shown in the UI.

    Parameters
    ----------
    smiles : str
        SMILES text entered by the user.

    Returns
    -------
    tuple
        ``(result, lipinski_html, image)``:

        * ``result`` is ``"drug-like"`` or ``"not drug-like"``, or an error
          message when the SMILES could not be processed.
        * ``lipinski_html`` is an HTML snippet listing the rounded
          descriptor values (or a placeholder note on error).
        * ``image`` is an 800x800 PIL image of the molecule, or ``None``
          on error.
    """
    features, lipinski_features = predict_smiles(smiles)
    if features is None:
        # Invalid input: clear message, placeholder for the features, and
        # no image.
        error_message = "Invalid SMILES string. Please enter a valid SMILES."
        placeholder_features = "<b><u>RDKit estimates</u></b><br>Not applicable due to invalid SMILES input."
        return error_message, placeholder_features, None

    # Round the descriptors for display; the counts are shown as integers.
    molecular_weight, alogp, hba, hbd = lipinski_features.flatten()
    molecular_weight = round(molecular_weight, 1)
    alogp = round(alogp, 1)
    hba = int(hba)
    hbd = int(hbd)

    # The model expects a 2-D batch; a single row goes in and the single
    # predicted label is read out explicitly rather than relying on the
    # truth value of an array comparison.
    prediction = model.predict(features.reshape(1, -1))
    result = "drug-like" if prediction[0] == 1 else "not drug-like"

    # predict_smiles does not hand back the parsed molecule, so parse the
    # SMILES again to render it.
    mol = Chem.MolFromSmiles(smiles)
    img = Draw.MolToImage(mol)
    img = img.resize((800, 800), Image.Resampling.LANCZOS)

    features_names = ["Molecular Weight", "AlogP", "HBA", "HBD"]
    display_values = [molecular_weight, alogp, hba, hbd]
    lipinski_str = "<b><u>RDKit estimates</u></b><br>" + "<br>".join(
        f"{name}: {value}" for name, value in zip(features_names, display_values)
    )

    return result, lipinski_str, img
58
+
59
# Gradio interface: one SMILES text box in; prediction text, descriptor
# HTML and a molecule image out.
iface = gr.Interface(
    fn=predict_and_visualize,
    inputs=gr.Textbox(lines=2, placeholder="Enter SMILES string here..."),
    outputs=[
        gr.Text(label="Prediction"),
        gr.HTML(label="Lipinski's Features"),
        gr.Image(label="Molecule Visualization"),
    ],
    title="XGBoost Drug-like Classifier",
    description="This application predicts whether a molecule is drug-like based on its SMILES representation.",
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()