# 5 / morgan_desc.py
# ligdis's picture
# Upload 8 files
# 0676715 verified
from rdkit import Chem, DataStructs
import numpy as np
from rdkit.Chem import rdFingerprintGenerator
from sklearn.feature_selection import VarianceThreshold
class MorganFingerprint(object):
    """Morgan count-fingerprint descriptor with a variance filter.

    SMILES strings are converted to Morgan count fingerprints with RDKit,
    and a scikit-learn ``VarianceThreshold`` (``threshold=0``) learned in
    :meth:`fit` is applied to the fingerprint matrix in :meth:`transform`.
    """

    def __init__(self):
        # threshold=0: the filter is configured to keep only columns whose
        # variance over the fitting set is non-zero.
        self.variance_filter = VarianceThreshold(threshold=0)

    def get_ecfp_fingerprint(self, smiles_list):
        """Compute one Morgan count fingerprint per SMILES string.

        Args:
            smiles_list: Iterable of SMILES strings.

        Returns:
            An integer NumPy array with one row per input molecule. The
            row width is whatever RDKit's default Morgan count fingerprint
            size is. For an empty input the result is an empty 1-D array.

        Raises:
            ValueError: If RDKit cannot parse one of the SMILES strings.
        """
        rows = []
        for smiles in smiles_list:
            mol = Chem.MolFromSmiles(smiles)
            if mol is None:
                # MolFromSmiles signals a parse failure by returning None;
                # fail here with the offending input instead of letting
                # the fingerprint generator choke on a None molecule.
                raise ValueError(
                    "Could not parse SMILES: {!r}".format(smiles)
                )
            count_vect = rdFingerprintGenerator.GetCountFPs(
                [mol], fpType=rdFingerprintGenerator.MorganFP
            )[0]
            # Zero-length placeholder: ConvertToNumpyArray resizes the
            # destination array and fills it in place.
            fingerprint = np.zeros((0,), np.float32)
            DataStructs.ConvertToNumpyArray(count_vect, fingerprint)
            rows.append(fingerprint)
        return np.array(rows, dtype=int)

    def fit(self, smiles):
        """Fit the variance filter on the fingerprints of ``smiles``.

        Args:
            smiles: Iterable of SMILES strings used as the fitting set.

        Returns:
            This instance, so calls can be chained
            (``MorganFingerprint().fit(train).transform(test)``).
        """
        X = self.get_ecfp_fingerprint(smiles)
        self.variance_filter.fit(X)
        return self

    def transform(self, smiles):
        """Return variance-filtered fingerprints for ``smiles``.

        Args:
            smiles: Iterable of SMILES strings.

        Returns:
            The fingerprint matrix of ``smiles`` after applying the
            variance filter; :meth:`fit` must have been called first.
        """
        X = self.get_ecfp_fingerprint(smiles)
        return self.variance_filter.transform(X)