5
File size: 1,073 Bytes
0676715
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
from rdkit import Chem, DataStructs
import numpy as np
from rdkit.Chem import rdFingerprintGenerator
from sklearn.feature_selection import VarianceThreshold

class MorganFingerprint(object):
    """Featurize SMILES strings as count-based Morgan fingerprints.

    Each molecule is converted to a count fingerprint with RDKit's
    ``rdFingerprintGenerator.GetCountFPs`` (``fpType=MorganFP``, library
    defaults for every other setting).  A scikit-learn
    ``VarianceThreshold(threshold=0)`` is fitted on the training fingerprints
    and then used to drop the columns it flags as having no variance.

    Typical use::

        featurizer = MorganFingerprint()
        featurizer.fit(train_smiles)
        X = featurizer.transform(test_smiles)
    """

    def __init__(self):
        # threshold=0: only columns that are constant over the fitting set are
        # removed by the filter.
        self.variance_filter = VarianceThreshold(threshold=0)

    def get_ecfp_fingerprint(self, smiles_list):
        """Compute the raw (unfiltered) count fingerprints.

        Args:
            smiles_list: Iterable of SMILES strings, one per molecule.

        Returns:
            Integer ``numpy.ndarray`` with one row per input molecule.  For an
            empty input this is an empty 1-D array.

        Raises:
            ValueError: If RDKit cannot parse one of the SMILES strings.
        """
        rows = []
        for smiles in smiles_list:
            mol = Chem.MolFromSmiles(smiles)
            if mol is None:
                # MolFromSmiles signals a parse failure by returning None;
                # fail here with the offending input instead of letting the
                # fingerprint generator choke on a None molecule.
                raise ValueError(
                    "Could not parse SMILES string: {!r}".format(smiles)
                )
            count_vect = rdFingerprintGenerator.GetCountFPs(
                [mol], fpType=rdFingerprintGenerator.MorganFP
            )[0]
            # Zero-length placeholder: ConvertToNumpyArray is expected to
            # resize it to the fingerprint length and fill it in place.
            fingerprint = np.zeros((0,), np.float32)
            DataStructs.ConvertToNumpyArray(count_vect, fingerprint)
            rows.append(fingerprint)
        return np.array(rows, dtype=int)

    def fit(self, smiles):
        """Fit the variance filter on the fingerprints of ``smiles``.

        Args:
            smiles: Iterable of SMILES strings used as the fitting set.

        Returns:
            ``self``, so that calls can be chained.
        """
        X = self.get_ecfp_fingerprint(smiles)
        self.variance_filter.fit(X)
        return self

    def transform(self, smiles):
        """Fingerprint ``smiles`` and keep only the columns selected by ``fit``.

        Args:
            smiles: Iterable of SMILES strings to featurize.

        Returns:
            The fingerprint matrix after ``variance_filter.transform``.
        """
        X = self.get_ecfp_fingerprint(smiles)
        return self.variance_filter.transform(X)