|
import os |
|
import pkg_resources |
|
import pickle |
|
from typing import Dict |
|
|
|
from config import config |
|
|
|
import h5py |
|
import numpy as np |
|
import pandas as pd |
|
from rdkit import Chem |
|
from rdkit.Chem import AllChem |
|
from joblib import Memory |
|
|
|
|
|
# Per-user cache location: <home>/.cache/protac_degradation_predictor.
home_dir = os.path.expanduser('~')
_cache_subdirs = ('.cache', 'protac_degradation_predictor')
cachedir = os.path.join(home_dir, *_cache_subdirs)

# joblib store backing the ``@memory.cache`` decorators used in this module.
memory = Memory(cachedir, verbose=0)
|
|
|
|
|
@memory.cache
def load_protein2embedding() -> Dict[str, np.ndarray]:
    """Load the packaged protein embedding table from HDF5.

    Reads every top-level entry of the ``data/uniprot2embedding.h5`` package
    resource and copies it into memory, so the result stays usable after the
    file is closed. The call is wrapped in ``memory.cache``, so the result is
    persisted in the joblib store configured at module level.

    Returns:
        Dict mapping each top-level HDF5 key to its content as a NumPy array.
        Judging by the file name, keys are presumably UniProt identifiers --
        confirm against the data file.
    """
    # resource_stream returns an open binary stream (not a path); keep it in
    # a ``with`` block so the handle is released once everything is read.
    with pkg_resources.resource_stream(
        __name__, 'data/uniprot2embedding.h5'
    ) as stream:
        with h5py.File(stream, "r") as file:
            # ``dataset[:]`` materialises the data as an ndarray before the
            # file is closed; asarray avoids a second, redundant copy.
            return {
                sequence_id: np.asarray(file[sequence_id][:])
                for sequence_id in file.keys()
            }
|
|
|
|
|
@memory.cache
def load_cell2embedding() -> Dict[str, np.ndarray]:
    """Load the packaged cell embedding table from a pickle file.

    Unpickles the ``data/cell2embedding.pkl`` package resource and returns the
    resulting object unchanged. The call is wrapped in ``memory.cache``, so
    the result is persisted in the joblib store configured at module level.

    Returns:
        The unpickled object; annotated as a dict from str to NumPy array.
        NOTE(review): the actual key/value types depend on the pickle
        contents and are not validated here.
    """
    # resource_stream already returns an open binary file object, so it is
    # read directly: handing it to ``open()`` raises TypeError because
    # ``open`` expects a path, not a stream.
    with pkg_resources.resource_stream(
        __name__, 'data/cell2embedding.pkl'
    ) as f:
        # NOTE: pickle can execute arbitrary code on load. This is acceptable
        # only because the file ships inside the package; never point this
        # at user-supplied data.
        return pickle.load(f)
|
|
|
|
|
def get_fingerprint(smiles: str) -> np.ndarray:
    """Compute a chirality-aware Morgan fingerprint for a SMILES string.

    The generator is configured from ``config.morgan_radius`` and
    ``config.fingerprint_size`` and is rebuilt on every call.

    Args:
        smiles: SMILES string, parsed with ``Chem.MolFromSmiles``.

    Returns:
        The value produced by the generator's ``GetFingerprint`` for the
        parsed molecule.
        NOTE(review): the signature is annotated ``np.ndarray`` but no array
        conversion happens here -- confirm what callers expect.
    """
    generator_options = dict(
        radius=config.morgan_radius,
        fpSize=config.fingerprint_size,
        includeChirality=True,
    )
    morgan_generator = AllChem.GetMorganGenerator(**generator_options)

    # The parse result is forwarded as-is; it is not checked before being
    # fingerprinted.
    molecule = Chem.MolFromSmiles(smiles)
    return morgan_generator.GetFingerprint(molecule)