# Snapshot metadata: file size 1,339 bytes; id 5e01175 (presumably the revision this copy was taken from).
import os
import pkg_resources
import pickle
from typing import Dict
from config import config
import h5py
import numpy as np
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
from joblib import Memory
# On-disk cache used by the ``@memory.cache``-decorated loaders in this module.
# The cache lives under the current user's home directory so it persists
# across interpreter sessions.
home_dir = os.path.expanduser('~')
cachedir = os.path.join(
    home_dir,
    '.cache',
    'protac_degradation_predictor',
)
# verbose=0 keeps joblib quiet when cached results are stored or reused.
memory = Memory(cachedir, verbose=0)
@memory.cache
def load_protein2embedding() -> Dict[str, np.ndarray]:
    """Load the protein embeddings bundled with the package.

    Reads every top-level entry of the packaged HDF5 file
    ``data/uniprot2embedding.h5`` fully into memory. The result is cached on
    disk through ``memory.cache``.

    Returns:
        A dictionary mapping each top-level HDF5 key (presumably a UniProt
        identifier, judging by the file name) to its embedding array.
    """
    # resource_stream returns an open binary file-like object; manage it with
    # ``with`` so the handle is released once the data has been read.
    with pkg_resources.resource_stream(
        __name__, 'data/uniprot2embedding.h5'
    ) as stream, h5py.File(stream, "r") as file:
        # ``dataset[:]`` reads the whole dataset into memory; np.asarray only
        # guarantees the ndarray type without making a second copy.
        return {
            sequence_id: np.asarray(file[sequence_id][:])
            for sequence_id in file.keys()
        }
@memory.cache
def load_cell2embedding() -> Dict[str, np.ndarray]:
    """Load the cell embeddings bundled with the package.

    Unpickles the packaged file ``data/cell2embedding.pkl``. The result is
    cached on disk through ``memory.cache``.

    Returns:
        The object stored in the pickle file; per the annotation, a
        dictionary mapping a cell identifier to its embedding array.
    """
    # resource_stream already returns an open binary file-like object, so it
    # must be read directly: passing it to open() raises TypeError because
    # open() only accepts a path or a file descriptor.
    # NOTE: pickle executes arbitrary code on load. This is acceptable only
    # because the file ships with the package; never use it on untrusted data.
    with pkg_resources.resource_stream(
        __name__, 'data/cell2embedding.pkl'
    ) as stream:
        return pickle.load(stream)
def get_fingerprint(smiles: str) -> np.ndarray:
    """Compute the Morgan fingerprint of a molecule given as SMILES.

    The generator is configured from ``config.morgan_radius`` and
    ``config.fingerprint_size`` and always includes chirality.

    Args:
        smiles: SMILES string of the molecule.

    Returns:
        The value produced by the generator's ``GetFingerprint`` call.
        NOTE(review): RDKit documents this as a bit-vector object rather
        than a NumPy array, so the annotation may be inaccurate; confirm
        against callers.
    """
    generator_settings = dict(
        radius=config.morgan_radius,
        fpSize=config.fingerprint_size,
        includeChirality=True,
    )
    generator = AllChem.GetMorganGenerator(**generator_settings)
    # MolFromSmiles' result is passed on unchecked, exactly as before.
    molecule = Chem.MolFromSmiles(smiles)
    return generator.GetFingerprint(molecule)