# 5 / fragment_embedder.py
# ligdis's picture
# Upload 8 files
# 0676715 verified
import os
import joblib
import numpy as np
import onnxruntime as rt
import sys
import requests
from io import BytesIO
import urllib.request
from morgan_desc import *
from physchem_desc import *
import tempfile
# Number of items from the `smiles` input that FragmentEmbedder.transform
# featurizes and encodes per batch.
CHUNKSIZE = 1024
# root = os.path.abspath(os.path.dirname(__file__))
# sys.path.append(root)
class FragmentEmbedder(object):
    """Embed SMILES inputs with two remote descriptors and an ONNX encoder.

    On construction, two joblib-serialized descriptor objects are downloaded
    and stored as ``morgan_desc`` and ``physchem_desc``. ``transform`` calls
    ``.transform`` on both, concatenates the results column-wise and feeds
    them to an ONNX encoder model, which is downloaded lazily on first use
    and then reused for every later call.
    """

    _MORGAN_URL = 'https://huggingface.co/ligdis/fpred/resolve/main/morgan_descriptor.joblib'
    _PHYSCHEM_URL = 'https://huggingface.co/ligdis/fpred/resolve/main/physchem_descriptor.joblib'
    _ENCODER_URL = 'https://huggingface.co/ligdis/fpred/resolve/main/encoder_model.onnx'

    def __init__(self):
        """Download both descriptor objects; the encoder is fetched lazily."""
        self.morgan_desc = self._load_joblib(self._MORGAN_URL)
        self.physchem_desc = self._load_joblib(self._PHYSCHEM_URL)
        # Populated by _get_encoder_session() the first time it is needed.
        self._encoder_session = None

    @staticmethod
    def _load_joblib(url):
        """Download ``url`` and deserialize its payload with joblib.

        SECURITY NOTE: joblib.load unpickles the payload, which can execute
        arbitrary code. Only point this at sources you trust.
        """
        with urllib.request.urlopen(url) as response:
            return joblib.load(BytesIO(response.read()))

    def _get_encoder_session(self):
        """Return the ONNX inference session, downloading the model once.

        Raises:
            RuntimeError: if the model download does not return HTTP 200.
        """
        # getattr keeps this working for instances built without __init__.
        sess = getattr(self, "_encoder_session", None)
        if sess is not None:
            return sess

        url = self._ENCODER_URL
        response = requests.get(url)
        if response.status_code != 200:
            raise RuntimeError(
                f"Failed to fetch model from {url}. Status code: {response.status_code}"
            )
        # InferenceSession accepts the serialized model as bytes, so no
        # temporary file has to be written (and later cleaned up).
        sess = rt.InferenceSession(response.content)
        self._encoder_session = sess
        return sess

    def _chunker(self, l, n):
        """Yield consecutive slices of ``l`` holding at most ``n`` items."""
        for i in range(0, len(l), n):
            yield l[i : i + n]

    def encoder_inference(self, X):
        """Run the encoder on ``X`` and return its first output as an array.

        Args:
            X: array-like that is converted to a float32 numpy array and
                bound to the model's first input.

        Returns:
            numpy.ndarray built from the model's first output.

        Raises:
            RuntimeError: if the encoder model cannot be downloaded.
        """
        sess = self._get_encoder_session()
        input_name = sess.get_inputs()[0].name
        output_name = sess.get_outputs()[0].name
        output_data = sess.run(
            [output_name], {input_name: np.array(X, dtype=np.float32)}
        )
        return np.array(output_data[0])

    def transform(self, smiles):
        """Embed ``smiles`` in batches of ``CHUNKSIZE``.

        Args:
            smiles: sequence supporting ``len`` and slicing; each slice is
                passed to both descriptors' ``transform`` methods.

        Returns:
            The encoder outputs of all batches stacked row-wise, or ``None``
            when ``smiles`` is empty.
        """
        embedded_chunks = []
        for smiles_chunk in self._chunker(smiles, CHUNKSIZE):
            X_0 = self.morgan_desc.transform(smiles_chunk)
            X_1 = self.physchem_desc.transform(smiles_chunk)
            embedded_chunks.append(self.encoder_inference(np.hstack([X_0, X_1])))

        if not embedded_chunks:
            return None
        if len(embedded_chunks) == 1:
            # Return the single batch untouched rather than a stacked copy.
            return embedded_chunks[0]
        # One stack at the end avoids re-copying the accumulated array per batch.
        return np.vstack(embedded_chunks)