# fragment_embedder.py
import joblib
import numpy as np
import onnxruntime as rt
import requests
from io import BytesIO
import urllib.request
import tempfile

# The wildcard imports register the descriptor classes needed to unpickle
# the joblib files downloaded in FragmentEmbedder.__init__.
from morgan_desc import *
from physchem_desc import *

CHUNKSIZE = 1024
class FragmentEmbedder(object):
    """Embeds SMILES strings by concatenating Morgan and physicochemical
    descriptors and encoding them with an ONNX model."""

    def __init__(self):
        # Download the fitted descriptor objects from the Hugging Face Hub.
        url = 'https://huggingface.co/ligdis/fpred/resolve/main/morgan_descriptor.joblib'
        with urllib.request.urlopen(url) as response:
            self.morgan_desc = joblib.load(BytesIO(response.read()))
        url = 'https://huggingface.co/ligdis/fpred/resolve/main/physchem_descriptor.joblib'
        with urllib.request.urlopen(url) as response:
            self.physchem_desc = joblib.load(BytesIO(response.read()))
    def _chunker(self, l, n):
        # Yield successive slices of l with at most n elements each.
        for i in range(0, len(l), n):
            yield l[i : i + n]
    def encoder_inference(self, X):
        # Fetch the ONNX encoder from the Hugging Face Hub and run one forward pass.
        url = 'https://huggingface.co/ligdis/fpred/resolve/main/encoder_model.onnx'
        response = requests.get(url)
        if response.status_code != 200:
            raise RuntimeError(
                f"Failed to fetch model from {url}. Status code: {response.status_code}"
            )
        # Write the model bytes to a temporary file and load it with onnxruntime.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.onnx') as temp_file:
            temp_file.write(response.content)
            temp_file_path = temp_file.name
        sess = rt.InferenceSession(temp_file_path)
        input_name = sess.get_inputs()[0].name
        output_name = sess.get_outputs()[0].name
        output_data = sess.run(
            [output_name], {input_name: np.array(X, dtype=np.float32)}
        )
        Y = np.array(output_data[0])
        return Y
    def transform(self, smiles):
        # Embed a list of SMILES strings, processing them in chunks of CHUNKSIZE.
        X = None
        for smiles_chunk in self._chunker(smiles, CHUNKSIZE):
            X_0 = self.morgan_desc.transform(smiles_chunk)
            X_1 = self.physchem_desc.transform(smiles_chunk)
            X_i = np.hstack([X_0, X_1])
            X_o = self.encoder_inference(X_i)
            if X is None:
                X = X_o
            else:
                X = np.vstack([X, X_o])
        return X
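

# ---------------------------------------------------------------------------
# Minimal usage sketch: assumes network access to the Hugging Face Hub and
# that the local morgan_desc / physchem_desc modules are importable. The
# SMILES strings below are arbitrary illustrative inputs.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    embedder = FragmentEmbedder()
    example_smiles = ["CCO", "c1ccccc1"]  # ethanol and benzene
    embeddings = embedder.transform(example_smiles)
    print(embeddings.shape)  # one embedding row per input SMILES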