"""Embed SMILES strings with remotely hosted descriptors and an ONNX encoder.

The descriptor objects and the encoder model are downloaded from the
``ligdis/fpred`` Hugging Face repository at runtime, so network access is
required both when constructing :class:`FragmentEmbedder` and on the first
call that runs the encoder.
"""
import os
import sys
import tempfile
import urllib.request
from io import BytesIO

import joblib
import numpy as np
import onnxruntime as rt
import requests

# NOTE(review): the wildcard imports are kept as in the original; presumably
# they make the pickled descriptor classes resolvable during ``joblib.load``.
# Confirm before narrowing them.
from morgan_desc import *  # noqa: F401,F403
from physchem_desc import *  # noqa: F401,F403

# Number of inputs that ``FragmentEmbedder.transform`` processes per batch.
CHUNKSIZE = 1024

_MORGAN_DESC_URL = (
    "https://huggingface.co/ligdis/fpred/resolve/main/morgan_descriptor.joblib"
)
_PHYSCHEM_DESC_URL = (
    "https://huggingface.co/ligdis/fpred/resolve/main/physchem_descriptor.joblib"
)
_ENCODER_MODEL_URL = (
    "https://huggingface.co/ligdis/fpred/resolve/main/encoder_model.onnx"
)


class FragmentEmbedder:
    """Turn SMILES strings into embeddings via two descriptors and an encoder.

    Attributes:
        morgan_desc: Object loaded from ``morgan_descriptor.joblib``; only its
            ``transform`` method is used here.
        physchem_desc: Object loaded from ``physchem_descriptor.joblib``; only
            its ``transform`` method is used here.
    """

    def __init__(self):
        """Download and deserialize both descriptor objects."""
        self.morgan_desc = self._load_remote_joblib(_MORGAN_DESC_URL)
        self.physchem_desc = self._load_remote_joblib(_PHYSCHEM_DESC_URL)
        # Created lazily by ``_get_encoder_session`` on first inference.
        self._encoder_session = None

    @staticmethod
    def _load_remote_joblib(url):
        """Fetch ``url`` and return the object stored in the joblib payload."""
        # SECURITY: joblib.load unpickles its input, which can execute
        # arbitrary code. This is only acceptable while the remote repository
        # is fully trusted; consider pinning a revision or verifying a hash.
        with urllib.request.urlopen(url) as response:
            return joblib.load(BytesIO(response.read()))

    def _chunker(self, l, n):
        """Yield consecutive slices of ``l`` holding at most ``n`` items."""
        for i in range(0, len(l), n):
            yield l[i : i + n]

    def _get_encoder_session(self):
        """Return the ONNX inference session, downloading the model once.

        The session is cached on the instance so the model is not fetched
        again for every chunk.

        Raises:
            RuntimeError: If the model download does not return HTTP 200.
        """
        # getattr keeps this working on instances created without __init__.
        session = getattr(self, "_encoder_session", None)
        if session is None:
            response = requests.get(_ENCODER_MODEL_URL)
            if response.status_code != 200:
                raise RuntimeError(
                    f"Failed to fetch model from {_ENCODER_MODEL_URL}. "
                    f"Status code: {response.status_code}"
                )
            # InferenceSession accepts the serialized model as bytes, so no
            # temporary file is needed (and none is left behind on disk).
            session = rt.InferenceSession(response.content)
            self._encoder_session = session
        return session

    def encoder_inference(self, X):
        """Run the encoder model on ``X`` and return its first output.

        Args:
            X: Array-like input; it is converted to a ``float32`` NumPy array
                before being fed to the model's first input.

        Returns:
            numpy.ndarray: The model's first output.

        Raises:
            RuntimeError: If the encoder model cannot be downloaded.
        """
        sess = self._get_encoder_session()
        input_name = sess.get_inputs()[0].name
        output_name = sess.get_outputs()[0].name
        output_data = sess.run(
            [output_name], {input_name: np.array(X, dtype=np.float32)}
        )
        return np.array(output_data[0])

    def transform(self, smiles):
        """Embed ``smiles`` in batches of ``CHUNKSIZE``.

        For each batch, the outputs of both descriptors are concatenated
        column-wise and passed through the encoder.

        Args:
            smiles: Sliceable sequence (must support ``len``) that both
                descriptors' ``transform`` methods accept.

        Returns:
            numpy.ndarray | None: Encoder outputs for all batches stacked
            row-wise, or ``None`` when ``smiles`` is empty.
        """
        encoded_chunks = []
        for smiles_chunk in self._chunker(smiles, CHUNKSIZE):
            X_0 = self.morgan_desc.transform(smiles_chunk)
            X_1 = self.physchem_desc.transform(smiles_chunk)
            X_i = np.hstack([X_0, X_1])
            encoded_chunks.append(self.encoder_inference(X_i))

        if not encoded_chunks:
            return None
        if len(encoded_chunks) == 1:
            # Single batch: hand back the encoder output unchanged.
            return encoded_chunks[0]
        # One stack at the end instead of re-copying the result per batch.
        return np.vstack(encoded_chunks)