|
import os |
|
import joblib |
|
import numpy as np |
|
import onnxruntime as rt |
|
import sys |
|
import requests |
|
from io import BytesIO |
|
import urllib.request |
|
from morgan_desc import * |
|
from physchem_desc import * |
|
import tempfile |
|
|
|
# Number of SMILES strings processed per batch in FragmentEmbedder.transform.
CHUNKSIZE = 1024
|
|
|
|
|
|
|
|
|
class FragmentEmbedder(object):
    """Embed SMILES strings into a dense vector space.

    On construction, two fitted descriptor objects (Morgan fingerprints and
    physicochemical descriptors) are downloaded from Hugging Face and loaded
    with joblib. ``transform`` concatenates both descriptor matrices and runs
    them through an ONNX encoder model, which is downloaded lazily and cached
    for the lifetime of the instance.
    """

    def __init__(self):
        """Download and deserialize the two descriptor models.

        Requires network access. Raises ``urllib.error.URLError`` (or
        subclasses) if either download fails.
        """
        url = 'https://huggingface.co/ligdis/fpred/resolve/main/morgan_descriptor.joblib'
        with urllib.request.urlopen(url) as response:
            self.morgan_desc = joblib.load(BytesIO(response.read()))

        url = 'https://huggingface.co/ligdis/fpred/resolve/main/physchem_descriptor.joblib'
        with urllib.request.urlopen(url) as response:
            self.physchem_desc = joblib.load(BytesIO(response.read()))

        # ONNX inference session, created on first use so the encoder model
        # is downloaded at most once per instance (the original code
        # re-downloaded it on every encoder_inference call).
        self._encoder_sess = None

    def _chunker(self, l, n):
        """Yield successive slices of ``l`` of length ``n`` (last may be shorter)."""
        for i in range(0, len(l), n):
            yield l[i : i + n]

    def _get_encoder_session(self):
        """Return the cached ONNX session, downloading the model if needed.

        Raises:
            RuntimeError: if the model cannot be fetched (non-200 response).
        """
        # getattr fallback keeps this working even on instances created
        # without running __init__ (e.g. via __new__).
        sess = getattr(self, "_encoder_sess", None)
        if sess is None:
            url = 'https://huggingface.co/ligdis/fpred/resolve/main/encoder_model.onnx'
            response = requests.get(url)
            if response.status_code != 200:
                # BUG FIX: the original called st.write (undefined name) here
                # and then fell through to use an unbound `sess`. Fail loudly
                # instead, preserving the original message text.
                raise RuntimeError(
                    f"Failed to fetch model from {url}. Status code: {response.status_code}"
                )
            # InferenceSession accepts the serialized model as bytes, so no
            # temp file is needed (the original leaked a delete=False file).
            sess = rt.InferenceSession(response.content)
            self._encoder_sess = sess
        return sess

    def encoder_inference(self, X):
        """Run the ONNX encoder on feature matrix ``X``.

        Args:
            X: 2-D array-like of input features; cast to float32.

        Returns:
            numpy.ndarray with the encoder's output embeddings.
        """
        sess = self._get_encoder_session()
        input_name = sess.get_inputs()[0].name
        output_name = sess.get_outputs()[0].name
        output_data = sess.run(
            [output_name], {input_name: np.array(X, dtype=np.float32)}
        )
        return np.array(output_data[0])

    def transform(self, smiles):
        """Embed a sequence of SMILES strings.

        Processes the input in CHUNKSIZE batches: each batch is featurized
        with the Morgan and physchem descriptors, the two matrices are
        horizontally stacked, and the result is passed through the encoder.

        Args:
            smiles: sequence of SMILES strings.

        Returns:
            numpy.ndarray of stacked embeddings, or None for empty input
            (preserves the original behavior).
        """
        X = None
        for smiles_chunk in self._chunker(smiles, CHUNKSIZE):
            X_0 = self.morgan_desc.transform(smiles_chunk)
            X_1 = self.physchem_desc.transform(smiles_chunk)
            X_i = np.hstack([X_0, X_1])
            X_o = self.encoder_inference(X_i)
            X = X_o if X is None else np.vstack([X, X_o])
        return X
|
|