Spaces:

emmas96
/

hyper-dti

Running

App Files Files Community

knfn081 committed on Dec 7, 2023

Commit

6a624f6

1 Parent(s): e3bf276

develop working retrieval app for pre-defined target

Browse files

Files changed (3) hide show

app.py +108 -317
figures/multi_molecules.png +0 -0
requirements.txt +0 -1

app.py CHANGED Viewed

@@ -1,30 +1,37 @@
 import os
 import sys
-#import torch
-import numpy as np
 import pandas as pd
 import streamlit as st
-#import esm
 from rdkit import Chem
 from rdkit.Chem import Draw
 sys.path.insert(0, os.path.abspath("src/"))
-st.set_page_config(layout="wide")
-basepath = os.path.dirname(__file__)
-datapath = os.path.join(basepath, "data")
-st.title('HyperDTI: Task-conditioned modeling of drug-target interactions.\n')
 st.markdown('')
 st.markdown(
     """
     🧬 Github: [ml-jku/hyper-dti](https://https://github.com/ml-jku/hyper-dti)    📝 NeurIPS 2022 AI4Science workshop paper: [OpenReview](https://openreview.net/forum?id=dIX34JWnIAL)\n
     """
 )
-st.error('WARNING! This app is currently under development and should not be used!')
 def about_page():
     st.markdown(
@@ -41,355 +48,139 @@ def about_page():
         In this work, we propose the HyperPCM model, a task-conditioned HyperNetwork approach for the problem of
         predicting drug-target interactions in drug discovery. Our model learns to generate a QSAR model specialized on
         a given protein target. We demonstrate state-of-the-art performance over previous methods on multiple
-        well-known benchmarks, particularly in zero-shot settings for unseen protein targets.
         """
     )
     st.image('figures/hyper-dti.png', caption='Overview of HyperPCM architecture.')
-'''
-def predict_dti():
-    st.markdown('## Predict drug-target interaction')
-    st.write('In the future this page can be used to predict interactions betweek a query drug compound and a query protein target by the HyperPCM mdoel.')
-    col1, col2 = st.columns(2)
-    with col1:
-        st.markdown('### Drug')
-        mol_col1, mol_col2 = st.columns(2)
-        with mol_col1:
-            smiles = st.text_input('Enter query SMILES', value='CC(=O)OC1=CC=CC=C1C(=O)O', placeholder='CC(=O)OC1=CC=CC=C1C(=O)O')
-            if smiles:
-                mol = Chem.MolFromSmiles(smiles)
-                mol_img = Chem.Draw.MolToImage(mol)
-                st.image(mol_img) #, width = 140)
-        with mol_col2:
-            selected_encoder = st.selectbox(
-                'Select encoder',('None', 'CDDD', 'MolBERT', 'Dummy')
-            )
-            if smiles:
-                if selected_encoder == 'CDDD':
-                    from cddd.inference import InferenceModel
-                    CDDD_MODEL_DIR = 'src/encoders/cddd'
-                    cddd_model = InferenceModel(CDDD_MODEL_DIR)
-                    drug_embedding = cddd_model.seq_to_emb([smiles])
-                    #from huggingface_hub import hf_hub_download
-                    #precomputed_embs = f'{selected_encoder}_encoding.csv'
-                    #REPO_ID = "emmas96/Lenselink"
-                    #embs_path = hf_hub_download(REPO_ID, precomputed_embs)
-                    #embs = pd.read_csv(embs_path)
-                    #embedding = embs[smiles]
-                elif selected_encoder == 'MolBERT':
-                    from molbert.utils.featurizer.molbert_featurizer import MolBertFeaturizer
-                    from huggingface_hub import hf_hub_download
-                    CDDD_MODEL_DIR = 'encoders/molbert/last.ckpt'
-                    REPO_ID = "emmas96/hyperpcm"
-                    checkpoint_path = hf_hub_download(REPO_ID, MOLBERT_MODEL_DIR)
-                    molbert_model = MolBertFeaturizer(checkpoint_path, max_seq_len=500, embedding_type='average-1-cat-pooled')
-                    drug_embedding = molbert_model.transform([smiles])
-                elif selected_encoder == 'Dummy':
-                    drug_embedding = [0,1,2,3,4,5]
-                else:
-                    drug_embedding = None
-                    st.image('figures/molecule_encoder.png')
-                    st.warning('Choose encoder above...')
-                if drug_embedding is not None:
-                    st.image('figures/molecule_encoder_done.png')
-                    st.success('Encoding complete.')
-    with col2:
-        st.markdown('### Target')
-        prot_col1, prot_col2 = st.columns(2)
-        with prot_col1:
-            sequence = st.text_input('Enter query amino-acid sequence', value='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA', placeholder='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA')
-            if sequence == 'HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA':
-                st.image('figures/ex_protein.jpeg')
-            elif sequence:
-                st.error('Visualization comming soon...')
-        with prot_col2:
-            selected_encoder = st.selectbox(
-                'Select encoder for protein target',('None', 'SeqVec', 'UniRep', 'ESM-1b', 'ProtT5')
-            )
-            if sequence:
-                if selected_encoder == 'SeqVec':
-                    with st.spinner('Encoding in progress...'):
-                        from bio_embeddings.embed import SeqVecEmbedder
-                        encoder = SeqVecEmbedder()
-                        embeddings = encoder.embed_batch([sequence])
-                        for emb in embeddings:
-                            prot_embedding = encoder.reduce_per_protein(emb)
-                            break
-                elif selected_encoder == 'UniRep':
-                    with st.spinner('Encoding in progress...'):
-                        from jax_unirep.utils import load_params
-                        params = load_params()
-                        from jax_unirep.featurize import get_reps
-                        embedding, h_final, c_final = get_reps([sequence])
-                        prot_embedding = embedding.mean(axis=0)
-                elif selected_encoder == 'ESM-1b':
-                    with st.spinner('Encoding in progress...'):
-                        from bio_embeddings.embed import ESM1bEmbedder
-                        encoder = ESM1bEmbedder()
-                        embeddings = encoder.embed_batch([sequence])
-                        for emb in embeddings:
-                            prot_embedding = encoder.reduce_per_protein(emb)
-                            break
-                elif selected_encoder == 'ProtT5':
-                    with st.spinner('Encoding in progress...'):
-                        from bio_embeddings.embed import ProtTransT5XLU50Embedder
-                        encoder = ProtTransT5XLU50Embedder()
-                        embeddings = encoder.embed_batch([sequence])
-                        for emb in embeddings:
-                            prot_embedding = encoder.reduce_per_protein(emb)
-                            break
-                else:
-                    prot_embedding = None
-                    st.image('figures/protein_encoder.png')
-                    st.warning('Choose encoder above...')
-                if prot_embedding is not None:
-                    st.image('figures/protein_encoder_done.png')
-                    st.success('Encoding complete.')
-    if drug_embedding is None or prot_embedding is None:
-        st.warning('Waiting for both drug and target embeddings to be computed...')
-    else:
-        st.markdown('### Inference')
-        import time
-        progress_text = "HyperPCM predicts the interaction between the query drug compound toward the query protein target. Please wait."
-        my_bar = st.progress(0, text=progress_text)
-        for i in range(100):
-            time.sleep(0.1)
-            my_bar.progress(i + 1, text=progress_text)
-        my_bar.progress(100, text="HyperPCM predicts the interaction between the query drug compound toward the query protein target. Done.")
-        st.markdown('### Interaction')
-        st.write('HyperPCM predicts an activity of xxx pChEMBL.')
-'''
 def retrieval():
     st.markdown('## Retrieve top-k most active drug compounds')
     st.write('In the furute this page will retrieve the top-k drug compounds that are predicted to have the highest activity toward the given protein target from either the Lenselink or Davis datasets.')
-    st.markdown('### Target')
-    st.write(f'The top-{selected_k} most active drug coupounds from {selected_dataset} predicted by HyperPCM are: ')
-    dummy_smiles = ['CC(=O)OC1=CC=CC=C1C(=O)O', 'COc1cc(C=O)ccc1O', 'CC(=O)Nc1ccc(O)cc1', 'CC(=O)Nc1ccc(OS(=O)(=O)O)cc1', 'CC(=O)Nc1ccc(O[C@@H]2O[C@H](C(=O)O)[C@@H](O)[C@H](O)[C@H]2O)cc1']
-    cols = st.columns(5)
-    for j, col in enumerate(cols):
-        with col:
-            for i in range(int(selected_k/5)):
-                mol = Chem.MolFromSmiles(dummy_smiles[j])
-                mol_img = Chem.Draw.MolToImage(mol)
-                st.image(mol_img)
-    '''
     col1, col2, col3, col4 = st.columns(4)
-    with col2:
-        sequence = st.text_input('Enter query amino-acid sequence', value='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA', placeholder='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA')
-        if sequence == 'HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA':
-            st.image('figures/ex_protein.jpeg')
         elif sequence:
             st.error('Visualization coming soon...')
-    with col3:
         selected_encoder = st.selectbox(
-                'Select encoder for protein target',('SeqVec', 'None')
             )
         if sequence:
             if selected_encoder == 'SeqVec':
                 st.image('figures/protein_encoder_done.png')
                 with st.spinner('Encoding in progress...'):
-                    from bio_embeddings.embed import SeqVecEmbedder
-                    encoder = SeqVecEmbedder()
-                    embeddings = encoder.embed_batch([sequence])
-                    for emb in embeddings:
-                        prot_embedding = encoder.reduce_per_protein(emb)
-                        break
                 st.success('Encoding complete.')
             else:
-                prot_embedding = None
                 st.image('figures/protein_encoder.png')
                 st.warning('Choose encoder above...')
-    if prot_embedding is not None:
         st.markdown('### Inference')
-        import time
-        progress_text = "HyperPCM predicts the QSAR model for the query protein target. Please wait."
         my_bar = st.progress(0, text=progress_text)
-        for i in range(100):
-            time.sleep(0.1)
-            my_bar.progress(i + 1, text=progress_text)
-        my_bar.progress(100, text="HyperPCM predicts the QSAR model for the query protein target. Done.")
         st.markdown('### Retrieval')
-        col1, col2 = st.columns(2)
-        with col1:
-            selected_dataset = st.selectbox(
-                    'Select dataset from which the drug compounds should be retrieved',('Lenselink', 'Davis')
-                )
-        with col2:
-            selected_k = st.selectbox(
-                    'Select the top-k number of drug compounds to retrieve',(5, 10, 15, 20)
-                )
-        st.write(f'The top-{selected_k} most active drug coupounds from {selected_dataset} predicted by HyperPCM are: ')
-        dummy_smiles = ['CC(=O)OC1=CC=CC=C1C(=O)O', 'COc1cc(C=O)ccc1O', 'CC(=O)Nc1ccc(O)cc1', 'CC(=O)Nc1ccc(OS(=O)(=O)O)cc1', 'CC(=O)Nc1ccc(O[C@@H]2O[C@H](C(=O)O)[C@@H](O)[C@H](O)[C@H]2O)cc1']
         cols = st.columns(5)
         for j, col in enumerate(cols):
             with col:
                 for i in range(int(selected_k/5)):
-                    mol = Chem.MolFromSmiles(dummy_smiles[j])
                     mol_img = Chem.Draw.MolToImage(mol)
-                    st.image(mol_img)
-    '''
-'''
-def display_protein():
-    st.markdown('## Display protein structure')
-    st.write('In the future this page will display the ESM predicted sequence of a protein target.')
-    st.markdown('### Target')
-    sequence = st.text_input('Enter the amino-acid sequence of the query protein target', value='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA', placeholder='HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA')
-    if sequence:
-        st.image('figures/ex_protein.jpeg')
-        model = esm.pretrained.esmfold_v1()
-        model = model.eval().cuda()
-        with torch.no_grad():
-            output = model.infer_pdb(sequence)
-            st.write(output)
-        with open("result.pdb", "w") as f:
-            f.write(output)
-        struct = bsio.load_structure("result.pdb", extra_fields=["b_factor"])
-        print(struct.b_factor.mean())
-    """
-        model, alphabet = esm.pretrained.esm2_t33_650M_UR50D()
-        batch_converter = alphabet.get_batch_converter()
-        batch_labels, batch_strs, batch_tokens = batch_converter([("protein1", sequence),])
-        # Extract per-residue representations (on CPU)
-        with torch.no_grad():
-            results = model(batch_tokens, repr_layers=[12], return_contacts=True)
-        token_representations = results["representations"][12]
-        token_list = token_representations.tolist()[0][0][0]
-        client = Client(url=st.secrets["DB_URL"], user=st.secrets["USER"], password=st.secrets["PASSWD"])
-        result = client.fetch("SELECT seq, distance('topK=500')(representations, " + str(token_list) + ')'+ "as dist FROM default.esm_protein_indexer_768")
-        result_temp_seq = []
-        for i in result:
-            # result_temp_coords = i['seq']
-            result_temp_seq.append(i['seq'])
-        result_temp_seq = list(set(result_temp_seq))
-        if st.button(result_temp_seq[0]):
-            print(result_temp_seq[0])
-        elif st.button(result_temp_seq[1]):
-            print(result_temp_seq[1])
-        elif st.button(result_temp_seq[2]):
-            print(result_temp_seq[2])
-        elif st.button(result_temp_seq[3]):
-            print(result_temp_seq[3])
-        elif st.button(result_temp_seq[4]):
-            print(result_temp_seq[4])
-        start[2] = st.pyplot(visualize_3D_Coordinates(result_temp_coords).figure)
-        headers = {
-            'Content-Type': 'application/x-www-form-urlencoded',
-            }
-        response = requests.post('https://api.esmatlas.com/foldSequence/v1/pdb/', headers=headers, data=sequence)
-        name = sequence[:3] + sequence[-3:]
-        pdb_string = response.content.decode('utf-8')
-        with open('predicted.pdb', 'w') as f:
-            f.write(pdb_string)
-        struct = bsio.load_structure('predicted.pdb', extra_fields=["b_factor"])
-        b_value = round(struct.b_factor.mean(), 4)
-        render_mol(pdb_string)
-        if residues_marker:
-            start[3] = showmol(render_pdb_resn(viewer = render_pdb(id = id_PDB),resn_lst = [residues_marker]))
-        else:
-            start[3] = showmol(render_pdb(id = id_PDB))
-        st.session_state['xq'] = st.session_state.model
-    # example proteins ["HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA"], ["AHKLFIGGLPNYLNDDQVKELLTSFGPLKAFNLVKDSATGLSKGYAFCEYVDINVTDQAIAGLNGMQLGDKKLLVQRASVGAKNA"]
-    """
-def display_context():
-    st.markdown('## Display context')
-    st.write('In the future this page will visualize the context module for a given protein, i.e., show important features and highly ranked / related proteins from the context.')
-'''
-def references():
-    st.markdown(
-        '''
-        ## References
-        Schmidhuber, J., “Learning to control fast-weight memories: An alternative to dynamic recurrent networks.” Neural Computation, 1992.
-        Davis, M. I., et al. "Comprehensive analysis of kinase inhibitor selectivity." Nature Biotechnology 29.11 (2011): 1046-1051.
-        Ha, D., et al. “HyperNetworks”. ICLR, 2017.
-        Lenselink, E. B., et al. "Beyond the hype: deep neural networks outperform established methods using a ChEMBL bioactivity benchmark set." Journal of Cheminformatics 9.1 (2017): 1-14.
-        Alley, E. C., et al. "Unified rational protein engineering with sequence-based deep representation learning." Nature Methods 16.12 (2019): 1315-1322.
-        Chang, O., et al., “Principled weight initialization for hypernetworks.” ICLR, 2019.
-        Heinzinger, M., et al. "Modeling aspects of the language of life through transfer-learning protein sequences." BMC Bioinformatics 20.1 (2019): 1-17.
-        Winter, R., et al. "Learning continuous and data-driven molecular descriptors by translating equivalent chemical representations." Chemical Science 10.6 (2019): 1692-1701.
-        Fabian, B., et al. "Molecular representation learning with language models and domain-relevant auxiliary tasks." Workshop for ML4Molecules (2020).
-        Elnaggar, A., et al. "ProtTrans: Toward understanding the language of life through self-supervised learning." IEEE Transactions on Pattern Analysis and Machine Intelligence 44 (2021): 7112–7127.
-        Rives, A., et al. "Biological structure and function emerge from scaling unsupervised learning to 250 million protein sequences." Proceedings of the National Academy of Sciences 118.15 (2021): e2016239118.
-        Kim, P. T., et al. "Unsupervised Representation Learning for Proteochemometric Modeling." International Journal of Molecular Sciences 22.23 (2021): 12882.
-        Schimunek, J., et al., “Context-enriched molecule representations improve few-shot drug discovery.” ICLR, 2023.
-        '''
-    )
 page_names_to_func = {
-    'About': about_page,
-    #'Predict DTI': predict_dti,
-    'Retrieve Top-k': retrieval,
-    #'Display Protein': display_protein,
-    #'Display Context': display_context,
-    #'References': references
 }
 selected_page = st.sidebar.selectbox('Choose function', page_names_to_func.keys())

+import gc
 import os
 import sys
+import torch
+import pickle
+import numpy as np
 import pandas as pd
 import streamlit as st
+from torch.utils.data import DataLoader
 from rdkit import Chem
 from rdkit.Chem import Draw
 sys.path.insert(0, os.path.abspath("src/"))
+from src.dataset import DrugRetrieval, collate_target
+from hyper_dti.models.hyper_pcm import HyperPCM
+base_path = os.path.dirname(__file__)
+data_path = os.path.join(base_path, 'data')
+checkpoint_path = os.path.join(base_path, 'checkpoints/lpo/cv2_test_fold6_1402/model_updated.t7')
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+st.set_page_config(layout="wide")
+st.title('HyperDTI: Robust Task-Conditioned Modeling of Drug-Target Interactions.\n')
 st.markdown('')
 st.markdown(
     """
     🧬 Github: [ml-jku/hyper-dti](https://https://github.com/ml-jku/hyper-dti)    📝 NeurIPS 2022 AI4Science workshop paper: [OpenReview](https://openreview.net/forum?id=dIX34JWnIAL)\n
     """
 )
+#st.error('WARNING! This app is currently under development and should not be used!')
+st.divider()
 def about_page():
     st.markdown(
         In this work, we propose the HyperPCM model, a task-conditioned HyperNetwork approach for the problem of
         predicting drug-target interactions in drug discovery. Our model learns to generate a QSAR model specialized on
         a given protein target. We demonstrate state-of-the-art performance over previous methods on multiple
+        well-known benchmarks, particularly in zero-shot settings for unseen protein targets. This app demonstrates the
+        model as a retrieval task of the top-k most active drug compounds predicted for a given query target.
         """
     )
     st.image('figures/hyper-dti.png', caption='Overview of HyperPCM architecture.')
 def retrieval():
     """Render the retrieval page and rank database compounds for a query target.

     The page collects a protein sequence, looks up its precomputed SeqVec
     embedding, loads the selected drug database, runs HyperPCM over every
     batch of that database and shows the ``k`` compounds with the highest
     predicted activity as a five-column grid of molecule images.

     Returns:
         None. All output is rendered through Streamlit. After drawing the
         input widgets the function returns early whenever the query
         embedding or the drug database is not available.
     """
     st.markdown('## Retrieve top-k most active drug compounds')
     st.write('In the future this page will retrieve the top-k drug compounds that are predicted to have the highest activity toward the given protein target from either the Lenselink or Davis datasets.')

     target_header, database_header = st.columns(2)
     with target_header:
         st.markdown('### Query Target')
     with database_header:
         st.markdown('### Drug Database')

     # Bind everything the inference stage depends on up front, so an empty
     # sequence or an unselected encoder/database cannot leave a name
     # undefined further down.
     batch_size = 64
     query_embedding = None
     dataset = None
     dataloader = None

     col1, col2, col3, col4 = st.columns(4)

     with col1:
         ex_target = 'YTKMKTATNIYIFNLALADALATSTLPFQSVNYLMGTWPFGTILCKIVISIDYYNMFTSIFTLCTMSVDRYIAVCHPVKALDFRTPRNAKTVNVCNWI'
         sequence = st.text_input('Enter amino-acid sequence', value=ex_target, placeholder=ex_target)
         if sequence in ('HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA', ex_target):
             st.image('figures/ex_protein.jpeg', use_column_width='always')
         elif sequence:
             st.error('Visualization coming soon...')

     with col2:
         target_encoder = st.selectbox('Select target encoder', ('SeqVec', 'None'))
         if sequence:
             if target_encoder == 'SeqVec':
                 with st.spinner('Encoding in progress...'):
                     # TODO make SeqVec embedding on the spot
                     # NOTE: pickle executes arbitrary code on load; this file
                     # is read from the app's own data directory and must
                     # never be replaced by user-supplied content.
                     embedding_file = os.path.join(data_path, 'Lenselink/processed/SeqVec_encoding_test.pickle')
                     with open(embedding_file, 'rb') as handle:
                         test_set = pickle.load(handle)
                     try:
                         query_embedding = test_set[sequence]
                     except KeyError:
                         # Only precomputed embeddings are available for now.
                         query_embedding = None
                 if query_embedding is None:
                     st.image('figures/protein_encoder.png')
                     st.error('No precomputed SeqVec embedding is available for this sequence.')
                 else:
                     st.image('figures/protein_encoder_done.png')
                     st.success('Encoding complete.')
             else:
                 st.image('figures/protein_encoder.png')
                 st.warning('Choose encoder above...')

     with col3:
         selected_database = st.selectbox('Select database', ('Lenselink', 'None'))
         if selected_database == 'Lenselink':
             _, image_col = st.columns(2)
             with image_col:
                 st.image('figures/multi_molecules.png', use_column_width='always')
             if query_embedding is None:
                 # The dataset is built around the query embedding, so there
                 # is nothing meaningful to load without one.
                 st.warning('Waiting for the query target embedding...')
             else:
                 with st.spinner('Loading data...'):
                     dataset = DrugRetrieval(os.path.join(data_path, selected_database), sequence, query_embedding)
                     dataloader = DataLoader(dataset, num_workers=2, batch_size=batch_size, shuffle=False, collate_fn=collate_target)
                 st.success('Data loaded.')
         else:
             st.warning('Choose database above...')

     with col4:
         drug_encoder = st.selectbox('Select drug encoder', ('CDDD', 'None'))
         # The select box yields the literal string 'None', which is truthy,
         # so the choice has to be compared explicitly.
         if selected_database != 'None':
             if drug_encoder == 'CDDD':
                 st.image('figures/molecule_encoder_done.png')
                 st.success('Encoding complete.')
             else:
                 st.image('figures/molecule_encoder.png')
                 st.warning('Choose encoder above...')

     # Inference needs both a query embedding and a loaded database.
     if query_embedding is None or dataloader is None:
         return

     st.markdown('### Inference')
     progress_text = "HyperPCM is predicting the QSAR model for the query protein target. Please wait."
     my_bar = st.progress(0, text=progress_text)

     # Free memory left over from a previous script run before loading the model.
     gc.collect()
     torch.cuda.empty_cache()

     model = HyperPCM(memory=dataset).to(device)
     # The checkpoint is loaded into the DataParallel wrapper, as before.
     model = torch.nn.DataParallel(model)
     # map_location lets a checkpoint saved on a GPU load on a CPU-only host.
     model.load_state_dict(torch.load(checkpoint_path, map_location=device))
     model.eval()

     smiles = []
     preds = []
     n_compounds = len(dataset)
     with torch.no_grad():
         for step, (batch, _) in enumerate(dataloader):
             logits = model(batch).detach().cpu().numpy()
             # batch['mids'] is later parsed as SMILES, so it presumably holds
             # SMILES strings - TODO confirm against collate_target.
             smiles.append(batch['mids'])
             # Flatten so (B,) and (B, 1) outputs both yield one value per compound.
             preds.append(np.atleast_1d(logits).ravel())
             my_bar.progress(min(1.0, batch_size * (step + 1) / n_compounds), text=progress_text)
     my_bar.progress(100, text="HyperPCM is predicting the QSAR model for the query protein target. Done.")

     if not preds:
         st.warning('The selected database contains no drug compounds.')
         return

     st.markdown('### Retrieval')
     selected_k = st.slider(f'Top-k most active drug compounds {selected_database} predicted by HyperPCM are, for k = ', 5, 20, 5, 5)
     results = pd.DataFrame({'SMILES': np.concatenate(smiles), 'Prediction': np.concatenate(preds)})
     results = results.sort_values(by='Prediction', ascending=False)
     results = results.reset_index()
     print(results.head(10))

     # Lay the top-k compounds out row by row across five columns:
     # the compound of rank r goes to column r % 5, row r // 5.
     n_columns = 5
     for j, col in enumerate(st.columns(n_columns)):
         with col:
             for i in range(selected_k // n_columns):
                 rank = j + n_columns * i
                 if rank >= len(results):
                     # Fewer compounds than requested; nothing left for this column.
                     break
                 mol = Chem.MolFromSmiles(results.loc[rank, 'SMILES'])
                 if mol is None:
                     # RDKit returns None for strings it cannot parse.
                     st.warning('Could not parse SMILES.')
                     continue
                 mol_img = Draw.MolToImage(mol)
                 st.image(mol_img, caption=f"{results.loc[rank, 'Prediction']:.2f}")
 page_names_to_func = {
+    'Retrieval': retrieval,
+    'About': about_page
 }
 selected_page = st.sidebar.selectbox('Choose function', page_names_to_func.keys())

figures/multi_molecules.png ADDED Viewed

requirements.txt CHANGED Viewed

@@ -1,4 +1,3 @@
-#setuptools
 rdkit  #==2022.3.5
 #torch
 #jax_unirep

 rdkit  #==2022.3.5
 #torch
 #jax_unirep