Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,7 +2,7 @@ import streamlit as st
|
|
| 2 |
import torch
|
| 3 |
import os
|
| 4 |
from rdkit import Chem
|
| 5 |
-
from rdkit.Chem import Draw
|
| 6 |
from transformers import BartForConditionalGeneration, BartTokenizer
|
| 7 |
from admet_ai import ADMETModel
|
| 8 |
import safe
|
|
@@ -11,6 +11,7 @@ from PIL import Image
|
|
| 11 |
import cairosvg
|
| 12 |
import pandas as pd
|
| 13 |
import streamlit.components.v1 as components
|
|
|
|
| 14 |
|
| 15 |
# **Page Configuration**
|
| 16 |
st.set_page_config(
|
|
@@ -65,16 +66,26 @@ num_molecules = st.sidebar.number_input(
|
|
| 65 |
)
|
| 66 |
|
| 67 |
# **Function to Generate Molecule Images**
|
| 68 |
-
def generate_molecule_image(
|
| 69 |
"""
|
| 70 |
-
Generates an image of the molecule from the
|
|
|
|
| 71 |
"""
|
| 72 |
try:
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
else:
|
| 77 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
return img
|
| 79 |
except Exception as e:
|
| 80 |
st.error(f"Error generating molecule image: {e}")
|
|
@@ -82,17 +93,17 @@ def generate_molecule_image(smiles):
|
|
| 82 |
|
| 83 |
# **Function to Create Copy-to-Clipboard Button**
|
| 84 |
def st_copy_button(text, key):
|
| 85 |
-
"""
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
"""
|
| 89 |
-
# Adjusted styling to position the button
|
| 90 |
button_html = f"""
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
|
|
|
|
|
|
| 94 |
"""
|
| 95 |
-
components.html(button_html, height=
|
| 96 |
|
| 97 |
# **Generate Molecules Button**
|
| 98 |
if st.button('Generate Molecules'):
|
|
@@ -134,68 +145,64 @@ if st.button('Generate Molecules'):
|
|
| 134 |
})
|
| 135 |
|
| 136 |
# **Invalid SMILES Check**
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
# **Function to validate SMILES**
|
| 140 |
def is_valid_smile(smile):
    """Return True when RDKit parses ``smile`` into a molecule object."""
    # Chem.MolFromSmiles yields None for strings it cannot parse.
    parsed = Chem.MolFromSmiles(smile)
    return parsed is not None
|
| 142 |
|
| 143 |
-
#
|
| 144 |
df_molecules['Valid'] = df_molecules['SMILES'].apply(is_valid_smile)
|
| 145 |
df_valid = df_molecules[df_molecules['Valid']].copy()
|
| 146 |
|
| 147 |
-
#
|
| 148 |
invalid_molecules = df_molecules[~df_molecules['Valid']]
|
| 149 |
if not invalid_molecules.empty:
|
| 150 |
st.warning(f"{len(invalid_molecules)} generated molecules were invalid and excluded from predictions.")
|
| 151 |
|
| 152 |
-
#
|
| 153 |
if df_valid.empty:
|
| 154 |
st.error("No valid molecules were generated. Please try adjusting the generation parameters.")
|
| 155 |
else:
|
| 156 |
-
#
|
| 157 |
preds = admet_model.predict(smiles=df_valid['SMILES'].tolist())
|
| 158 |
|
| 159 |
-
#
|
| 160 |
if 'SMILES' not in preds.columns:
|
| 161 |
preds['SMILES'] = df_valid['SMILES'].values
|
| 162 |
|
| 163 |
-
#
|
| 164 |
df_results = pd.merge(df_valid, preds, on='SMILES', how='inner')
|
| 165 |
|
| 166 |
-
#
|
| 167 |
df_results.set_index('Molecule Name', inplace=True)
|
| 168 |
|
| 169 |
-
#
|
| 170 |
admet_properties = [
|
| 171 |
-
'
|
| 172 |
'hydrogen_bond_donors', 'QED', 'ClinTox', 'hERG', 'BBB_Martins'
|
| 173 |
]
|
| 174 |
-
df_results_filtered = df_results[[
|
| 175 |
-
'SMILES', 'Valid'] + admet_properties]
|
| 176 |
|
| 177 |
-
#
|
| 178 |
if df_results_filtered.empty:
|
| 179 |
st.error("No valid ADMET predictions were obtained. Please try adjusting the generation parameters.")
|
| 180 |
else:
|
| 181 |
-
#
|
| 182 |
st.subheader('Generated Molecules')
|
| 183 |
-
|
| 184 |
-
# **Determine number of columns per row**
|
| 185 |
cols_per_row = min(3, len(df_results_filtered)) # Max 3 columns
|
| 186 |
-
|
| 187 |
-
# **Create columns in Streamlit**
|
| 188 |
cols = st.columns(cols_per_row)
|
| 189 |
|
| 190 |
-
# **Iterate over each molecule to display**
|
| 191 |
for idx, (mol_name, row) in enumerate(df_results_filtered.iterrows()):
|
| 192 |
smiles = row['SMILES']
|
| 193 |
-
img = generate_molecule_image(smiles)
|
| 194 |
|
| 195 |
-
#
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 199 |
|
| 200 |
with cols[idx % cols_per_row]:
|
| 201 |
if img is not None and isinstance(img, Image.Image):
|
|
@@ -203,35 +210,25 @@ if st.button('Generate Molecules'):
|
|
| 203 |
else:
|
| 204 |
st.error(f"Could not generate image for {mol_name}")
|
| 205 |
|
| 206 |
-
#
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
):
|
| 211 |
-
st.session_state[toggle_key] = not st.session_state[toggle_key]
|
| 212 |
|
| 213 |
-
#
|
| 214 |
-
if
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
st.
|
| 222 |
-
else:
|
| 223 |
-
st.code(smiles)
|
| 224 |
|
| 225 |
-
#
|
| 226 |
-
st_copy_button(
|
| 227 |
-
safe_string if st.session_state[toggle_key] else smiles,
|
| 228 |
-
key=f'copy_{mol_name}'
|
| 229 |
-
)
|
| 230 |
-
|
| 231 |
-
# **Display selected ADMET properties**
|
| 232 |
st.write("**ADMET Properties:**")
|
| 233 |
-
# Drop 'SMILES' and 'Valid' columns for display
|
| 234 |
admet_data = row.drop(['SMILES', 'Valid'])
|
| 235 |
st.write(admet_data)
|
| 236 |
else:
|
| 237 |
st.write("Click the 'Generate Molecules' button to generate beta-lactam molecules.")
|
|
|
|
|
|
| 2 |
import torch
|
| 3 |
import os
|
| 4 |
from rdkit import Chem
|
| 5 |
+
from rdkit.Chem import Draw
|
| 6 |
from transformers import BartForConditionalGeneration, BartTokenizer
|
| 7 |
from admet_ai import ADMETModel
|
| 8 |
import safe
|
|
|
|
| 11 |
import cairosvg
|
| 12 |
import pandas as pd
|
| 13 |
import streamlit.components.v1 as components
|
| 14 |
+
import json # For safely encoding text in JavaScript
|
| 15 |
|
| 16 |
# **Page Configuration**
|
| 17 |
st.set_page_config(
|
|
|
|
| 66 |
)
|
| 67 |
|
| 68 |
# **Function to Generate Molecule Images**
|
| 69 |
+
def generate_molecule_image(input_string, use_safe=False):
|
| 70 |
"""
|
| 71 |
+
Generates an image of the molecule from the input string.
|
| 72 |
+
If use_safe is True, input_string is treated as a SAFE string.
|
| 73 |
"""
|
| 74 |
try:
|
| 75 |
+
if use_safe and input_string is not None:
|
| 76 |
+
# Generate image from SAFE encoding
|
| 77 |
+
svg_str = safe.to_image(input_string)
|
| 78 |
+
# Convert SVG to PNG bytes
|
| 79 |
+
png_bytes = cairosvg.svg2png(bytestring=svg_str.encode('utf-8'))
|
| 80 |
+
# Create an image object
|
| 81 |
+
img = Image.open(io.BytesIO(png_bytes))
|
| 82 |
else:
|
| 83 |
+
# Generate standard molecule image
|
| 84 |
+
mol = Chem.MolFromSmiles(input_string)
|
| 85 |
+
if mol:
|
| 86 |
+
img = Draw.MolToImage(mol, size=(200, 200))
|
| 87 |
+
else:
|
| 88 |
+
img = None
|
| 89 |
return img
|
| 90 |
except Exception as e:
|
| 91 |
st.error(f"Error generating molecule image: {e}")
|
|
|
|
| 93 |
|
| 94 |
# **Function to Create Copy-to-Clipboard Button**
|
| 95 |
def _copy_button_html(text):
    """Build the HTML markup for a right-aligned "Copy" button.

    Args:
        text: The string the button writes to the clipboard when clicked.

    Returns:
        An HTML fragment whose button calls
        ``navigator.clipboard.writeText`` with ``text``.
    """
    import html  # local import: keeps this block self-contained in the file

    # json.dumps produces a valid JavaScript string literal, but that literal
    # is wrapped in double quotes. Dropped raw into the double-quoted onclick
    # attribute it would terminate the attribute at its first character, so
    # the literal is HTML-escaped; the browser un-escapes attribute values
    # before handing them to the JavaScript engine.
    js_literal = html.escape(json.dumps(text), quote=True)
    return f"""
    <div style="text-align: right; margin-top: -10px; margin-bottom: 10px;">
        <button onclick="navigator.clipboard.writeText({js_literal})" style="
            padding:5px;
        ">Copy</button>
    </div>
    """


def st_copy_button(text, key):
    """Render a copy-to-clipboard button through Streamlit's HTML component.

    Args:
        text: The string copied to the clipboard when the button is clicked.
        key: Identifier supplied by callers; it is accepted for interface
            compatibility and is not used when rendering.
    """
    components.html(_copy_button_html(text), height=35)
|
| 107 |
|
| 108 |
# **Generate Molecules Button**
|
| 109 |
if st.button('Generate Molecules'):
|
|
|
|
| 145 |
})
|
| 146 |
|
| 147 |
# **Invalid SMILES Check**
|
| 148 |
+
# Function to validate SMILES
|
|
|
|
|
|
|
| 149 |
def is_valid_smile(smile):
    """Return True when RDKit parses ``smile`` into a molecule object."""
    # Chem.MolFromSmiles yields None for strings it cannot parse.
    parsed = Chem.MolFromSmiles(smile)
    return parsed is not None
|
| 151 |
|
| 152 |
+
# Apply validation function
|
| 153 |
df_molecules['Valid'] = df_molecules['SMILES'].apply(is_valid_smile)
|
| 154 |
df_valid = df_molecules[df_molecules['Valid']].copy()
|
| 155 |
|
| 156 |
+
# Inform user if any molecules were invalid
|
| 157 |
invalid_molecules = df_molecules[~df_molecules['Valid']]
|
| 158 |
if not invalid_molecules.empty:
|
| 159 |
st.warning(f"{len(invalid_molecules)} generated molecules were invalid and excluded from predictions.")
|
| 160 |
|
| 161 |
+
# Check if there are valid molecules to proceed
|
| 162 |
if df_valid.empty:
|
| 163 |
st.error("No valid molecules were generated. Please try adjusting the generation parameters.")
|
| 164 |
else:
|
| 165 |
+
# ADMET Predictions
|
| 166 |
preds = admet_model.predict(smiles=df_valid['SMILES'].tolist())
|
| 167 |
|
| 168 |
+
# Ensure 'SMILES' is a column in preds
|
| 169 |
if 'SMILES' not in preds.columns:
|
| 170 |
preds['SMILES'] = df_valid['SMILES'].values
|
| 171 |
|
| 172 |
+
# Merge predictions with valid molecules
|
| 173 |
df_results = pd.merge(df_valid, preds, on='SMILES', how='inner')
|
| 174 |
|
| 175 |
+
# Set 'Molecule Name' as index
|
| 176 |
df_results.set_index('Molecule Name', inplace=True)
|
| 177 |
|
| 178 |
+
# Select only desired ADMET properties
|
| 179 |
admet_properties = [
|
| 180 |
+
'molecular weight', 'logP', 'hydrogen_bond_acceptors',
|
| 181 |
'hydrogen_bond_donors', 'QED', 'ClinTox', 'hERG', 'BBB_Martins'
|
| 182 |
]
|
| 183 |
+
df_results_filtered = df_results[['SMILES', 'Valid'] + admet_properties]
|
|
|
|
| 184 |
|
| 185 |
+
# Check if df_results_filtered is empty after filtering
|
| 186 |
if df_results_filtered.empty:
|
| 187 |
st.error("No valid ADMET predictions were obtained. Please try adjusting the generation parameters.")
|
| 188 |
else:
|
| 189 |
+
# Display Molecules
|
| 190 |
st.subheader('Generated Molecules')
|
|
|
|
|
|
|
| 191 |
cols_per_row = min(3, len(df_results_filtered)) # Max 3 columns
|
|
|
|
|
|
|
| 192 |
cols = st.columns(cols_per_row)
|
| 193 |
|
|
|
|
| 194 |
for idx, (mol_name, row) in enumerate(df_results_filtered.iterrows()):
|
| 195 |
smiles = row['SMILES']
|
|
|
|
| 196 |
|
| 197 |
+
# Attempt to encode to SAFE
|
| 198 |
+
try:
|
| 199 |
+
safe_string = safe.encode(smiles)
|
| 200 |
+
except Exception as e:
|
| 201 |
+
safe_string = None
|
| 202 |
+
st.error(f"Could not convert to SAFE encoding for {mol_name}: {e}")
|
| 203 |
+
|
| 204 |
+
# Generate molecule image (SMILES or SAFE)
|
| 205 |
+
img = generate_molecule_image(smiles)
|
| 206 |
|
| 207 |
with cols[idx % cols_per_row]:
|
| 208 |
if img is not None and isinstance(img, Image.Image):
|
|
|
|
| 210 |
else:
|
| 211 |
st.error(f"Could not generate image for {mol_name}")
|
| 212 |
|
| 213 |
+
# Display SMILES string
|
| 214 |
+
st.write("**SMILES:**")
|
| 215 |
+
st.text(smiles)
|
| 216 |
+
st_copy_button(smiles, key=f'copy_smiles_{mol_name}')
|
|
|
|
|
|
|
| 217 |
|
| 218 |
+
# Display SAFE encoding if available
|
| 219 |
+
if safe_string:
|
| 220 |
+
st.write("**SAFE Encoding:**")
|
| 221 |
+
st.text(safe_string)
|
| 222 |
+
st_copy_button(safe_string, key=f'copy_safe_{mol_name}')
|
| 223 |
+
# Optionally display SAFE visualization
|
| 224 |
+
safe_img = generate_molecule_image(safe_string, use_safe=True)
|
| 225 |
+
if safe_img is not None:
|
| 226 |
+
st.image(safe_img, caption=f"{mol_name} (SAFE Visualization)")
|
|
|
|
|
|
|
| 227 |
|
| 228 |
+
# Display selected ADMET properties
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
st.write("**ADMET Properties:**")
|
|
|
|
| 230 |
admet_data = row.drop(['SMILES', 'Valid'])
|
| 231 |
st.write(admet_data)
|
| 232 |
else:
|
| 233 |
st.write("Click the 'Generate Molecules' button to generate beta-lactam molecules.")
|
| 234 |
+
|