Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -2,7 +2,7 @@ import streamlit as st
|
|
2 |
import torch
|
3 |
import os
|
4 |
from rdkit import Chem
|
5 |
-
from rdkit.Chem import Draw
|
6 |
from transformers import BartForConditionalGeneration, BartTokenizer
|
7 |
from admet_ai import ADMETModel
|
8 |
import safe
|
@@ -11,6 +11,7 @@ from PIL import Image
|
|
11 |
import cairosvg
|
12 |
import pandas as pd
|
13 |
import streamlit.components.v1 as components
|
|
|
14 |
|
15 |
# **Page Configuration**
|
16 |
st.set_page_config(
|
@@ -65,16 +66,26 @@ num_molecules = st.sidebar.number_input(
|
|
65 |
)
|
66 |
|
67 |
# **Function to Generate Molecule Images**
|
68 |
-
def generate_molecule_image(
|
69 |
"""
|
70 |
-
Generates an image of the molecule from the
|
|
|
71 |
"""
|
72 |
try:
|
73 |
-
|
74 |
-
|
75 |
-
|
|
|
|
|
|
|
|
|
76 |
else:
|
77 |
-
|
|
|
|
|
|
|
|
|
|
|
78 |
return img
|
79 |
except Exception as e:
|
80 |
st.error(f"Error generating molecule image: {e}")
|
@@ -82,17 +93,17 @@ def generate_molecule_image(smiles):
|
|
82 |
|
83 |
# **Function to Create Copy-to-Clipboard Button**
|
84 |
def st_copy_button(text, key):
|
85 |
-
"""
|
86 |
-
|
87 |
-
|
88 |
-
"""
|
89 |
-
# Adjusted styling to position the button
|
90 |
button_html = f"""
|
91 |
-
|
92 |
-
|
93 |
-
|
|
|
|
|
94 |
"""
|
95 |
-
components.html(button_html, height=
|
96 |
|
97 |
# **Generate Molecules Button**
|
98 |
if st.button('Generate Molecules'):
|
@@ -134,68 +145,64 @@ if st.button('Generate Molecules'):
|
|
134 |
})
|
135 |
|
136 |
# **Invalid SMILES Check**
|
137 |
-
|
138 |
-
|
139 |
-
# **Function to validate SMILES**
|
140 |
def is_valid_smile(smile):
|
141 |
return Chem.MolFromSmiles(smile) is not None
|
142 |
|
143 |
-
#
|
144 |
df_molecules['Valid'] = df_molecules['SMILES'].apply(is_valid_smile)
|
145 |
df_valid = df_molecules[df_molecules['Valid']].copy()
|
146 |
|
147 |
-
#
|
148 |
invalid_molecules = df_molecules[~df_molecules['Valid']]
|
149 |
if not invalid_molecules.empty:
|
150 |
st.warning(f"{len(invalid_molecules)} generated molecules were invalid and excluded from predictions.")
|
151 |
|
152 |
-
#
|
153 |
if df_valid.empty:
|
154 |
st.error("No valid molecules were generated. Please try adjusting the generation parameters.")
|
155 |
else:
|
156 |
-
#
|
157 |
preds = admet_model.predict(smiles=df_valid['SMILES'].tolist())
|
158 |
|
159 |
-
#
|
160 |
if 'SMILES' not in preds.columns:
|
161 |
preds['SMILES'] = df_valid['SMILES'].values
|
162 |
|
163 |
-
#
|
164 |
df_results = pd.merge(df_valid, preds, on='SMILES', how='inner')
|
165 |
|
166 |
-
#
|
167 |
df_results.set_index('Molecule Name', inplace=True)
|
168 |
|
169 |
-
#
|
170 |
admet_properties = [
|
171 |
-
'
|
172 |
'hydrogen_bond_donors', 'QED', 'ClinTox', 'hERG', 'BBB_Martins'
|
173 |
]
|
174 |
-
df_results_filtered = df_results[[
|
175 |
-
'SMILES', 'Valid'] + admet_properties]
|
176 |
|
177 |
-
#
|
178 |
if df_results_filtered.empty:
|
179 |
st.error("No valid ADMET predictions were obtained. Please try adjusting the generation parameters.")
|
180 |
else:
|
181 |
-
#
|
182 |
st.subheader('Generated Molecules')
|
183 |
-
|
184 |
-
# **Determine number of columns per row**
|
185 |
cols_per_row = min(3, len(df_results_filtered)) # Max 3 columns
|
186 |
-
|
187 |
-
# **Create columns in Streamlit**
|
188 |
cols = st.columns(cols_per_row)
|
189 |
|
190 |
-
# **Iterate over each molecule to display**
|
191 |
for idx, (mol_name, row) in enumerate(df_results_filtered.iterrows()):
|
192 |
smiles = row['SMILES']
|
193 |
-
img = generate_molecule_image(smiles)
|
194 |
|
195 |
-
#
|
196 |
-
|
197 |
-
|
198 |
-
|
|
|
|
|
|
|
|
|
|
|
199 |
|
200 |
with cols[idx % cols_per_row]:
|
201 |
if img is not None and isinstance(img, Image.Image):
|
@@ -203,35 +210,25 @@ if st.button('Generate Molecules'):
|
|
203 |
else:
|
204 |
st.error(f"Could not generate image for {mol_name}")
|
205 |
|
206 |
-
#
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
):
|
211 |
-
st.session_state[toggle_key] = not st.session_state[toggle_key]
|
212 |
|
213 |
-
#
|
214 |
-
if
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
st.
|
222 |
-
else:
|
223 |
-
st.code(smiles)
|
224 |
|
225 |
-
#
|
226 |
-
st_copy_button(
|
227 |
-
safe_string if st.session_state[toggle_key] else smiles,
|
228 |
-
key=f'copy_{mol_name}'
|
229 |
-
)
|
230 |
-
|
231 |
-
# **Display selected ADMET properties**
|
232 |
st.write("**ADMET Properties:**")
|
233 |
-
# Drop 'SMILES' and 'Valid' columns for display
|
234 |
admet_data = row.drop(['SMILES', 'Valid'])
|
235 |
st.write(admet_data)
|
236 |
else:
|
237 |
st.write("Click the 'Generate Molecules' button to generate beta-lactam molecules.")
|
|
|
|
2 |
import torch
|
3 |
import os
|
4 |
from rdkit import Chem
|
5 |
+
from rdkit.Chem import Draw
|
6 |
from transformers import BartForConditionalGeneration, BartTokenizer
|
7 |
from admet_ai import ADMETModel
|
8 |
import safe
|
|
|
11 |
import cairosvg
|
12 |
import pandas as pd
|
13 |
import streamlit.components.v1 as components
|
14 |
+
import json # For safely encoding text in JavaScript
|
15 |
|
16 |
# **Page Configuration**
|
17 |
st.set_page_config(
|
|
|
66 |
)
|
67 |
|
68 |
# **Function to Generate Molecule Images**
|
69 |
+
def generate_molecule_image(input_string, use_safe=False):
|
70 |
"""
|
71 |
+
Generates an image of the molecule from the input string.
|
72 |
+
If use_safe is True, input_string is treated as a SAFE string.
|
73 |
"""
|
74 |
try:
|
75 |
+
if use_safe and input_string is not None:
|
76 |
+
# Generate image from SAFE encoding
|
77 |
+
svg_str = safe.to_image(input_string)
|
78 |
+
# Convert SVG to PNG bytes
|
79 |
+
png_bytes = cairosvg.svg2png(bytestring=svg_str.encode('utf-8'))
|
80 |
+
# Create an image object
|
81 |
+
img = Image.open(io.BytesIO(png_bytes))
|
82 |
else:
|
83 |
+
# Generate standard molecule image
|
84 |
+
mol = Chem.MolFromSmiles(input_string)
|
85 |
+
if mol:
|
86 |
+
img = Draw.MolToImage(mol, size=(200, 200))
|
87 |
+
else:
|
88 |
+
img = None
|
89 |
return img
|
90 |
except Exception as e:
|
91 |
st.error(f"Error generating molecule image: {e}")
|
|
|
93 |
|
94 |
# **Function to Create Copy-to-Clipboard Button**
|
95 |
def st_copy_button(text, key):
    """Render a right-aligned "Copy" button that copies ``text`` to the clipboard.

    The button is emitted as raw HTML through ``components.html`` with a fixed
    height of 35, and its ``onclick`` handler calls
    ``navigator.clipboard.writeText`` with ``text`` as a JavaScript literal.

    Args:
        text: The value to copy. It is serialized with ``json.dumps`` so it
            becomes a valid JavaScript literal.
        key: Accepted for call-site compatibility; this function does not
            use it.
    """
    import html  # local import: only needed for attribute escaping here

    # json.dumps yields a valid JS string literal, but that literal is wrapped
    # in double quotes. Dropped as-is into the double-quoted onclick attribute
    # it would terminate the attribute early (breaking the button and allowing
    # markup injection), so HTML-escape it; the browser un-escapes the
    # attribute value before handing it to the JavaScript engine.
    js_literal = json.dumps(text)
    escaped_text = html.escape(js_literal, quote=True)

    button_html = f"""
    <div style="text-align: right; margin-top: -10px; margin-bottom: 10px;">
        <button onclick="navigator.clipboard.writeText({escaped_text})" style="
            padding:5px;
        ">Copy</button>
    </div>
    """
    components.html(button_html, height=35)
|
107 |
|
108 |
# **Generate Molecules Button**
|
109 |
if st.button('Generate Molecules'):
|
|
|
145 |
})
|
146 |
|
147 |
# **Invalid SMILES Check**
|
148 |
+
# Function to validate SMILES
|
|
|
|
|
149 |
def is_valid_smile(smile):
    """Return True when ``smile`` is a non-empty SMILES string RDKit can parse.

    Args:
        smile: Candidate SMILES value (typically one cell of the 'SMILES'
            column).

    Returns:
        bool: False for non-string values, blank strings, strings RDKit
        fails to parse, and strings that parse to a molecule with no atoms;
        True otherwise.
    """
    # Non-string cells (None, NaN) would make RDKit raise instead of
    # returning None, and a blank string parses to an empty molecule, so
    # reject both up front.
    if not isinstance(smile, str) or not smile.strip():
        return False

    mol = Chem.MolFromSmiles(smile)
    # Guard against a successful parse that nevertheless contains no atoms.
    return mol is not None and mol.GetNumAtoms() > 0
|
151 |
|
152 |
+
# Apply validation function
|
153 |
df_molecules['Valid'] = df_molecules['SMILES'].apply(is_valid_smile)
|
154 |
df_valid = df_molecules[df_molecules['Valid']].copy()
|
155 |
|
156 |
+
# Inform user if any molecules were invalid
|
157 |
invalid_molecules = df_molecules[~df_molecules['Valid']]
|
158 |
if not invalid_molecules.empty:
|
159 |
st.warning(f"{len(invalid_molecules)} generated molecules were invalid and excluded from predictions.")
|
160 |
|
161 |
+
# Check if there are valid molecules to proceed
|
162 |
if df_valid.empty:
|
163 |
st.error("No valid molecules were generated. Please try adjusting the generation parameters.")
|
164 |
else:
|
165 |
+
# ADMET Predictions
|
166 |
preds = admet_model.predict(smiles=df_valid['SMILES'].tolist())
|
167 |
|
168 |
+
# Ensure 'SMILES' is a column in preds
|
169 |
if 'SMILES' not in preds.columns:
|
170 |
preds['SMILES'] = df_valid['SMILES'].values
|
171 |
|
172 |
+
# Merge predictions with valid molecules
|
173 |
df_results = pd.merge(df_valid, preds, on='SMILES', how='inner')
|
174 |
|
175 |
+
# Set 'Molecule Name' as index
|
176 |
df_results.set_index('Molecule Name', inplace=True)
|
177 |
|
178 |
+
# Select only desired ADMET properties
|
179 |
admet_properties = [
|
180 |
+
'molecular weight', 'logP', 'hydrogen_bond_acceptors',
|
181 |
'hydrogen_bond_donors', 'QED', 'ClinTox', 'hERG', 'BBB_Martins'
|
182 |
]
|
183 |
+
df_results_filtered = df_results[['SMILES', 'Valid'] + admet_properties]
|
|
|
184 |
|
185 |
+
# Check if df_results_filtered is empty after filtering
|
186 |
if df_results_filtered.empty:
|
187 |
st.error("No valid ADMET predictions were obtained. Please try adjusting the generation parameters.")
|
188 |
else:
|
189 |
+
# Display Molecules
|
190 |
st.subheader('Generated Molecules')
|
|
|
|
|
191 |
cols_per_row = min(3, len(df_results_filtered)) # Max 3 columns
|
|
|
|
|
192 |
cols = st.columns(cols_per_row)
|
193 |
|
|
|
194 |
for idx, (mol_name, row) in enumerate(df_results_filtered.iterrows()):
|
195 |
smiles = row['SMILES']
|
|
|
196 |
|
197 |
+
# Attempt to encode to SAFE
|
198 |
+
try:
|
199 |
+
safe_string = safe.encode(smiles)
|
200 |
+
except Exception as e:
|
201 |
+
safe_string = None
|
202 |
+
st.error(f"Could not convert to SAFE encoding for {mol_name}: {e}")
|
203 |
+
|
204 |
+
# Generate molecule image (SMILES or SAFE)
|
205 |
+
img = generate_molecule_image(smiles)
|
206 |
|
207 |
with cols[idx % cols_per_row]:
|
208 |
if img is not None and isinstance(img, Image.Image):
|
|
|
210 |
else:
|
211 |
st.error(f"Could not generate image for {mol_name}")
|
212 |
|
213 |
+
# Display SMILES string
|
214 |
+
st.write("**SMILES:**")
|
215 |
+
st.text(smiles)
|
216 |
+
st_copy_button(smiles, key=f'copy_smiles_{mol_name}')
|
|
|
|
|
217 |
|
218 |
+
# Display SAFE encoding if available
|
219 |
+
if safe_string:
|
220 |
+
st.write("**SAFE Encoding:**")
|
221 |
+
st.text(safe_string)
|
222 |
+
st_copy_button(safe_string, key=f'copy_safe_{mol_name}')
|
223 |
+
# Optionally display SAFE visualization
|
224 |
+
safe_img = generate_molecule_image(safe_string, use_safe=True)
|
225 |
+
if safe_img is not None:
|
226 |
+
st.image(safe_img, caption=f"{mol_name} (SAFE Visualization)")
|
|
|
|
|
227 |
|
228 |
+
# Display selected ADMET properties
|
|
|
|
|
|
|
|
|
|
|
|
|
229 |
st.write("**ADMET Properties:**")
|
|
|
230 |
admet_data = row.drop(['SMILES', 'Valid'])
|
231 |
st.write(admet_data)
|
232 |
else:
|
233 |
st.write("Click the 'Generate Molecules' button to generate beta-lactam molecules.")
|
234 |
+
|