Spaces:

bcadkins01
/

beta_lactam_demo

Sleeping

App Files Files Community

bcadkins01 committed on Oct 25, 2024

Commit

111a2ce

verified ·

1 Parent(s): 085e6ac

Update app.py

Browse files

Files changed (1) hide show

app.py +175 -90

app.py CHANGED Viewed

@@ -10,117 +10,92 @@ import io
 from PIL import Image
 import cairosvg
 import pandas as pd
-# Page Configuration
-st.set_page_config(page_title='Beta-Lactam Molecule Generator', layout='wide')
-# Load Models
 @st.cache_resource(show_spinner="Loading Models...", ttl=600)
 def load_models():
-    # Load your molecule generation model
     model_name = "bcadkins01/beta_lactam_generator"  # Replace with your actual model path
     access_token = os.getenv("HUGGING_FACE_TOKEN")
-    model = BartForConditionalGeneration.from_pretrained(model_name, use_auth_token=access_token)
-    tokenizer = BartTokenizer.from_pretrained(model_name, use_auth_token=access_token)
-    # Load ADMET-AI model
     admet_model = ADMETModel()
     return model, tokenizer, admet_model
 model, tokenizer, admet_model = load_models()
-# Set Generation Parameters
 st.sidebar.header('Generation Parameters')
-creativity = st.sidebar.slider('Creativity (Temperature):', 0.0, 2.0, 1.0, step=0.1)
-num_molecules = st.sidebar.number_input('Number of Molecules to Generate:', min_value=1, max_value=5, value=5)
-# String Format Option
-string_format = st.sidebar.radio('String Format:', ('SMILES', 'SAFE'))
-# Generate Molecules Button
-if st.button('Generate Molecules'):
-    st.info("Generating molecules... Please wait.")
-    # Generate molecules
-    core_smiles = "C1C(=O)N(C)C(=O)C1"  # Beta-lactam core structure
-    input_ids = tokenizer(core_smiles, return_tensors='pt').input_ids
-    output_ids = model.generate(
-        input_ids=input_ids,
-        max_length=128,
-        temperature=creativity,
-        do_sample=True,
-        top_k=50,
-        num_return_sequences=num_molecules,
-        num_beams=max(num_molecules, 5)  # Ensure num_beams >= num_return_sequences
-    )
-    generated_smiles = [tokenizer.decode(ids, skip_special_tokens=True) for ids in output_ids]
-    molecule_names = [f"Mol{str(i).zfill(2)}" for i in range(1, len(generated_smiles) + 1)]
-    # Create DataFrame for generated molecules
-    df_molecules = pd.DataFrame({
-        'Molecule Name': molecule_names,
-        'SMILES': generated_smiles
-    })
-    # Display generated SMILES for debugging
-    st.write("Generated SMILES:")
-    st.write(df_molecules)
-    # ADMET Predictions
-    preds = admet_model.predict(smiles=df_molecules['SMILES'].tolist())
-    # Ensure 'SMILES' is a column in preds
-    if 'SMILES' not in preds.columns:
-        preds['SMILES'] = df_molecules['SMILES']
-    # Merge predictions with generated molecules
-    df_results = pd.merge(df_molecules, preds, on='SMILES', how='inner')
-    # Set 'Molecule Name' as index
-    df_results.set_index('Molecule Name', inplace=True)
-    # Display Molecules
-    st.subheader('Generated Molecules')
-    cols_per_row = min(5, len(df_results))
-    cols = st.columns(cols_per_row)
-    for idx, (mol_name, row) in enumerate(df_results.iterrows()):
-        smiles = row['SMILES']
-        img = generate_molecule_image(smiles, use_safe_visualization=(string_format == 'SAFE'))
-        with cols[idx % cols_per_row]:
-            if isinstance(img, Image.Image):
-                st.image(img, caption=mol_name)
-            else:
-                st.error(f"Could not generate image for {mol_name}")
-            # Display molecule string
-            string_to_display = safe.encode(smiles) if string_format == 'SAFE' else smiles
-            st.code(string_to_display)
-            # Copy-to-clipboard functionality
-            st_copy_button(string_to_display, key=f'copy_{mol_name}')
-            # Display ADMET properties
-            st.write("**ADMET Properties:**")
-            st.write(row.drop(['SMILES']))
-else:
-    st.write("Click the 'Generate Molecules' button to generate beta-lactam molecules.")
-# Function Definitions
 def generate_molecule_image(input_string, use_safe_visualization=True):
     try:
         if use_safe_visualization:
             try:
-                # Attempt to decode as SAFE string
                 smiles = safe.decode(input_string)
-                # Encode back to SAFE string
                 safe_string = safe.encode(smiles)
-            except Exception:
-                # If decoding fails, assume input is SMILES and encode to SAFE
-                safe_string = safe.encode(input_string)
-            # Generate SVG image with fragment highlights
             svg_str = safe.to_image(safe_string)
-            # Convert SVG to PNG bytes
             png_bytes = cairosvg.svg2png(bytestring=svg_str.encode('utf-8'))
-            # Create an image object
             img = Image.open(io.BytesIO(png_bytes))
         else:
-            # Generate standard molecule image
             mol = Chem.MolFromSmiles(input_string)
             if mol:
                 img = Draw.MolToImage(mol, size=(200, 200))  # Adjusted size
@@ -128,15 +103,125 @@ def generate_molecule_image(input_string, use_safe_visualization=True):
                 img = None
         return img
     except Exception as e:
-        # Collect exceptions for later reporting
-        return e
-import streamlit.components.v1 as components
 def st_copy_button(text, key):
-    """Creates a copy-to-clipboard button."""
     components.html(f"""
         <button onclick="navigator.clipboard.writeText('{text}')" style="padding:5px;">Copy</button>
     """, height=45)

 from PIL import Image
 import cairosvg
 import pandas as pd
+import streamlit.components.v1 as components
+# **Page Configuration**
+st.set_page_config(
+    page_title='Beta-Lactam Molecule Generator',
+    layout='wide'
+)
+# **Load Models**
 @st.cache_resource(show_spinner="Loading Models...", ttl=600)
 def load_models():
+    """
+    Load the molecule generation model, its tokenizer, and the ADMET-AI model.
+
+    The result is cached through ``st.cache_resource`` (declared with
+    ``ttl=600``), so the models are not reloaded on every script rerun.
+
+    Reads the ``HUGGING_FACE_TOKEN`` environment variable. When it is unset,
+    an error is shown in the UI and the script is halted with ``st.stop()``.
+
+    Returns:
+        A ``(model, tokenizer, admet_model)`` tuple.
+    """
+    # Repository name that both the BART model and its tokenizer are loaded from.
     model_name = "bcadkins01/beta_lactam_generator"  # Replace with your actual model path
     access_token = os.getenv("HUGGING_FACE_TOKEN")
+    if access_token is None:
+        st.error("Access token not found. Please set the HUGGING_FACE_TOKEN environment variable.")
+        st.stop()
+    model = BartForConditionalGeneration.from_pretrained(model_name, token=access_token)
+    tokenizer = BartTokenizer.from_pretrained(model_name, token=access_token)
+    # The ADMET-AI model is constructed with its defaults.
     admet_model = ADMETModel()
     return model, tokenizer, admet_model
+# **Load models once and reuse**
 model, tokenizer, admet_model = load_models()
+# **Set Generation Parameters in Sidebar**
 st.sidebar.header('Generation Parameters')
+# **Creativity Slider (Temperature)**
+creativity = st.sidebar.slider(
+    'Creativity (Temperature):',
+    min_value=0.0,
+    max_value=2.0,
+    value=1.0,
+    step=0.1,
+    help="Higher values lead to more diverse outputs."
+)
+# **Number of Molecules to Generate**
+num_molecules = st.sidebar.number_input(
+    'Number of Molecules to Generate:',
+    min_value=1,
+    max_value=5,
+    value=5,
+    help="Select the number of molecules you want to generate."
+)
+# **String Format Option (SMILES or SAFE)**
+string_format = st.sidebar.radio(
+    'String Format:',
+    ('SMILES', 'SAFE'),
+    help="Choose the format for displaying molecule strings."
+)
+# **Function to Generate Molecule Images**
 def generate_molecule_image(input_string, use_safe_visualization=True):
+    """
+    Render a molecule string as an image.
+
+    Args:
+        input_string: Molecule string. When ``use_safe_visualization`` is
+            true it is passed to ``safe.decode``; otherwise it is parsed
+            with ``Chem.MolFromSmiles``.
+        use_safe_visualization: If true, draw through ``safe.to_image`` and
+            convert the SVG to PNG with cairosvg; if false, draw a 200x200
+            image with ``Draw.MolToImage``.
+
+    Returns:
+        The rendered image, or ``None`` on failure. Exceptions raised while
+        rendering are reported in the UI with ``st.error``.
+    """
     try:
         if use_safe_visualization:
             try:
+                # Round-trip: decode the input to SMILES, then re-encode it as SAFE.
                 smiles = safe.decode(input_string)
                 safe_string = safe.encode(smiles)
+            except Exception as e:
+                # Either call above may raise; the message mentions decoding only.
+                st.error(f"Error decoding SAFE string: {e}")
+                return None
+            # Render the SAFE string to SVG.
             svg_str = safe.to_image(safe_string)
+            # Convert the SVG text to PNG bytes.
             png_bytes = cairosvg.svg2png(bytestring=svg_str.encode('utf-8'))
+            # Load the PNG bytes as a PIL image.
             img = Image.open(io.BytesIO(png_bytes))
         else:
+            # Standard RDKit depiction of the parsed molecule.
             mol = Chem.MolFromSmiles(input_string)
             if mol:
                 img = Draw.MolToImage(mol, size=(200, 200))  # Adjusted size
+            # NOTE(review): an `else:` presumably precedes the next line but is not shown in this diff view - confirm against app.py.
                 img = None
         return img
     except Exception as e:
+        # Report the failure in the UI immediately and signal it with None.
+        st.error(f"Error generating molecule image: {e}")
+        return None
+# **Function to Create Copy-to-Clipboard Button**
 def st_copy_button(text, key):
+    """
+    Creates a copy-to-clipboard button for the given text.
+    """
     components.html(f"""
         <button onclick="navigator.clipboard.writeText('{text}')" style="padding:5px;">Copy</button>
     """, height=45)
+# **Generate Molecules Button**
+if st.button('Generate Molecules'):
+    st.info("Generating molecules... Please wait.")
+    # **Beta-lactam core structure**
+    core_smiles = "C1C(=O)N(C)C(=O)C1"
+    # **Tokenize the core SMILES**
+    input_ids = tokenizer(core_smiles, return_tensors='pt').input_ids
+    # **Generate molecules using the model**
+    output_ids = model.generate(
+        input_ids=input_ids,
+        max_length=128,
+        temperature=creativity,
+        do_sample=True,
+        top_k=50,
+        num_return_sequences=num_molecules,
+        num_beams=max(num_molecules, 5)  # Ensure num_beams >= num_return_sequences
+    )
+    # **Decode generated molecule SMILES**
+    generated_smiles = [
+        tokenizer.decode(ids, skip_special_tokens=True)
+        for ids in output_ids
+    ]
+    # **Create molecule names**
+    molecule_names = [
+        f"Mol{str(i).zfill(2)}"
+        for i in range(1, len(generated_smiles) + 1)
+    ]
+    # **Create DataFrame for generated molecules**
+    df_molecules = pd.DataFrame({
+        'Molecule Name': molecule_names,
+        'SMILES': generated_smiles
+    })
+    # **Invalid SMILES Check**
+    from rdkit import Chem
+    # **Function to validate SMILES**
+    def is_valid_smile(smile):
+        return Chem.MolFromSmiles(smile) is not None
+    # **Apply validation function**
+    df_molecules['Valid'] = df_molecules['SMILES'].apply(is_valid_smile)
+    df_valid = df_molecules[df_molecules['Valid']].copy()
+    # **Inform user if any molecules were invalid**
+    invalid_molecules = df_molecules[~df_molecules['Valid']]
+    if not invalid_molecules.empty:
+        st.warning(f"{len(invalid_molecules)} generated molecules were invalid and excluded from predictions.")
+    # **Check if there are valid molecules to proceed**
+    if df_valid.empty:
+        st.error("No valid molecules were generated. Please try adjusting the generation parameters.")
+    else:
+        # **ADMET Predictions**
+        preds = admet_model.predict(smiles=df_valid['SMILES'].tolist())
+        # **Ensure 'SMILES' is a column in preds**
+        if 'SMILES' not in preds.columns:
+            preds['SMILES'] = df_valid['SMILES'].values
+        # **Merge predictions with valid molecules**
+        df_results = pd.merge(df_valid, preds, on='SMILES', how='inner')
+        # **Set 'Molecule Name' as index**
+        df_results.set_index('Molecule Name', inplace=True)
+        # **Check if df_results is empty after merging**
+        if df_results.empty:
+            st.error("No valid molecules were generated after predictions. Please try adjusting the generation parameters.")
+        else:
+            # **Display Molecules**
+            st.subheader('Generated Molecules')
+            # **Determine number of columns per row**
+            cols_per_row = min(5, len(df_results))
+            # **Create columns in Streamlit**
+            cols = st.columns(cols_per_row)
+            # **Iterate over each molecule to display**
+            for idx, (mol_name, row) in enumerate(df_results.iterrows()):
+                smiles = row['SMILES']
+                img = generate_molecule_image(
+                    smiles,
+                    use_safe_visualization=(string_format == 'SAFE')
+                )
+                with cols[idx % cols_per_row]:
+                    if img is not None and isinstance(img, Image.Image):
+                        st.image(img, caption=mol_name)
+                    else:
+                        st.error(f"Could not generate image for {mol_name}")
+                    # **Display molecule string in chosen format**
+                    string_to_display = safe.encode(smiles) if string_format == 'SAFE' else smiles
+                    st.code(string_to_display)
+                    # **Copy-to-clipboard functionality**
+                    st_copy_button(string_to_display, key=f'copy_{mol_name}')
+                    # **Display ADMET properties**
+                    st.write("**ADMET Properties:**")
+                    st.write(row.drop(['SMILES', 'Valid']))
+else:
+    st.write("Click the 'Generate Molecules' button to generate beta-lactam molecules.")