Spaces:

InstaDeepAI
/

InstaNovo

Running on Zero

App Files Files Community

BioGeek committed on Mar 27

Commit

dd42569

1 Parent(s): e649e86

feat: add logging (WIP)

Browse files

Files changed (3) hide show

app.py +47 -32
pyproject.toml +1 -0
uv.lock +14 -0

app.py CHANGED Viewed

@@ -5,8 +5,10 @@ import tempfile
 import time
 import polars as pl
 import numpy as np
 from pathlib import Path
 from omegaconf import OmegaConf, DictConfig
 # --- InstaNovo Imports ---
 try:
@@ -40,26 +42,35 @@ KNAPSACK: Knapsack | None = None
 MODEL_CONFIG: DictConfig | None = None
 RESIDUE_SET: ResidueSet | None = None
-# Assets
 gr.set_static_paths(paths=[Path.cwd().absolute()/"assets"])
 def load_model_and_knapsack():
     """Loads the InstaNovo model and generates/loads the knapsack."""
     global MODEL, KNAPSACK, MODEL_CONFIG, RESIDUE_SET
     if MODEL is not None:
-        print("Model already loaded.")
         return
-    print(f"Loading InstaNovo model: {MODEL_ID} to {DEVICE}...")
     try:
         MODEL, MODEL_CONFIG = InstaNovo.from_pretrained(MODEL_ID)
         MODEL.to(DEVICE)
         MODEL.eval()
         RESIDUE_SET = MODEL.residue_set
-        print("Model loaded successfully.")
     except Exception as e:
-        print(f"Error loading model: {e}")
         raise gr.Error(f"Failed to load InstaNovo model: {MODEL_ID}. Error: {e}")
     # --- Knapsack Handling ---
@@ -70,17 +81,17 @@ def load_model_and_knapsack():
     )
     if knapsack_exists:
-        print(f"Loading pre-generated knapsack from {KNAPSACK_DIR}...")
         try:
             KNAPSACK = Knapsack.from_file(str(KNAPSACK_DIR))
-            print("Knapsack loaded successfully.")
         except Exception as e:
-            print(f"Error loading knapsack: {e}. Will attempt to regenerate.")
             KNAPSACK = None # Force regeneration
             knapsack_exists = False # Ensure generation happens
     if not knapsack_exists:
-        print("Knapsack not found or failed to load. Generating knapsack...")
         if RESIDUE_SET is None:
              raise gr.Error("Cannot generate knapsack because ResidueSet failed to load.")
         try:
@@ -88,7 +99,7 @@ def load_model_and_knapsack():
             residue_masses_knapsack = dict(RESIDUE_SET.residue_masses.copy())
             negative_residues = [k for k, v in residue_masses_knapsack.items() if v <= 0]
             if negative_residues:
-                print(f"Warning: Non-positive masses found in residues: {negative_residues}. "
                       "Excluding from knapsack generation.")
                 for res in negative_residues:
                     del residue_masses_knapsack[res]
@@ -110,11 +121,11 @@ def load_model_and_knapsack():
                 max_mass=MAX_MASS,
                 mass_scale=MASS_SCALE,
             )
-            print(f"Knapsack generated. Saving to {KNAPSACK_DIR}...")
             KNAPSACK.save(str(KNAPSACK_DIR)) # Save for future runs
-            print("Knapsack saved.")
         except Exception as e:
-            print(f"Error generating or saving knapsack: {e}")
             gr.Warning("Failed to generate Knapsack. Knapsack Beam Search will not be available. {e}")
             KNAPSACK = None # Ensure it's None if generation failed
@@ -131,7 +142,7 @@ def create_inference_config(
     if DEFAULT_CONFIG_PATH.exists():
          base_cfg = OmegaConf.load(DEFAULT_CONFIG_PATH)
     else:
-         print(f"Warning: Default config not found at {DEFAULT_CONFIG_PATH}. Using minimal config.")
          # Create a minimal config if default is missing
          base_cfg = OmegaConf.create({
              "data_path": None,
@@ -217,8 +228,8 @@ def predict_peptides(input_file, decoding_method):
         raise gr.Error("Please upload a mass spectrometry file.")
     input_path = input_file.name # Gradio provides the path in .name
-    print(f"Processing file: {input_path}")
-    print(f"Using decoding method: {decoding_method}")
     # Create a temporary file for the output CSV
     with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as temp_out:
@@ -227,10 +238,10 @@ def predict_peptides(input_file, decoding_method):
     try:
         # 1. Create Config
         config = create_inference_config(input_path, output_csv_path, decoding_method)
-        print("Inference Config:\n", OmegaConf.to_yaml(config))
         # 2. Load Data using SpectrumDataFrame
-        print("Loading spectrum data...")
         try:
             sdf = SpectrumDataFrame.load(
                 config.data_path,
@@ -247,13 +258,13 @@ def predict_peptides(input_file, decoding_method):
                 lambda row: (row["precursor_charge"] <= max_charge) and (row["precursor_charge"] > 0)
             )
             if len(sdf) < original_size:
-                print(f"Warning: Filtered {original_size - len(sdf)} spectra with charge > {max_charge} or <= 0.")
             if len(sdf) == 0:
                  raise gr.Error("No valid spectra found in the uploaded file after filtering.")
-            print(f"Data loaded: {len(sdf)} spectra.")
         except Exception as e:
-            print(f"Error loading data: {e}")
             raise gr.Error(f"Failed to load or process the spectrum file. Error: {e}")
         # 3. Prepare Dataset and DataLoader
@@ -275,7 +286,7 @@ def predict_peptides(input_file, decoding_method):
         )
         # 4. Select Decoder
-        print("Initializing decoder...")
         decoder: Decoder
         if config.use_knapsack:
             if KNAPSACK is None:
@@ -287,7 +298,7 @@ def predict_peptides(input_file, decoding_method):
             decoder = KnapsackBeamSearchDecoder(model=MODEL, knapsack=KNAPSACK)
         elif config.num_beams > 1:
              # BeamSearchDecoder is available but not explicitly requested, use Greedy for num_beams=1
-             print(f"Warning: num_beams={config.num_beams} > 1 but only Greedy and Knapsack Beam Search are implemented in this app. Defaulting to Greedy.")
              decoder = GreedyDecoder(model=MODEL, mass_scale=MASS_SCALE)
         else:
              decoder = GreedyDecoder(
@@ -297,10 +308,10 @@ def predict_peptides(input_file, decoding_method):
                  suppressed_residues=config.get("suppressed_residues", None),
                  disable_terminal_residues_anywhere=config.get("disable_terminal_residues_anywhere", True),
              )
-        print(f"Using decoder: {type(decoder).__name__}")
         # 5. Run Prediction Loop (Adapted from instanovo/transformer/predict.py)
-        print("Starting prediction...")
         start_time = time.time()
         results_list: list[ScoredSequence | list] = [] # Store ScoredSequence or empty list
@@ -325,13 +336,13 @@ def predict_peptides(input_file, decoding_method):
                     return_beam=False # Only get the top prediction for simplicity
                 )
             results_list.extend(batch_predictions) # Should be list[ScoredSequence] or list[list]
-            print(f"Processed batch {i+1}/{len(dl)}")
         end_time = time.time()
-        print(f"Prediction finished in {end_time - start_time:.2f} seconds.")
         # 6. Format Results
-        print("Formatting results...")
         output_data = []
         # Use sdf index columns + prediction results
         index_cols = [col for col in config.index_columns if col in sdf.df.columns]
@@ -356,7 +367,7 @@ def predict_peptides(input_file, decoding_method):
                      min_abs_ppm = min(abs(p) for p in delta_mass_list) if delta_mass_list else float('nan')
                      row_data["delta_mass_ppm"] = f"{min_abs_ppm:.2f}"
                 except Exception as e:
-                     print(f"Warning: Could not calculate delta mass for prediction {i}: {e}")
                      row_data["delta_mass_ppm"] = "N/A"
             else:
@@ -374,7 +385,7 @@ def predict_peptides(input_file, decoding_method):
             if col in output_df.columns:
                 final_display_cols.append(col)
             else:
-                 print(f"Warning: Expected display column '{col}' not found in results.")
         # Add any remaining index columns that weren't in display_cols
         for col in index_cols:
@@ -385,14 +396,14 @@ def predict_peptides(input_file, decoding_method):
         # 7. Save full results to CSV
-        print(f"Saving results to {output_csv_path}...")
         output_df.write_csv(output_csv_path)
         # Return DataFrame for display and path for download
         return output_df_display.to_pandas(), output_csv_path
     except Exception as e:
-        print(f"An error occurred during prediction: {e}")
         # Clean up the temporary output file if it exists
         if os.path.exists(output_csv_path):
             os.remove(output_csv_path)
@@ -469,6 +480,10 @@ with gr.Blocks(css=css, theme=gr.themes.Default(primary_hue="blue", secondary_hu
          """.format(MODEL_ID=MODEL_ID)
     )
 # --- Launch the App ---
 if __name__ == "__main__":
     # Set share=True for temporary public link if running locally

 import time
 import polars as pl
 import numpy as np
+import logging
 from pathlib import Path
 from omegaconf import OmegaConf, DictConfig
+from gradio_log import Log
 # --- InstaNovo Imports ---
 try:
 MODEL_CONFIG: DictConfig | None = None
 RESIDUE_SET: ResidueSet | None = None
+# --- Assets ---
 gr.set_static_paths(paths=[Path.cwd().absolute()/"assets"])
+# Logging configuration
+# Application log messages are written to this file; the same path is handed
+# to the `Log` UI component further down so the logs can be shown in the app.
+log_file = "/tmp/instanovo_gradio_log.txt"
+# Create the file up front so it exists before anything tries to read it.
+Path(log_file).touch()
+file_handler = logging.FileHandler(log_file)
+file_handler.setLevel(logging.DEBUG)
+logger = logging.getLogger("instanovo")
+logger.setLevel(logging.DEBUG)
+# Attach the handler to the logger. Without this call the handler is never
+# used: no record reaches log_file, and only WARNING-and-above messages would
+# surface on stderr through logging's last-resort handler.
+logger.addHandler(file_handler)
 def load_model_and_knapsack():
     """Loads the InstaNovo model and generates/loads the knapsack."""
     global MODEL, KNAPSACK, MODEL_CONFIG, RESIDUE_SET
     if MODEL is not None:
+        logger.info("Model already loaded.")
         return
+    logger.info(f"Loading InstaNovo model: {MODEL_ID} to {DEVICE}...")
     try:
         MODEL, MODEL_CONFIG = InstaNovo.from_pretrained(MODEL_ID)
         MODEL.to(DEVICE)
         MODEL.eval()
         RESIDUE_SET = MODEL.residue_set
+        logger.info("Model loaded successfully.")
     except Exception as e:
+        logger.error(f"Error loading model: {e}")
         raise gr.Error(f"Failed to load InstaNovo model: {MODEL_ID}. Error: {e}")
     # --- Knapsack Handling ---
     )
     if knapsack_exists:
+        logger.info(f"Loading pre-generated knapsack from {KNAPSACK_DIR}...")
         try:
             KNAPSACK = Knapsack.from_file(str(KNAPSACK_DIR))
+            logger.info("Knapsack loaded successfully.")
         except Exception as e:
+            logger.info(f"Error loading knapsack: {e}. Will attempt to regenerate.")
             KNAPSACK = None # Force regeneration
             knapsack_exists = False # Ensure generation happens
     if not knapsack_exists:
+        logger.info("Knapsack not found or failed to load. Generating knapsack...")
         if RESIDUE_SET is None:
              raise gr.Error("Cannot generate knapsack because ResidueSet failed to load.")
         try:
             residue_masses_knapsack = dict(RESIDUE_SET.residue_masses.copy())
             negative_residues = [k for k, v in residue_masses_knapsack.items() if v <= 0]
             if negative_residues:
+                logger.info(f"Warning: Non-positive masses found in residues: {negative_residues}. "
                       "Excluding from knapsack generation.")
                 for res in negative_residues:
                     del residue_masses_knapsack[res]
                 max_mass=MAX_MASS,
                 mass_scale=MASS_SCALE,
             )
+            logger.info(f"Knapsack generated. Saving to {KNAPSACK_DIR}...")
             KNAPSACK.save(str(KNAPSACK_DIR)) # Save for future runs
+            logger.info("Knapsack saved.")
         except Exception as e:
+            logger.info(f"Error generating or saving knapsack: {e}")
             gr.Warning("Failed to generate Knapsack. Knapsack Beam Search will not be available. {e}")
             KNAPSACK = None # Ensure it's None if generation failed
     if DEFAULT_CONFIG_PATH.exists():
          base_cfg = OmegaConf.load(DEFAULT_CONFIG_PATH)
     else:
+         logger.info(f"Warning: Default config not found at {DEFAULT_CONFIG_PATH}. Using minimal config.")
          # Create a minimal config if default is missing
          base_cfg = OmegaConf.create({
              "data_path": None,
         raise gr.Error("Please upload a mass spectrometry file.")
     input_path = input_file.name # Gradio provides the path in .name
+    logger.info(f"Processing file: {input_path}")
+    logger.info(f"Using decoding method: {decoding_method}")
     # Create a temporary file for the output CSV
     with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as temp_out:
     try:
         # 1. Create Config
         config = create_inference_config(input_path, output_csv_path, decoding_method)
+        logger.info(f"Inference Config:\n{OmegaConf.to_yaml(config)}")
         # 2. Load Data using SpectrumDataFrame
+        logger.info("Loading spectrum data...")
         try:
             sdf = SpectrumDataFrame.load(
                 config.data_path,
                 lambda row: (row["precursor_charge"] <= max_charge) and (row["precursor_charge"] > 0)
             )
             if len(sdf) < original_size:
+                logger.info(f"Warning: Filtered {original_size - len(sdf)} spectra with charge > {max_charge} or <= 0.")
             if len(sdf) == 0:
                  raise gr.Error("No valid spectra found in the uploaded file after filtering.")
+            logger.info(f"Data loaded: {len(sdf)} spectra.")
         except Exception as e:
+            logger.info(f"Error loading data: {e}")
             raise gr.Error(f"Failed to load or process the spectrum file. Error: {e}")
         # 3. Prepare Dataset and DataLoader
         )
         # 4. Select Decoder
+        logger.info("Initializing decoder...")
         decoder: Decoder
         if config.use_knapsack:
             if KNAPSACK is None:
             decoder = KnapsackBeamSearchDecoder(model=MODEL, knapsack=KNAPSACK)
         elif config.num_beams > 1:
              # BeamSearchDecoder is available but not explicitly requested, use Greedy for num_beams=1
+             logger.info(f"Warning: num_beams={config.num_beams} > 1 but only Greedy and Knapsack Beam Search are implemented in this app. Defaulting to Greedy.")
              decoder = GreedyDecoder(model=MODEL, mass_scale=MASS_SCALE)
         else:
              decoder = GreedyDecoder(
                  suppressed_residues=config.get("suppressed_residues", None),
                  disable_terminal_residues_anywhere=config.get("disable_terminal_residues_anywhere", True),
              )
+        logger.info(f"Using decoder: {type(decoder).__name__}")
         # 5. Run Prediction Loop (Adapted from instanovo/transformer/predict.py)
+        logger.info("Starting prediction...")
         start_time = time.time()
         results_list: list[ScoredSequence | list] = [] # Store ScoredSequence or empty list
                     return_beam=False # Only get the top prediction for simplicity
                 )
             results_list.extend(batch_predictions) # Should be list[ScoredSequence] or list[list]
+            logger.info(f"Processed batch {i+1}/{len(dl)}")
         end_time = time.time()
+        logger.info(f"Prediction finished in {end_time - start_time:.2f} seconds.")
         # 6. Format Results
+        logger.info("Formatting results...")
         output_data = []
         # Use sdf index columns + prediction results
         index_cols = [col for col in config.index_columns if col in sdf.df.columns]
                      min_abs_ppm = min(abs(p) for p in delta_mass_list) if delta_mass_list else float('nan')
                      row_data["delta_mass_ppm"] = f"{min_abs_ppm:.2f}"
                 except Exception as e:
+                     logger.info(f"Warning: Could not calculate delta mass for prediction {i}: {e}")
                      row_data["delta_mass_ppm"] = "N/A"
             else:
             if col in output_df.columns:
                 final_display_cols.append(col)
             else:
+                 logger.info(f"Warning: Expected display column '{col}' not found in results.")
         # Add any remaining index columns that weren't in display_cols
         for col in index_cols:
         # 7. Save full results to CSV
+        logger.info(f"Saving results to {output_csv_path}...")
         output_df.write_csv(output_csv_path)
         # Return DataFrame for display and path for download
         return output_df_display.to_pandas(), output_csv_path
     except Exception as e:
+        logger.info(f"An error occurred during prediction: {e}")
         # Clean up the temporary output file if it exists
         if os.path.exists(output_csv_path):
             os.remove(output_csv_path)
          """.format(MODEL_ID=MODEL_ID)
     )
+    # Add logging component
+    with gr.Accordion("Application Logs", open=False):
+        log_display = Log(log_file, dark=True, height=300)
 # --- Launch the App ---
 if __name__ == "__main__":
     # Set share=True for temporary public link if running locally

pyproject.toml CHANGED Viewed

@@ -6,6 +6,7 @@ readme = "README.md"
 requires-python = ">=3.12"
 dependencies = [
     "gradio>=5.23.1",
     "instanovo",
 ]

 requires-python = ">=3.12"
 dependencies = [
     "gradio>=5.23.1",
+    "gradio-log>=0.0.8",
     "instanovo",
 ]

uv.lock CHANGED Viewed

@@ -675,6 +675,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/15/c8/0df7f92c8f1bdf5c244c29de8cd7e33a5931768ddba245526a770bfa18a2/gradio_client-1.8.0-py3-none-any.whl", hash = "sha256:27a3ab5278a44d57d1d05a86de67cec5f7370e540600d11816744a620addb967", size = 322165 },
 ]
 [[package]]
 name = "groovy"
 version = "0.1.2"
@@ -868,12 +880,14 @@ version = "0.1.0"
 source = { virtual = "." }
 dependencies = [
     { name = "gradio" },
     { name = "instanovo" },
 ]
 [package.metadata]
 requires-dist = [
     { name = "gradio", specifier = ">=5.23.1" },
     { name = "instanovo", path = "../dtu-denovo-sequencing/dist/instanovo-1.1.0-py3-none-any.whl" },
 ]

     { url = "https://files.pythonhosted.org/packages/15/c8/0df7f92c8f1bdf5c244c29de8cd7e33a5931768ddba245526a770bfa18a2/gradio_client-1.8.0-py3-none-any.whl", hash = "sha256:27a3ab5278a44d57d1d05a86de67cec5f7370e540600d11816744a620addb967", size = 322165 },
 ]
+[[package]]
+name = "gradio-log"
+version = "0.0.8"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "gradio" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/6a/8d/368e16b93fdd2fc0b601ff648a7786e8551a7b4cc946faec0bdcff33ea4e/gradio_log-0.0.8.tar.gz", hash = "sha256:43d7aeb2651fb3b0583f6c205bdabc4ede50b47b531328174db9b0ca63cccf0c", size = 3457531 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/4d/9b/5caba8a73175d2c6d8ebd72d92d1e6943f188cb5fff2196593a5f0d0dc1e/gradio_log-0.0.8-py3-none-any.whl", hash = "sha256:ed88db174429cc539c3bd9605891471f69544aafb930c36706a67a073459db48", size = 1220834 },
+]
 [[package]]
 name = "groovy"
 version = "0.1.2"
 source = { virtual = "." }
 dependencies = [
     { name = "gradio" },
+    { name = "gradio-log" },
     { name = "instanovo" },
 ]
 [package.metadata]
 requires-dist = [
     { name = "gradio", specifier = ">=5.23.1" },
+    { name = "gradio-log", specifier = ">=0.0.8" },
     { name = "instanovo", path = "../dtu-denovo-sequencing/dist/instanovo-1.1.0-py3-none-any.whl" },
 ]