BioGeek committed on
Commit
dd42569
·
1 Parent(s): e649e86

feat: add logging (WIP)

Browse files
Files changed (3) hide show
  1. app.py +47 -32
  2. pyproject.toml +1 -0
  3. uv.lock +14 -0
app.py CHANGED
@@ -5,8 +5,10 @@ import tempfile
5
  import time
6
  import polars as pl
7
  import numpy as np
 
8
  from pathlib import Path
9
  from omegaconf import OmegaConf, DictConfig
 
10
 
11
  # --- InstaNovo Imports ---
12
  try:
@@ -40,26 +42,35 @@ KNAPSACK: Knapsack | None = None
40
  MODEL_CONFIG: DictConfig | None = None
41
  RESIDUE_SET: ResidueSet | None = None
42
 
43
- # Assets
44
  gr.set_static_paths(paths=[Path.cwd().absolute()/"assets"])
45
 
 
 
 
 
 
 
 
 
 
46
 
47
  def load_model_and_knapsack():
48
  """Loads the InstaNovo model and generates/loads the knapsack."""
49
  global MODEL, KNAPSACK, MODEL_CONFIG, RESIDUE_SET
50
  if MODEL is not None:
51
- print("Model already loaded.")
52
  return
53
 
54
- print(f"Loading InstaNovo model: {MODEL_ID} to {DEVICE}...")
55
  try:
56
  MODEL, MODEL_CONFIG = InstaNovo.from_pretrained(MODEL_ID)
57
  MODEL.to(DEVICE)
58
  MODEL.eval()
59
  RESIDUE_SET = MODEL.residue_set
60
- print("Model loaded successfully.")
61
  except Exception as e:
62
- print(f"Error loading model: {e}")
63
  raise gr.Error(f"Failed to load InstaNovo model: {MODEL_ID}. Error: {e}")
64
 
65
  # --- Knapsack Handling ---
@@ -70,17 +81,17 @@ def load_model_and_knapsack():
70
  )
71
 
72
  if knapsack_exists:
73
- print(f"Loading pre-generated knapsack from {KNAPSACK_DIR}...")
74
  try:
75
  KNAPSACK = Knapsack.from_file(str(KNAPSACK_DIR))
76
- print("Knapsack loaded successfully.")
77
  except Exception as e:
78
- print(f"Error loading knapsack: {e}. Will attempt to regenerate.")
79
  KNAPSACK = None # Force regeneration
80
  knapsack_exists = False # Ensure generation happens
81
 
82
  if not knapsack_exists:
83
- print("Knapsack not found or failed to load. Generating knapsack...")
84
  if RESIDUE_SET is None:
85
  raise gr.Error("Cannot generate knapsack because ResidueSet failed to load.")
86
  try:
@@ -88,7 +99,7 @@ def load_model_and_knapsack():
88
  residue_masses_knapsack = dict(RESIDUE_SET.residue_masses.copy())
89
  negative_residues = [k for k, v in residue_masses_knapsack.items() if v <= 0]
90
  if negative_residues:
91
- print(f"Warning: Non-positive masses found in residues: {negative_residues}. "
92
  "Excluding from knapsack generation.")
93
  for res in negative_residues:
94
  del residue_masses_knapsack[res]
@@ -110,11 +121,11 @@ def load_model_and_knapsack():
110
  max_mass=MAX_MASS,
111
  mass_scale=MASS_SCALE,
112
  )
113
- print(f"Knapsack generated. Saving to {KNAPSACK_DIR}...")
114
  KNAPSACK.save(str(KNAPSACK_DIR)) # Save for future runs
115
- print("Knapsack saved.")
116
  except Exception as e:
117
- print(f"Error generating or saving knapsack: {e}")
118
  gr.Warning("Failed to generate Knapsack. Knapsack Beam Search will not be available. {e}")
119
  KNAPSACK = None # Ensure it's None if generation failed
120
 
@@ -131,7 +142,7 @@ def create_inference_config(
131
  if DEFAULT_CONFIG_PATH.exists():
132
  base_cfg = OmegaConf.load(DEFAULT_CONFIG_PATH)
133
  else:
134
- print(f"Warning: Default config not found at {DEFAULT_CONFIG_PATH}. Using minimal config.")
135
  # Create a minimal config if default is missing
136
  base_cfg = OmegaConf.create({
137
  "data_path": None,
@@ -217,8 +228,8 @@ def predict_peptides(input_file, decoding_method):
217
  raise gr.Error("Please upload a mass spectrometry file.")
218
 
219
  input_path = input_file.name # Gradio provides the path in .name
220
- print(f"Processing file: {input_path}")
221
- print(f"Using decoding method: {decoding_method}")
222
 
223
  # Create a temporary file for the output CSV
224
  with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as temp_out:
@@ -227,10 +238,10 @@ def predict_peptides(input_file, decoding_method):
227
  try:
228
  # 1. Create Config
229
  config = create_inference_config(input_path, output_csv_path, decoding_method)
230
- print("Inference Config:\n", OmegaConf.to_yaml(config))
231
 
232
  # 2. Load Data using SpectrumDataFrame
233
- print("Loading spectrum data...")
234
  try:
235
  sdf = SpectrumDataFrame.load(
236
  config.data_path,
@@ -247,13 +258,13 @@ def predict_peptides(input_file, decoding_method):
247
  lambda row: (row["precursor_charge"] <= max_charge) and (row["precursor_charge"] > 0)
248
  )
249
  if len(sdf) < original_size:
250
- print(f"Warning: Filtered {original_size - len(sdf)} spectra with charge > {max_charge} or <= 0.")
251
 
252
  if len(sdf) == 0:
253
  raise gr.Error("No valid spectra found in the uploaded file after filtering.")
254
- print(f"Data loaded: {len(sdf)} spectra.")
255
  except Exception as e:
256
- print(f"Error loading data: {e}")
257
  raise gr.Error(f"Failed to load or process the spectrum file. Error: {e}")
258
 
259
  # 3. Prepare Dataset and DataLoader
@@ -275,7 +286,7 @@ def predict_peptides(input_file, decoding_method):
275
  )
276
 
277
  # 4. Select Decoder
278
- print("Initializing decoder...")
279
  decoder: Decoder
280
  if config.use_knapsack:
281
  if KNAPSACK is None:
@@ -287,7 +298,7 @@ def predict_peptides(input_file, decoding_method):
287
  decoder = KnapsackBeamSearchDecoder(model=MODEL, knapsack=KNAPSACK)
288
  elif config.num_beams > 1:
289
  # BeamSearchDecoder is available but not explicitly requested, use Greedy for num_beams=1
290
- print(f"Warning: num_beams={config.num_beams} > 1 but only Greedy and Knapsack Beam Search are implemented in this app. Defaulting to Greedy.")
291
  decoder = GreedyDecoder(model=MODEL, mass_scale=MASS_SCALE)
292
  else:
293
  decoder = GreedyDecoder(
@@ -297,10 +308,10 @@ def predict_peptides(input_file, decoding_method):
297
  suppressed_residues=config.get("suppressed_residues", None),
298
  disable_terminal_residues_anywhere=config.get("disable_terminal_residues_anywhere", True),
299
  )
300
- print(f"Using decoder: {type(decoder).__name__}")
301
 
302
  # 5. Run Prediction Loop (Adapted from instanovo/transformer/predict.py)
303
- print("Starting prediction...")
304
  start_time = time.time()
305
  results_list: list[ScoredSequence | list] = [] # Store ScoredSequence or empty list
306
 
@@ -325,13 +336,13 @@ def predict_peptides(input_file, decoding_method):
325
  return_beam=False # Only get the top prediction for simplicity
326
  )
327
  results_list.extend(batch_predictions) # Should be list[ScoredSequence] or list[list]
328
- print(f"Processed batch {i+1}/{len(dl)}")
329
 
330
  end_time = time.time()
331
- print(f"Prediction finished in {end_time - start_time:.2f} seconds.")
332
 
333
  # 6. Format Results
334
- print("Formatting results...")
335
  output_data = []
336
  # Use sdf index columns + prediction results
337
  index_cols = [col for col in config.index_columns if col in sdf.df.columns]
@@ -356,7 +367,7 @@ def predict_peptides(input_file, decoding_method):
356
  min_abs_ppm = min(abs(p) for p in delta_mass_list) if delta_mass_list else float('nan')
357
  row_data["delta_mass_ppm"] = f"{min_abs_ppm:.2f}"
358
  except Exception as e:
359
- print(f"Warning: Could not calculate delta mass for prediction {i}: {e}")
360
  row_data["delta_mass_ppm"] = "N/A"
361
 
362
  else:
@@ -374,7 +385,7 @@ def predict_peptides(input_file, decoding_method):
374
  if col in output_df.columns:
375
  final_display_cols.append(col)
376
  else:
377
- print(f"Warning: Expected display column '{col}' not found in results.")
378
 
379
  # Add any remaining index columns that weren't in display_cols
380
  for col in index_cols:
@@ -385,14 +396,14 @@ def predict_peptides(input_file, decoding_method):
385
 
386
 
387
  # 7. Save full results to CSV
388
- print(f"Saving results to {output_csv_path}...")
389
  output_df.write_csv(output_csv_path)
390
 
391
  # Return DataFrame for display and path for download
392
  return output_df_display.to_pandas(), output_csv_path
393
 
394
  except Exception as e:
395
- print(f"An error occurred during prediction: {e}")
396
  # Clean up the temporary output file if it exists
397
  if os.path.exists(output_csv_path):
398
  os.remove(output_csv_path)
@@ -469,6 +480,10 @@ with gr.Blocks(css=css, theme=gr.themes.Default(primary_hue="blue", secondary_hu
469
  """.format(MODEL_ID=MODEL_ID)
470
  )
471
 
 
 
 
 
472
  # --- Launch the App ---
473
  if __name__ == "__main__":
474
  # Set share=True for temporary public link if running locally
 
5
  import time
6
  import polars as pl
7
  import numpy as np
8
+ import logging
9
  from pathlib import Path
10
  from omegaconf import OmegaConf, DictConfig
11
+ from gradio_log import Log
12
 
13
  # --- InstaNovo Imports ---
14
  try:
 
42
  MODEL_CONFIG: DictConfig | None = None
43
  RESIDUE_SET: ResidueSet | None = None
44
 
45
+ # --- Assets ---
46
  gr.set_static_paths(paths=[Path.cwd().absolute()/"assets"])
47
 
48
# Logging configuration
# Application messages are written to a plain-text file; the same path is
# handed to the `Log` UI component further down so the log can be viewed
# from the web page.
log_file = "/tmp/instanovo_gradio_log.txt"
# Create the file up front so it exists before anything tries to read it.
Path(log_file).touch()

file_handler = logging.FileHandler(log_file)
file_handler.setLevel(logging.DEBUG)
# Timestamp and level make the entries readable in the log viewer.
file_handler.setFormatter(
    logging.Formatter("%(asctime)s %(levelname)s %(name)s: %(message)s")
)

logger = logging.getLogger("instanovo")
logger.setLevel(logging.DEBUG)
# The handler must be attached to the logger, otherwise no record ever reaches
# `log_file`. Skip attaching when an equivalent file handler is already
# registered (e.g. the module is executed again in the same process), which
# would otherwise duplicate every line.
if not any(
    isinstance(existing, logging.FileHandler)
    and existing.baseFilename == file_handler.baseFilename
    for existing in logger.handlers
):
    logger.addHandler(file_handler)
57
 
58
  def load_model_and_knapsack():
59
  """Loads the InstaNovo model and generates/loads the knapsack."""
60
  global MODEL, KNAPSACK, MODEL_CONFIG, RESIDUE_SET
61
  if MODEL is not None:
62
+ logger.info("Model already loaded.")
63
  return
64
 
65
+ logger.info(f"Loading InstaNovo model: {MODEL_ID} to {DEVICE}...")
66
  try:
67
  MODEL, MODEL_CONFIG = InstaNovo.from_pretrained(MODEL_ID)
68
  MODEL.to(DEVICE)
69
  MODEL.eval()
70
  RESIDUE_SET = MODEL.residue_set
71
+ logger.info("Model loaded successfully.")
72
  except Exception as e:
73
+ logger.error(f"Error loading model: {e}")
74
  raise gr.Error(f"Failed to load InstaNovo model: {MODEL_ID}. Error: {e}")
75
 
76
  # --- Knapsack Handling ---
 
81
  )
82
 
83
  if knapsack_exists:
84
+ logger.info(f"Loading pre-generated knapsack from {KNAPSACK_DIR}...")
85
  try:
86
  KNAPSACK = Knapsack.from_file(str(KNAPSACK_DIR))
87
+ logger.info("Knapsack loaded successfully.")
88
  except Exception as e:
89
+ logger.info(f"Error loading knapsack: {e}. Will attempt to regenerate.")
90
  KNAPSACK = None # Force regeneration
91
  knapsack_exists = False # Ensure generation happens
92
 
93
  if not knapsack_exists:
94
+ logger.info("Knapsack not found or failed to load. Generating knapsack...")
95
  if RESIDUE_SET is None:
96
  raise gr.Error("Cannot generate knapsack because ResidueSet failed to load.")
97
  try:
 
99
  residue_masses_knapsack = dict(RESIDUE_SET.residue_masses.copy())
100
  negative_residues = [k for k, v in residue_masses_knapsack.items() if v <= 0]
101
  if negative_residues:
102
+ logger.info(f"Warning: Non-positive masses found in residues: {negative_residues}. "
103
  "Excluding from knapsack generation.")
104
  for res in negative_residues:
105
  del residue_masses_knapsack[res]
 
121
  max_mass=MAX_MASS,
122
  mass_scale=MASS_SCALE,
123
  )
124
+ logger.info(f"Knapsack generated. Saving to {KNAPSACK_DIR}...")
125
  KNAPSACK.save(str(KNAPSACK_DIR)) # Save for future runs
126
+ logger.info("Knapsack saved.")
127
  except Exception as e:
128
+ logger.info(f"Error generating or saving knapsack: {e}")
129
  gr.Warning("Failed to generate Knapsack. Knapsack Beam Search will not be available. {e}")
130
  KNAPSACK = None # Ensure it's None if generation failed
131
 
 
142
  if DEFAULT_CONFIG_PATH.exists():
143
  base_cfg = OmegaConf.load(DEFAULT_CONFIG_PATH)
144
  else:
145
+ logger.info(f"Warning: Default config not found at {DEFAULT_CONFIG_PATH}. Using minimal config.")
146
  # Create a minimal config if default is missing
147
  base_cfg = OmegaConf.create({
148
  "data_path": None,
 
228
  raise gr.Error("Please upload a mass spectrometry file.")
229
 
230
  input_path = input_file.name # Gradio provides the path in .name
231
+ logger.info(f"Processing file: {input_path}")
232
+ logger.info(f"Using decoding method: {decoding_method}")
233
 
234
  # Create a temporary file for the output CSV
235
  with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as temp_out:
 
238
  try:
239
  # 1. Create Config
240
  config = create_inference_config(input_path, output_csv_path, decoding_method)
241
+ logger.info(f"Inference Config:\n{OmegaConf.to_yaml(config)}")
242
 
243
  # 2. Load Data using SpectrumDataFrame
244
+ logger.info("Loading spectrum data...")
245
  try:
246
  sdf = SpectrumDataFrame.load(
247
  config.data_path,
 
258
  lambda row: (row["precursor_charge"] <= max_charge) and (row["precursor_charge"] > 0)
259
  )
260
  if len(sdf) < original_size:
261
+ logger.info(f"Warning: Filtered {original_size - len(sdf)} spectra with charge > {max_charge} or <= 0.")
262
 
263
  if len(sdf) == 0:
264
  raise gr.Error("No valid spectra found in the uploaded file after filtering.")
265
+ logger.info(f"Data loaded: {len(sdf)} spectra.")
266
  except Exception as e:
267
+ logger.info(f"Error loading data: {e}")
268
  raise gr.Error(f"Failed to load or process the spectrum file. Error: {e}")
269
 
270
  # 3. Prepare Dataset and DataLoader
 
286
  )
287
 
288
  # 4. Select Decoder
289
+ logger.info("Initializing decoder...")
290
  decoder: Decoder
291
  if config.use_knapsack:
292
  if KNAPSACK is None:
 
298
  decoder = KnapsackBeamSearchDecoder(model=MODEL, knapsack=KNAPSACK)
299
  elif config.num_beams > 1:
300
  # BeamSearchDecoder is available but not explicitly requested, use Greedy for num_beams=1
301
+ logger.info(f"Warning: num_beams={config.num_beams} > 1 but only Greedy and Knapsack Beam Search are implemented in this app. Defaulting to Greedy.")
302
  decoder = GreedyDecoder(model=MODEL, mass_scale=MASS_SCALE)
303
  else:
304
  decoder = GreedyDecoder(
 
308
  suppressed_residues=config.get("suppressed_residues", None),
309
  disable_terminal_residues_anywhere=config.get("disable_terminal_residues_anywhere", True),
310
  )
311
+ logger.info(f"Using decoder: {type(decoder).__name__}")
312
 
313
  # 5. Run Prediction Loop (Adapted from instanovo/transformer/predict.py)
314
+ logger.info("Starting prediction...")
315
  start_time = time.time()
316
  results_list: list[ScoredSequence | list] = [] # Store ScoredSequence or empty list
317
 
 
336
  return_beam=False # Only get the top prediction for simplicity
337
  )
338
  results_list.extend(batch_predictions) # Should be list[ScoredSequence] or list[list]
339
+ logger.info(f"Processed batch {i+1}/{len(dl)}")
340
 
341
  end_time = time.time()
342
+ logger.info(f"Prediction finished in {end_time - start_time:.2f} seconds.")
343
 
344
  # 6. Format Results
345
+ logger.info("Formatting results...")
346
  output_data = []
347
  # Use sdf index columns + prediction results
348
  index_cols = [col for col in config.index_columns if col in sdf.df.columns]
 
367
  min_abs_ppm = min(abs(p) for p in delta_mass_list) if delta_mass_list else float('nan')
368
  row_data["delta_mass_ppm"] = f"{min_abs_ppm:.2f}"
369
  except Exception as e:
370
+ logger.info(f"Warning: Could not calculate delta mass for prediction {i}: {e}")
371
  row_data["delta_mass_ppm"] = "N/A"
372
 
373
  else:
 
385
  if col in output_df.columns:
386
  final_display_cols.append(col)
387
  else:
388
+ logger.info(f"Warning: Expected display column '{col}' not found in results.")
389
 
390
  # Add any remaining index columns that weren't in display_cols
391
  for col in index_cols:
 
396
 
397
 
398
  # 7. Save full results to CSV
399
+ logger.info(f"Saving results to {output_csv_path}...")
400
  output_df.write_csv(output_csv_path)
401
 
402
  # Return DataFrame for display and path for download
403
  return output_df_display.to_pandas(), output_csv_path
404
 
405
  except Exception as e:
406
+ logger.info(f"An error occurred during prediction: {e}")
407
  # Clean up the temporary output file if it exists
408
  if os.path.exists(output_csv_path):
409
  os.remove(output_csv_path)
 
480
  """.format(MODEL_ID=MODEL_ID)
481
  )
482
 
483
+ # Add logging component
484
+ with gr.Accordion("Application Logs", open=False):
485
+ log_display = Log(log_file, dark=True, height=300)
486
+
487
  # --- Launch the App ---
488
  if __name__ == "__main__":
489
  # Set share=True for temporary public link if running locally
pyproject.toml CHANGED
@@ -6,6 +6,7 @@ readme = "README.md"
6
  requires-python = ">=3.12"
7
  dependencies = [
8
  "gradio>=5.23.1",
 
9
  "instanovo",
10
  ]
11
 
 
6
  requires-python = ">=3.12"
7
  dependencies = [
8
  "gradio>=5.23.1",
9
+ "gradio-log>=0.0.8",
10
  "instanovo",
11
  ]
12
 
uv.lock CHANGED
@@ -675,6 +675,18 @@ wheels = [
675
  { url = "https://files.pythonhosted.org/packages/15/c8/0df7f92c8f1bdf5c244c29de8cd7e33a5931768ddba245526a770bfa18a2/gradio_client-1.8.0-py3-none-any.whl", hash = "sha256:27a3ab5278a44d57d1d05a86de67cec5f7370e540600d11816744a620addb967", size = 322165 },
676
  ]
677
 
 
 
 
 
 
 
 
 
 
 
 
 
678
  [[package]]
679
  name = "groovy"
680
  version = "0.1.2"
@@ -868,12 +880,14 @@ version = "0.1.0"
868
  source = { virtual = "." }
869
  dependencies = [
870
  { name = "gradio" },
 
871
  { name = "instanovo" },
872
  ]
873
 
874
  [package.metadata]
875
  requires-dist = [
876
  { name = "gradio", specifier = ">=5.23.1" },
 
877
  { name = "instanovo", path = "../dtu-denovo-sequencing/dist/instanovo-1.1.0-py3-none-any.whl" },
878
  ]
879
 
 
675
  { url = "https://files.pythonhosted.org/packages/15/c8/0df7f92c8f1bdf5c244c29de8cd7e33a5931768ddba245526a770bfa18a2/gradio_client-1.8.0-py3-none-any.whl", hash = "sha256:27a3ab5278a44d57d1d05a86de67cec5f7370e540600d11816744a620addb967", size = 322165 },
676
  ]
677
 
678
+ [[package]]
679
+ name = "gradio-log"
680
+ version = "0.0.8"
681
+ source = { registry = "https://pypi.org/simple" }
682
+ dependencies = [
683
+ { name = "gradio" },
684
+ ]
685
+ sdist = { url = "https://files.pythonhosted.org/packages/6a/8d/368e16b93fdd2fc0b601ff648a7786e8551a7b4cc946faec0bdcff33ea4e/gradio_log-0.0.8.tar.gz", hash = "sha256:43d7aeb2651fb3b0583f6c205bdabc4ede50b47b531328174db9b0ca63cccf0c", size = 3457531 }
686
+ wheels = [
687
+ { url = "https://files.pythonhosted.org/packages/4d/9b/5caba8a73175d2c6d8ebd72d92d1e6943f188cb5fff2196593a5f0d0dc1e/gradio_log-0.0.8-py3-none-any.whl", hash = "sha256:ed88db174429cc539c3bd9605891471f69544aafb930c36706a67a073459db48", size = 1220834 },
688
+ ]
689
+
690
  [[package]]
691
  name = "groovy"
692
  version = "0.1.2"
 
880
  source = { virtual = "." }
881
  dependencies = [
882
  { name = "gradio" },
883
+ { name = "gradio-log" },
884
  { name = "instanovo" },
885
  ]
886
 
887
  [package.metadata]
888
  requires-dist = [
889
  { name = "gradio", specifier = ">=5.23.1" },
890
+ { name = "gradio-log", specifier = ">=0.0.8" },
891
  { name = "instanovo", path = "../dtu-denovo-sequencing/dist/instanovo-1.1.0-py3-none-any.whl" },
892
  ]
893