BioGeek committed on
Commit
e7ec3c2
·
1 Parent(s): 2ce8475

fix: pass full residue set to knapsack

Browse files
Files changed (1) hide show
  1. app.py +22 -23
app.py CHANGED
@@ -68,12 +68,11 @@ Path(log_file).touch()
68
 
69
  logger = logging.getLogger("instanovo")
70
  logger.setLevel(logging.INFO)
71
- if not logger.handlers:
72
- file_handler = logging.FileHandler(log_file)
73
- file_handler.setLevel(logging.INFO)
74
- stream_handler = logging.StreamHandler()
75
- stream_handler.setLevel(logging.INFO)
76
- logger.addHandler(stream_handler)
77
 
78
 
79
  def load_models_and_knapsack():
@@ -147,33 +146,29 @@ def load_models_and_knapsack():
147
  if not knapsack_exists:
148
  logger.info("Knapsack not found or failed to load. Generating knapsack...")
149
  try:
150
- residue_masses_knapsack = dict(RESIDUE_SET.residue_masses.copy())
151
  special_and_nonpositive = list(RESIDUE_SET.special_tokens) + [
152
- k for k, v in residue_masses_knapsack.items() if v <= 0
153
  ]
154
  if special_and_nonpositive:
155
- logger.info(f"Excluding special/non-positive mass residues from knapsack: {special_and_nonpositive}")
156
- for res in set(special_and_nonpositive):
157
- if res in residue_masses_knapsack:
158
- del residue_masses_knapsack[res]
159
-
160
- valid_residue_indices = {
161
- res: idx
162
- for res, idx in RESIDUE_SET.residue_to_index.items()
163
- if res in residue_masses_knapsack
164
- }
165
-
166
- if not residue_masses_knapsack:
167
  raise ValueError("No valid residues with positive mass found for knapsack generation.")
168
 
 
169
  KNAPSACK = Knapsack.construct_knapsack(
170
- residue_masses=residue_masses_knapsack,
171
- residue_indices=valid_residue_indices,
172
  max_mass=MAX_MASS,
173
  mass_scale=MASS_SCALE,
174
  )
175
  logger.info(f"Knapsack generated. Saving to {KNAPSACK_DIR}...")
176
- KNAPSACK_DIR.mkdir(parents=True, exist_ok=True)
177
  KNAPSACK.save(str(KNAPSACK_DIR))
178
  logger.info("Knapsack saved.")
179
  except Exception as e:
@@ -717,6 +712,10 @@ with gr.Blocks(
717
  * **Knapsack Beam Search:** use this for the best results and highest peptide recall, but is about 10x slower than Greedy Search.
718
  * `delta_mass_ppm` shows the lowest absolute precursor mass error (ppm) across isotopes 0-1 for the final sequence.
719
  * Check logs for progress, especially for large files or slower methods.
 
 
 
 
720
  """,
721
  elem_classes="feedback"
722
  )
 
68
 
69
  logger = logging.getLogger("instanovo")
70
  logger.setLevel(logging.INFO)
71
+ file_handler = logging.FileHandler(log_file)
72
+ file_handler.setLevel(logging.INFO)
73
+ formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
74
+ file_handler.setFormatter(formatter)
75
+ logger.addHandler(file_handler)
 
76
 
77
 
78
  def load_models_and_knapsack():
 
146
  if not knapsack_exists:
147
  logger.info("Knapsack not found or failed to load. Generating knapsack...")
148
  try:
149
+ residue_masses_for_calc = dict(RESIDUE_SET.residue_masses.copy())
150
  special_and_nonpositive = list(RESIDUE_SET.special_tokens) + [
151
+ k for k, v in residue_masses_for_calc.items() if v <= 0
152
  ]
153
  if special_and_nonpositive:
154
+ logger.info(f"Excluding special/non-positive mass residues from knapsack: {special_and_nonpositive}")
155
+ for res in set(special_and_nonpositive):
156
+ if res in residue_masses_for_calc:
157
+ del residue_masses_for_calc[res]
158
+
159
+ full_residue_indices = RESIDUE_SET.residue_to_index
160
+
161
+ if not residue_masses_for_calc: # Check if any residues are left for calculation
 
 
 
 
162
  raise ValueError("No valid residues with positive mass found for knapsack generation.")
163
 
164
+ logger.info("Generating knapsack. This will take a few minutes, please be patient.")
165
  KNAPSACK = Knapsack.construct_knapsack(
166
+ residue_masses=residue_masses_for_calc,
167
+ residue_indices=full_residue_indices,
168
  max_mass=MAX_MASS,
169
  mass_scale=MASS_SCALE,
170
  )
171
  logger.info(f"Knapsack generated. Saving to {KNAPSACK_DIR}...")
 
172
  KNAPSACK.save(str(KNAPSACK_DIR))
173
  logger.info("Knapsack saved.")
174
  except Exception as e:
 
712
  * **Knapsack Beam Search:** use this for the best results and highest peptide recall, but is about 10x slower than Greedy Search.
713
  * `delta_mass_ppm` shows the lowest absolute precursor mass error (ppm) across isotopes 0-1 for the final sequence.
714
  * Check logs for progress, especially for large files or slower methods.
715
+
716
+ **Links:**
717
+ * [InstaNovo enables diffusion-powered de novo peptide sequencing in large-scale proteomics experiments](https://www.nature.com/articles/s42256-025-01019-5), Eloff, Kalogeropoulos et al. 2025, Nature Machine Intelligence.
718
+ * [GitHub Repository for InstaNovo](https://github.com/instadeepai/instanovo)
719
  """,
720
  elem_classes="feedback"
721
  )