Spaces:
Running
on
Zero
Running
on
Zero
fix: pass full residue set to knapsack
Browse files
app.py
CHANGED
@@ -68,12 +68,11 @@ Path(log_file).touch()
|
|
68 |
|
69 |
logger = logging.getLogger("instanovo")
|
70 |
logger.setLevel(logging.INFO)
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
logger.addHandler(stream_handler)
|
77 |
|
78 |
|
79 |
def load_models_and_knapsack():
|
@@ -147,33 +146,29 @@ def load_models_and_knapsack():
|
|
147 |
if not knapsack_exists:
|
148 |
logger.info("Knapsack not found or failed to load. Generating knapsack...")
|
149 |
try:
|
150 |
-
|
151 |
special_and_nonpositive = list(RESIDUE_SET.special_tokens) + [
|
152 |
-
k for k, v in
|
153 |
]
|
154 |
if special_and_nonpositive:
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
if res in residue_masses_knapsack
|
164 |
-
}
|
165 |
-
|
166 |
-
if not residue_masses_knapsack:
|
167 |
raise ValueError("No valid residues with positive mass found for knapsack generation.")
|
168 |
|
|
|
169 |
KNAPSACK = Knapsack.construct_knapsack(
|
170 |
-
residue_masses=
|
171 |
-
residue_indices=
|
172 |
max_mass=MAX_MASS,
|
173 |
mass_scale=MASS_SCALE,
|
174 |
)
|
175 |
logger.info(f"Knapsack generated. Saving to {KNAPSACK_DIR}...")
|
176 |
-
KNAPSACK_DIR.mkdir(parents=True, exist_ok=True)
|
177 |
KNAPSACK.save(str(KNAPSACK_DIR))
|
178 |
logger.info("Knapsack saved.")
|
179 |
except Exception as e:
|
@@ -717,6 +712,10 @@ with gr.Blocks(
|
|
717 |
* **Knapsack Beam Search:** use this for the best results and highest peptide recall, but is about 10x slower than Greedy Search.
|
718 |
* `delta_mass_ppm` shows the lowest absolute precursor mass error (ppm) across isotopes 0-1 for the final sequence.
|
719 |
* Check logs for progress, especially for large files or slower methods.
|
|
|
|
|
|
|
|
|
720 |
""",
|
721 |
elem_classes="feedback"
|
722 |
)
|
|
|
68 |
|
69 |
logger = logging.getLogger("instanovo")
|
70 |
logger.setLevel(logging.INFO)
|
71 |
+
file_handler = logging.FileHandler(log_file)
|
72 |
+
file_handler.setLevel(logging.INFO)
|
73 |
+
formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
|
74 |
+
file_handler.setFormatter(formatter)
|
75 |
+
logger.addHandler(file_handler)
|
|
|
76 |
|
77 |
|
78 |
def load_models_and_knapsack():
|
|
|
146 |
if not knapsack_exists:
|
147 |
logger.info("Knapsack not found or failed to load. Generating knapsack...")
|
148 |
try:
|
149 |
+
residue_masses_for_calc = dict(RESIDUE_SET.residue_masses.copy())
|
150 |
special_and_nonpositive = list(RESIDUE_SET.special_tokens) + [
|
151 |
+
k for k, v in residue_masses_for_calc.items() if v <= 0
|
152 |
]
|
153 |
if special_and_nonpositive:
|
154 |
+
logger.info(f"Excluding special/non-positive mass residues from knapsack: {special_and_nonpositive}")
|
155 |
+
for res in set(special_and_nonpositive):
|
156 |
+
if res in residue_masses_for_calc:
|
157 |
+
del residue_masses_for_calc[res]
|
158 |
+
|
159 |
+
full_residue_indices = RESIDUE_SET.residue_to_index
|
160 |
+
|
161 |
+
if not residue_masses_for_calc: # Check if any residues are left for calculation
|
|
|
|
|
|
|
|
|
162 |
raise ValueError("No valid residues with positive mass found for knapsack generation.")
|
163 |
|
164 |
+
logger.info("Generating knapsack. This will take a few minutes, please be patient.")
|
165 |
KNAPSACK = Knapsack.construct_knapsack(
|
166 |
+
residue_masses=residue_masses_for_calc,
|
167 |
+
residue_indices=full_residue_indices,
|
168 |
max_mass=MAX_MASS,
|
169 |
mass_scale=MASS_SCALE,
|
170 |
)
|
171 |
logger.info(f"Knapsack generated. Saving to {KNAPSACK_DIR}...")
|
|
|
172 |
KNAPSACK.save(str(KNAPSACK_DIR))
|
173 |
logger.info("Knapsack saved.")
|
174 |
except Exception as e:
|
|
|
712 |
* **Knapsack Beam Search:** use this for the best results and highest peptide recall, but is about 10x slower than Greedy Search.
|
713 |
* `delta_mass_ppm` shows the lowest absolute precursor mass error (ppm) across isotopes 0-1 for the final sequence.
|
714 |
* Check logs for progress, especially for large files or slower methods.
|
715 |
+
|
716 |
+
**Links:**
|
717 |
+
* [InstaNovo enables diffusion-powered de novo peptide sequencing in large-scale proteomics experiments](https://www.nature.com/articles/s42256-025-01019-5), Eloff, Kalogeropoulos et al. 2025, Nature Machine Intelligence.
|
718 |
+
* [GitHub Repository for InstaNovo](https://github.com/instadeepai/instanovo)
|
719 |
""",
|
720 |
elem_classes="feedback"
|
721 |
)
|