Spaces:
Running
on
Zero
Running
on
Zero
docs: update Notes
Browse files
app.py
CHANGED
@@ -39,9 +39,6 @@ except ImportError as e:
|
|
39 |
TRANSFORMER_MODEL_ID = "instanovo-v1.1.0"
|
40 |
DIFFUSION_MODEL_ID = "instanovoplus-v1.1.0-alpha"
|
41 |
KNAPSACK_DIR = Path("./knapsack_cache")
|
42 |
-
DEFAULT_CONFIG_PATH = Path(
|
43 |
-
"./configs/inference/default.yaml"
|
44 |
-
)
|
45 |
|
46 |
# Determine device
|
47 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
@@ -198,43 +195,38 @@ def create_inference_config(
|
|
198 |
output_path: str,
|
199 |
) -> DictConfig:
|
200 |
"""Creates a base OmegaConf DictConfig for prediction environment."""
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
"index_columns": [
|
234 |
-
"scan_number", "precursor_mz", "precursor_charge",
|
235 |
-
"retention_time", "spectrum_id", "experiment_name",
|
236 |
-
],
|
237 |
-
})
|
238 |
|
239 |
cfg_overrides = {
|
240 |
"data_path": input_path, "output_path": output_path,
|
@@ -524,7 +516,7 @@ def predict_peptides(input_file, mode_selection, transformer_decoder_selection):
|
|
524 |
logger.info("--- New Prediction Request ---")
|
525 |
logger.info(f"Input File: {input_path}")
|
526 |
logger.info(f"Selected Mode: {mode_selection}")
|
527 |
-
if "
|
528 |
logger.info(f"Selected Transformer Decoder: {transformer_decoder_selection}")
|
529 |
|
530 |
# Create temp output file
|
@@ -630,7 +622,7 @@ def predict_peptides(input_file, mode_selection, transformer_decoder_selection):
|
|
630 |
row_data["prediction"] = ""
|
631 |
results_data.append(row_data)
|
632 |
|
633 |
-
elif "
|
634 |
output_headers.extend([
|
635 |
"transformer_prediction", "transformer_log_probability",
|
636 |
"refined_prediction", "refined_log_probability", "refined_delta_mass_ppm"
|
@@ -713,8 +705,20 @@ with gr.Blocks(
|
|
713 |
Upload your mass spectrometry data file (.mgf, .mzml, or .mzxml) and get peptide sequence predictions.
|
714 |
Choose your prediction method and decoding options.
|
715 |
|
716 |
-
|
717 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
718 |
)
|
719 |
with gr.Row():
|
720 |
with gr.Column(scale=1):
|
@@ -790,20 +794,6 @@ with gr.Blocks(
|
|
790 |
label="Example Usage:",
|
791 |
)
|
792 |
|
793 |
-
gr.Markdown(
|
794 |
-
f"""
|
795 |
-
**Notes:**
|
796 |
-
* Predictions use `{TRANSFORMER_MODEL_ID}` (Transformer) and `{DIFFUSION_MODEL_ID}` (Diffusion, Alpha release).
|
797 |
-
* **Refinement Mode:** Runs initial prediction with the selected Transformer method (Greedy/Knapsack), then refines using InstaNovo+.
|
798 |
-
* **InstaNovo Only Mode:** Uses only the Transformer with the selected decoding method.
|
799 |
-
* **InstaNovo+ Only Mode:** Predicts directly using the Diffusion model (alpha version).
|
800 |
-
* `delta_mass_ppm` shows the lowest absolute precursor mass error (ppm) across isotopes 0-1 for the final sequence.
|
801 |
-
* Knapsack Beam Search requires a pre-computed knapsack file. If unavailable, the option will be disabled.
|
802 |
-
* Check logs for progress, especially for large files or slower methods.
|
803 |
-
""",
|
804 |
-
elem_classes="feedback"
|
805 |
-
)
|
806 |
-
|
807 |
with gr.Accordion("Application Logs", open=True):
|
808 |
log_display = Log(log_file, dark=True, height=300)
|
809 |
|
|
|
39 |
TRANSFORMER_MODEL_ID = "instanovo-v1.1.0"
|
40 |
DIFFUSION_MODEL_ID = "instanovoplus-v1.1.0-alpha"
|
41 |
KNAPSACK_DIR = Path("./knapsack_cache")
|
|
|
|
|
|
|
42 |
|
43 |
# Determine device
|
44 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
195 |
output_path: str,
|
196 |
) -> DictConfig:
|
197 |
"""Creates a base OmegaConf DictConfig for prediction environment."""
|
198 |
+
base_cfg = OmegaConf.create({
|
199 |
+
"data_path": None, "instanovo_model": TRANSFORMER_MODEL_ID,
|
200 |
+
"instanovoplus_model": DIFFUSION_MODEL_ID, "output_path": None,
|
201 |
+
"knapsack_path": str(KNAPSACK_DIR), "denovo": True, "refine": True,
|
202 |
+
"num_beams": 1, "max_length": 40, "max_charge": 10,
|
203 |
+
"isotope_error_range": [0, 1], "subset": 1.0, "use_knapsack": False,
|
204 |
+
"save_beams": False, "batch_size": 64, "device": DEVICE, "fp16": FP16,
|
205 |
+
"log_interval": 500, "use_basic_logging": True,
|
206 |
+
"filter_precursor_ppm": 20, "filter_confidence": 1e-4,
|
207 |
+
"filter_fdr_threshold": 0.05, "suppressed_residues": None,
|
208 |
+
"disable_terminal_residues_anywhere": True,
|
209 |
+
"residue_remapping": {
|
210 |
+
"M(ox)": "M[UNIMOD:35]", "M(+15.99)": "M[UNIMOD:35]",
|
211 |
+
"S(p)": "S[UNIMOD:21]", "T(p)": "T[UNIMOD:21]", "Y(p)": "Y[UNIMOD:21]",
|
212 |
+
"S(+79.97)": "S[UNIMOD:21]", "T(+79.97)": "T[UNIMOD:21]", "Y(+79.97)": "Y[UNIMOD:21]",
|
213 |
+
"Q(+0.98)": "Q[UNIMOD:7]", "N(+0.98)": "N[UNIMOD:7]",
|
214 |
+
"Q(+.98)": "Q[UNIMOD:7]", "N(+.98)": "N[UNIMOD:7]",
|
215 |
+
"C(+57.02)": "C[UNIMOD:4]", "(+42.01)": "[UNIMOD:1]",
|
216 |
+
"(+43.01)": "[UNIMOD:5]", "(-17.03)": "[UNIMOD:385]",
|
217 |
+
},
|
218 |
+
"column_map": {
|
219 |
+
"Modified sequence": "modified_sequence", "MS/MS m/z": "precursor_mz",
|
220 |
+
"Mass": "precursor_mass", "Charge": "precursor_charge",
|
221 |
+
"Mass values": "mz_array", "Mass spectrum": "mz_array",
|
222 |
+
"Intensity": "intensity_array", "Raw intensity spectrum": "intensity_array",
|
223 |
+
"Scan number": "scan_number"
|
224 |
+
},
|
225 |
+
"index_columns": [
|
226 |
+
"scan_number", "precursor_mz", "precursor_charge",
|
227 |
+
"retention_time", "spectrum_id", "experiment_name",
|
228 |
+
],
|
229 |
+
})
|
|
|
|
|
|
|
|
|
|
|
230 |
|
231 |
cfg_overrides = {
|
232 |
"data_path": input_path, "output_path": output_path,
|
|
|
516 |
logger.info("--- New Prediction Request ---")
|
517 |
logger.info(f"Input File: {input_path}")
|
518 |
logger.info(f"Selected Mode: {mode_selection}")
|
519 |
+
if "refinement" in mode_selection or "InstaNovo Only" in mode_selection:
|
520 |
logger.info(f"Selected Transformer Decoder: {transformer_decoder_selection}")
|
521 |
|
522 |
# Create temp output file
|
|
|
622 |
row_data["prediction"] = ""
|
623 |
results_data.append(row_data)
|
624 |
|
625 |
+
elif "refinement" in mode_selection:
|
626 |
output_headers.extend([
|
627 |
"transformer_prediction", "transformer_log_probability",
|
628 |
"refined_prediction", "refined_log_probability", "refined_delta_mass_ppm"
|
|
|
705 |
Upload your mass spectrometry data file (.mgf, .mzml, or .mzxml) and get peptide sequence predictions.
|
706 |
Choose your prediction method and decoding options.
|
707 |
|
708 |
+
**Notes:**
|
709 |
+
* Predictions use version `{TRANSFORMER_MODEL_ID}` for the transformer-based InstaNovo model and version `{DIFFUSION_MODEL_ID}` for the diffusion-based InstaNovo+ model.
|
710 |
+
* The InstaNovo+ model `{DIFFUSION_MODEL_ID}` is an alpha release.
|
711 |
+
* **Prediction Modes:**
|
712 |
+
* **InstaNovo with InstaNovo+ refinement:** Runs initial prediction with the selected Transformer method (Greedy/Knapsack), then refines using InstaNovo+.
|
713 |
+
* **InstaNovo Only:** Uses only the Transformer with the selected decoding method.
|
714 |
+
* **InstaNovo+ Only:** Predicts directly using the Diffusion model (alpha release).
|
715 |
+
* **Transformer Decoding Methods:**
|
716 |
+
* **Greedy Search:** use this for optimal performance; it has similar performance to Knapsack Beam Search at 5% FDR.
|
717 |
+
* **Knapsack Beam Search:** use this for the best results and highest peptide recall, but note that it is about 10x slower than Greedy Search.
|
718 |
+
* `delta_mass_ppm` shows the lowest absolute precursor mass error (ppm) across isotopes 0-1 for the final sequence.
|
719 |
+
* Check logs for progress, especially for large files or slower methods.
|
720 |
+
""",
|
721 |
+
elem_classes="feedback"
|
722 |
)
|
723 |
with gr.Row():
|
724 |
with gr.Column(scale=1):
|
|
|
794 |
label="Example Usage:",
|
795 |
)
|
796 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
797 |
with gr.Accordion("Application Logs", open=True):
|
798 |
log_display = Log(log_file, dark=True, height=300)
|
799 |
|