Spaces:

InstaDeepAI
/

InstaNovo

Running on Zero

App Files Files Community

BioGeek committed on Mar 30

Commit

2ce8475

1 Parent(s): 68f76b5

docs: update Notes

Browse files

Files changed (1) hide show

app.py +48 -58

app.py CHANGED Viewed

@@ -39,9 +39,6 @@ except ImportError as e:
 TRANSFORMER_MODEL_ID = "instanovo-v1.1.0"
 DIFFUSION_MODEL_ID = "instanovoplus-v1.1.0-alpha"
 KNAPSACK_DIR = Path("./knapsack_cache")
-DEFAULT_CONFIG_PATH = Path(
-    "./configs/inference/default.yaml"
-)
 # Determine device
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
@@ -198,43 +195,38 @@ def create_inference_config(
     output_path: str,
 ) -> DictConfig:
     """Creates a base OmegaConf DictConfig for prediction environment."""
-    if DEFAULT_CONFIG_PATH.exists():
-        base_cfg = OmegaConf.load(DEFAULT_CONFIG_PATH)
-        logger.info(f"Loaded base config from {DEFAULT_CONFIG_PATH}")
-    else:
-         logger.info(f"Warning: Default config not found at {DEFAULT_CONFIG_PATH}. Using minimal config.")
-         base_cfg = OmegaConf.create({
-             "data_path": None, "instanovo_model": TRANSFORMER_MODEL_ID,
-             "instanovoplus_model": DIFFUSION_MODEL_ID, "output_path": None,
-             "knapsack_path": str(KNAPSACK_DIR), "denovo": True, "refine": True,
-             "num_beams": 1, "max_length": 40, "max_charge": 10,
-             "isotope_error_range": [0, 1], "subset": 1.0, "use_knapsack": False,
-             "save_beams": False, "batch_size": 64, "device": DEVICE, "fp16": FP16,
-             "log_interval": 500, "use_basic_logging": True,
-             "filter_precursor_ppm": 20, "filter_confidence": 1e-4,
-             "filter_fdr_threshold": 0.05, "suppressed_residues": None,
-             "disable_terminal_residues_anywhere": True,
-             "residue_remapping": {
-                 "M(ox)": "M[UNIMOD:35]", "M(+15.99)": "M[UNIMOD:35]",
-                 "S(p)": "S[UNIMOD:21]", "T(p)": "T[UNIMOD:21]", "Y(p)": "Y[UNIMOD:21]",
-                 "S(+79.97)": "S[UNIMOD:21]", "T(+79.97)": "T[UNIMOD:21]", "Y(+79.97)": "Y[UNIMOD:21]",
-                 "Q(+0.98)": "Q[UNIMOD:7]", "N(+0.98)": "N[UNIMOD:7]",
-                 "Q(+.98)": "Q[UNIMOD:7]", "N(+.98)": "N[UNIMOD:7]",
-                 "C(+57.02)": "C[UNIMOD:4]", "(+42.01)": "[UNIMOD:1]",
-                 "(+43.01)": "[UNIMOD:5]", "(-17.03)": "[UNIMOD:385]",
-             },
-             "column_map": {
-                "Modified sequence": "modified_sequence", "MS/MS m/z": "precursor_mz",
-                "Mass": "precursor_mass", "Charge": "precursor_charge",
-                "Mass values": "mz_array", "Mass spectrum": "mz_array",
-                "Intensity": "intensity_array", "Raw intensity spectrum": "intensity_array",
-                "Scan number": "scan_number"
-             },
-             "index_columns": [
-                 "scan_number", "precursor_mz", "precursor_charge",
-                 "retention_time", "spectrum_id", "experiment_name",
-             ],
-         })
     cfg_overrides = {
         "data_path": input_path, "output_path": output_path,
@@ -524,7 +516,7 @@ def predict_peptides(input_file, mode_selection, transformer_decoder_selection):
     logger.info("--- New Prediction Request ---")
     logger.info(f"Input File: {input_path}")
     logger.info(f"Selected Mode: {mode_selection}")
-    if "Refinement" in mode_selection or "InstaNovo Only" in mode_selection:
         logger.info(f"Selected Transformer Decoder: {transformer_decoder_selection}")
     # Create temp output file
@@ -630,7 +622,7 @@ def predict_peptides(input_file, mode_selection, transformer_decoder_selection):
                      row_data["prediction"] = ""
                  results_data.append(row_data)
-        elif "Refinement" in mode_selection:
              output_headers.extend([
                  "transformer_prediction", "transformer_log_probability",
                  "refined_prediction", "refined_log_probability", "refined_delta_mass_ppm"
@@ -713,8 +705,20 @@ with gr.Blocks(
         Upload your mass spectrometry data file (.mgf, .mzml, or .mzxml) and get peptide sequence predictions.
         Choose your prediction method and decoding options.
-        **Note:** The InstaNovo+ model `{DIFFUSION_MODEL_ID}` is an alpha release.
-        """
     )
     with gr.Row():
         with gr.Column(scale=1):
@@ -790,20 +794,6 @@ with gr.Blocks(
         label="Example Usage:",
     )
-    gr.Markdown(
-        f"""
-         **Notes:**
-         *   Predictions use `{TRANSFORMER_MODEL_ID}` (Transformer) and `{DIFFUSION_MODEL_ID}` (Diffusion, Alpha release).
-         *   **Refinement Mode:** Runs initial prediction with the selected Transformer method (Greedy/Knapsack), then refines using InstaNovo+.
-         *   **InstaNovo Only Mode:** Uses only the Transformer with the selected decoding method.
-         *   **InstaNovo+ Only Mode:** Predicts directly using the Diffusion model (alpha version).
-         *   `delta_mass_ppm` shows the lowest absolute precursor mass error (ppm) across isotopes 0-1 for the final sequence.
-         *   Knapsack Beam Search requires a pre-computed knapsack file. If unavailable, the option will be disabled.
-         *   Check logs for progress, especially for large files or slower methods.
-         """,
-         elem_classes="feedback"
-    )
     with gr.Accordion("Application Logs", open=True):
         log_display = Log(log_file, dark=True, height=300)

 TRANSFORMER_MODEL_ID = "instanovo-v1.1.0"
 DIFFUSION_MODEL_ID = "instanovoplus-v1.1.0-alpha"
 KNAPSACK_DIR = Path("./knapsack_cache")
 # Determine device
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
     output_path: str,
 ) -> DictConfig:
     """Creates a base OmegaConf DictConfig for prediction environment."""
+    base_cfg = OmegaConf.create({
+        "data_path": None, "instanovo_model": TRANSFORMER_MODEL_ID,
+        "instanovoplus_model": DIFFUSION_MODEL_ID, "output_path": None,
+        "knapsack_path": str(KNAPSACK_DIR), "denovo": True, "refine": True,
+        "num_beams": 1, "max_length": 40, "max_charge": 10,
+        "isotope_error_range": [0, 1], "subset": 1.0, "use_knapsack": False,
+        "save_beams": False, "batch_size": 64, "device": DEVICE, "fp16": FP16,
+        "log_interval": 500, "use_basic_logging": True,
+        "filter_precursor_ppm": 20, "filter_confidence": 1e-4,
+        "filter_fdr_threshold": 0.05, "suppressed_residues": None,
+        "disable_terminal_residues_anywhere": True,
+        "residue_remapping": {
+            "M(ox)": "M[UNIMOD:35]", "M(+15.99)": "M[UNIMOD:35]",
+            "S(p)": "S[UNIMOD:21]", "T(p)": "T[UNIMOD:21]", "Y(p)": "Y[UNIMOD:21]",
+            "S(+79.97)": "S[UNIMOD:21]", "T(+79.97)": "T[UNIMOD:21]", "Y(+79.97)": "Y[UNIMOD:21]",
+            "Q(+0.98)": "Q[UNIMOD:7]", "N(+0.98)": "N[UNIMOD:7]",
+            "Q(+.98)": "Q[UNIMOD:7]", "N(+.98)": "N[UNIMOD:7]",
+            "C(+57.02)": "C[UNIMOD:4]", "(+42.01)": "[UNIMOD:1]",
+            "(+43.01)": "[UNIMOD:5]", "(-17.03)": "[UNIMOD:385]",
+        },
+        "column_map": {
+        "Modified sequence": "modified_sequence", "MS/MS m/z": "precursor_mz",
+        "Mass": "precursor_mass", "Charge": "precursor_charge",
+        "Mass values": "mz_array", "Mass spectrum": "mz_array",
+        "Intensity": "intensity_array", "Raw intensity spectrum": "intensity_array",
+        "Scan number": "scan_number"
+        },
+        "index_columns": [
+            "scan_number", "precursor_mz", "precursor_charge",
+            "retention_time", "spectrum_id", "experiment_name",
+        ],
+    })
     cfg_overrides = {
         "data_path": input_path, "output_path": output_path,
     logger.info("--- New Prediction Request ---")
     logger.info(f"Input File: {input_path}")
     logger.info(f"Selected Mode: {mode_selection}")
+    if "refinement" in mode_selection or "InstaNovo Only" in mode_selection:
         logger.info(f"Selected Transformer Decoder: {transformer_decoder_selection}")
     # Create temp output file
                      row_data["prediction"] = ""
                  results_data.append(row_data)
+        elif "refinement" in mode_selection:
              output_headers.extend([
                  "transformer_prediction", "transformer_log_probability",
                  "refined_prediction", "refined_log_probability", "refined_delta_mass_ppm"
         Upload your mass spectrometry data file (.mgf, .mzml, or .mzxml) and get peptide sequence predictions.
         Choose your prediction method and decoding options.
+         **Notes:**
+         *   Predictions use version `{TRANSFORMER_MODEL_ID}` for the transformer-based InstaNovo model and version `{DIFFUSION_MODEL_ID}` for the diffusion-based InstaNovo+ model.
+         *   The InstaNovo+ model `{DIFFUSION_MODEL_ID}` is an alpha release.
+         * **Prediction Modes:**
+             *   **InstaNovo with InstaNovo+ refinement:** Runs initial prediction with the selected Transformer method (Greedy/Knapsack), then refines using InstaNovo+.
+             *   **InstaNovo Only:** Uses only the Transformer with the selected decoding method.
+             *   **InstaNovo+ Only:** Predicts directly using the Diffusion model (alpha release).
+         * **Transformer Decoding Methods:**
+             *   **Greedy Search:** Use this for optimal speed; it performs similarly to Knapsack Beam Search at 5% FDR.
+             *   **Knapsack Beam Search:** Use this for the best results and highest peptide recall; it is about 10x slower than Greedy Search.
+         *   `delta_mass_ppm` shows the lowest absolute precursor mass error (ppm) across isotopes 0-1 for the final sequence.
+         *   Check logs for progress, especially for large files or slower methods.
+         """,
+         elem_classes="feedback"
     )
     with gr.Row():
         with gr.Column(scale=1):
         label="Example Usage:",
     )
     with gr.Accordion("Application Logs", open=True):
         log_display = Log(log_file, dark=True, height=300)