BioGeek committed on
Commit
2ce8475
·
1 Parent(s): 68f76b5

docs: update Notes

Browse files
Files changed (1) hide show
  1. app.py +48 -58
app.py CHANGED
@@ -39,9 +39,6 @@ except ImportError as e:
39
  TRANSFORMER_MODEL_ID = "instanovo-v1.1.0"
40
  DIFFUSION_MODEL_ID = "instanovoplus-v1.1.0-alpha"
41
  KNAPSACK_DIR = Path("./knapsack_cache")
42
- DEFAULT_CONFIG_PATH = Path(
43
- "./configs/inference/default.yaml"
44
- )
45
 
46
  # Determine device
47
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
@@ -198,43 +195,38 @@ def create_inference_config(
198
  output_path: str,
199
  ) -> DictConfig:
200
  """Creates a base OmegaConf DictConfig for prediction environment."""
201
- if DEFAULT_CONFIG_PATH.exists():
202
- base_cfg = OmegaConf.load(DEFAULT_CONFIG_PATH)
203
- logger.info(f"Loaded base config from {DEFAULT_CONFIG_PATH}")
204
- else:
205
- logger.info(f"Warning: Default config not found at {DEFAULT_CONFIG_PATH}. Using minimal config.")
206
- base_cfg = OmegaConf.create({
207
- "data_path": None, "instanovo_model": TRANSFORMER_MODEL_ID,
208
- "instanovoplus_model": DIFFUSION_MODEL_ID, "output_path": None,
209
- "knapsack_path": str(KNAPSACK_DIR), "denovo": True, "refine": True,
210
- "num_beams": 1, "max_length": 40, "max_charge": 10,
211
- "isotope_error_range": [0, 1], "subset": 1.0, "use_knapsack": False,
212
- "save_beams": False, "batch_size": 64, "device": DEVICE, "fp16": FP16,
213
- "log_interval": 500, "use_basic_logging": True,
214
- "filter_precursor_ppm": 20, "filter_confidence": 1e-4,
215
- "filter_fdr_threshold": 0.05, "suppressed_residues": None,
216
- "disable_terminal_residues_anywhere": True,
217
- "residue_remapping": {
218
- "M(ox)": "M[UNIMOD:35]", "M(+15.99)": "M[UNIMOD:35]",
219
- "S(p)": "S[UNIMOD:21]", "T(p)": "T[UNIMOD:21]", "Y(p)": "Y[UNIMOD:21]",
220
- "S(+79.97)": "S[UNIMOD:21]", "T(+79.97)": "T[UNIMOD:21]", "Y(+79.97)": "Y[UNIMOD:21]",
221
- "Q(+0.98)": "Q[UNIMOD:7]", "N(+0.98)": "N[UNIMOD:7]",
222
- "Q(+.98)": "Q[UNIMOD:7]", "N(+.98)": "N[UNIMOD:7]",
223
- "C(+57.02)": "C[UNIMOD:4]", "(+42.01)": "[UNIMOD:1]",
224
- "(+43.01)": "[UNIMOD:5]", "(-17.03)": "[UNIMOD:385]",
225
- },
226
- "column_map": {
227
- "Modified sequence": "modified_sequence", "MS/MS m/z": "precursor_mz",
228
- "Mass": "precursor_mass", "Charge": "precursor_charge",
229
- "Mass values": "mz_array", "Mass spectrum": "mz_array",
230
- "Intensity": "intensity_array", "Raw intensity spectrum": "intensity_array",
231
- "Scan number": "scan_number"
232
- },
233
- "index_columns": [
234
- "scan_number", "precursor_mz", "precursor_charge",
235
- "retention_time", "spectrum_id", "experiment_name",
236
- ],
237
- })
238
 
239
  cfg_overrides = {
240
  "data_path": input_path, "output_path": output_path,
@@ -524,7 +516,7 @@ def predict_peptides(input_file, mode_selection, transformer_decoder_selection):
524
  logger.info("--- New Prediction Request ---")
525
  logger.info(f"Input File: {input_path}")
526
  logger.info(f"Selected Mode: {mode_selection}")
527
- if "Refinement" in mode_selection or "InstaNovo Only" in mode_selection:
528
  logger.info(f"Selected Transformer Decoder: {transformer_decoder_selection}")
529
 
530
  # Create temp output file
@@ -630,7 +622,7 @@ def predict_peptides(input_file, mode_selection, transformer_decoder_selection):
630
  row_data["prediction"] = ""
631
  results_data.append(row_data)
632
 
633
- elif "Refinement" in mode_selection:
634
  output_headers.extend([
635
  "transformer_prediction", "transformer_log_probability",
636
  "refined_prediction", "refined_log_probability", "refined_delta_mass_ppm"
@@ -713,8 +705,20 @@ with gr.Blocks(
713
  Upload your mass spectrometry data file (.mgf, .mzml, or .mzxml) and get peptide sequence predictions.
714
  Choose your prediction method and decoding options.
715
 
716
- **Note:** The InstaNovo+ model `{DIFFUSION_MODEL_ID}` is an alpha release.
717
- """
 
 
 
 
 
 
 
 
 
 
 
 
718
  )
719
  with gr.Row():
720
  with gr.Column(scale=1):
@@ -790,20 +794,6 @@ with gr.Blocks(
790
  label="Example Usage:",
791
  )
792
 
793
- gr.Markdown(
794
- f"""
795
- **Notes:**
796
- * Predictions use `{TRANSFORMER_MODEL_ID}` (Transformer) and `{DIFFUSION_MODEL_ID}` (Diffusion, Alpha release).
797
- * **Refinement Mode:** Runs initial prediction with the selected Transformer method (Greedy/Knapsack), then refines using InstaNovo+.
798
- * **InstaNovo Only Mode:** Uses only the Transformer with the selected decoding method.
799
- * **InstaNovo+ Only Mode:** Predicts directly using the Diffusion model (alpha version).
800
- * `delta_mass_ppm` shows the lowest absolute precursor mass error (ppm) across isotopes 0-1 for the final sequence.
801
- * Knapsack Beam Search requires a pre-computed knapsack file. If unavailable, the option will be disabled.
802
- * Check logs for progress, especially for large files or slower methods.
803
- """,
804
- elem_classes="feedback"
805
- )
806
-
807
  with gr.Accordion("Application Logs", open=True):
808
  log_display = Log(log_file, dark=True, height=300)
809
 
 
39
  TRANSFORMER_MODEL_ID = "instanovo-v1.1.0"
40
  DIFFUSION_MODEL_ID = "instanovoplus-v1.1.0-alpha"
41
  KNAPSACK_DIR = Path("./knapsack_cache")
 
 
 
42
 
43
  # Determine device
44
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 
195
  output_path: str,
196
  ) -> DictConfig:
197
  """Creates a base OmegaConf DictConfig for prediction environment."""
198
+ base_cfg = OmegaConf.create({
199
+ "data_path": None, "instanovo_model": TRANSFORMER_MODEL_ID,
200
+ "instanovoplus_model": DIFFUSION_MODEL_ID, "output_path": None,
201
+ "knapsack_path": str(KNAPSACK_DIR), "denovo": True, "refine": True,
202
+ "num_beams": 1, "max_length": 40, "max_charge": 10,
203
+ "isotope_error_range": [0, 1], "subset": 1.0, "use_knapsack": False,
204
+ "save_beams": False, "batch_size": 64, "device": DEVICE, "fp16": FP16,
205
+ "log_interval": 500, "use_basic_logging": True,
206
+ "filter_precursor_ppm": 20, "filter_confidence": 1e-4,
207
+ "filter_fdr_threshold": 0.05, "suppressed_residues": None,
208
+ "disable_terminal_residues_anywhere": True,
209
+ "residue_remapping": {
210
+ "M(ox)": "M[UNIMOD:35]", "M(+15.99)": "M[UNIMOD:35]",
211
+ "S(p)": "S[UNIMOD:21]", "T(p)": "T[UNIMOD:21]", "Y(p)": "Y[UNIMOD:21]",
212
+ "S(+79.97)": "S[UNIMOD:21]", "T(+79.97)": "T[UNIMOD:21]", "Y(+79.97)": "Y[UNIMOD:21]",
213
+ "Q(+0.98)": "Q[UNIMOD:7]", "N(+0.98)": "N[UNIMOD:7]",
214
+ "Q(+.98)": "Q[UNIMOD:7]", "N(+.98)": "N[UNIMOD:7]",
215
+ "C(+57.02)": "C[UNIMOD:4]", "(+42.01)": "[UNIMOD:1]",
216
+ "(+43.01)": "[UNIMOD:5]", "(-17.03)": "[UNIMOD:385]",
217
+ },
218
+ "column_map": {
219
+ "Modified sequence": "modified_sequence", "MS/MS m/z": "precursor_mz",
220
+ "Mass": "precursor_mass", "Charge": "precursor_charge",
221
+ "Mass values": "mz_array", "Mass spectrum": "mz_array",
222
+ "Intensity": "intensity_array", "Raw intensity spectrum": "intensity_array",
223
+ "Scan number": "scan_number"
224
+ },
225
+ "index_columns": [
226
+ "scan_number", "precursor_mz", "precursor_charge",
227
+ "retention_time", "spectrum_id", "experiment_name",
228
+ ],
229
+ })
 
 
 
 
 
230
 
231
  cfg_overrides = {
232
  "data_path": input_path, "output_path": output_path,
 
516
  logger.info("--- New Prediction Request ---")
517
  logger.info(f"Input File: {input_path}")
518
  logger.info(f"Selected Mode: {mode_selection}")
519
+ if "refinement" in mode_selection or "InstaNovo Only" in mode_selection:
520
  logger.info(f"Selected Transformer Decoder: {transformer_decoder_selection}")
521
 
522
  # Create temp output file
 
622
  row_data["prediction"] = ""
623
  results_data.append(row_data)
624
 
625
+ elif "refinement" in mode_selection:
626
  output_headers.extend([
627
  "transformer_prediction", "transformer_log_probability",
628
  "refined_prediction", "refined_log_probability", "refined_delta_mass_ppm"
 
705
  Upload your mass spectrometry data file (.mgf, .mzml, or .mzxml) and get peptide sequence predictions.
706
  Choose your prediction method and decoding options.
707
 
708
+ **Notes:**
709
+ * Predictions use version `{TRANSFORMER_MODEL_ID}` for the transformer-based InstaNovo model and version `{DIFFUSION_MODEL_ID}` for the diffusion-based InstaNovo+ model.
710
+ * The InstaNovo+ model `{DIFFUSION_MODEL_ID}` is an alpha release.
711
+ * **Prediction Modes:**
712
+ * **InstaNovo with InstaNovo+ refinement:** Runs initial prediction with the selected Transformer method (Greedy/Knapsack), then refines using InstaNovo+.
713
+ * **InstaNovo Only:** Uses only the Transformer with the selected decoding method.
714
+ * **InstaNovo+ Only:** Predicts directly using the Diffusion model (alpha release).
715
+ * **Transformer Decoding Methods:**
716
+ * **Greedy Search:** Use this for optimal performance; it performs similarly to Knapsack Beam Search at 5% FDR.
717
+ * **Knapsack Beam Search:** Use this for the best results and highest peptide recall; it is about 10x slower than Greedy Search.
718
+ * `delta_mass_ppm` shows the lowest absolute precursor mass error (ppm) across isotopes 0-1 for the final sequence.
719
+ * Check logs for progress, especially for large files or slower methods.
720
+ """,
721
+ elem_classes="feedback"
722
  )
723
  with gr.Row():
724
  with gr.Column(scale=1):
 
794
  label="Example Usage:",
795
  )
796
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
797
  with gr.Accordion("Application Logs", open=True):
798
  log_display = Log(log_file, dark=True, height=300)
799