Spaces: Running on Zero
feat: add logging (WIP)
Browse files
- app.py +47 -32
- pyproject.toml +1 -0
- uv.lock +14 -0
app.py
CHANGED
@@ -5,8 +5,10 @@ import tempfile
|
|
5 |
import time
|
6 |
import polars as pl
|
7 |
import numpy as np
|
|
|
8 |
from pathlib import Path
|
9 |
from omegaconf import OmegaConf, DictConfig
|
|
|
10 |
|
11 |
# --- InstaNovo Imports ---
|
12 |
try:
|
@@ -40,26 +42,35 @@ KNAPSACK: Knapsack | None = None
|
|
40 |
MODEL_CONFIG: DictConfig | None = None
|
41 |
RESIDUE_SET: ResidueSet | None = None
|
42 |
|
43 |
-
# Assets
|
44 |
gr.set_static_paths(paths=[Path.cwd().absolute()/"assets"])
|
45 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
def load_model_and_knapsack():
|
48 |
"""Loads the InstaNovo model and generates/loads the knapsack."""
|
49 |
global MODEL, KNAPSACK, MODEL_CONFIG, RESIDUE_SET
|
50 |
if MODEL is not None:
|
51 |
-
|
52 |
return
|
53 |
|
54 |
-
|
55 |
try:
|
56 |
MODEL, MODEL_CONFIG = InstaNovo.from_pretrained(MODEL_ID)
|
57 |
MODEL.to(DEVICE)
|
58 |
MODEL.eval()
|
59 |
RESIDUE_SET = MODEL.residue_set
|
60 |
-
|
61 |
except Exception as e:
|
62 |
-
|
63 |
raise gr.Error(f"Failed to load InstaNovo model: {MODEL_ID}. Error: {e}")
|
64 |
|
65 |
# --- Knapsack Handling ---
|
@@ -70,17 +81,17 @@ def load_model_and_knapsack():
|
|
70 |
)
|
71 |
|
72 |
if knapsack_exists:
|
73 |
-
|
74 |
try:
|
75 |
KNAPSACK = Knapsack.from_file(str(KNAPSACK_DIR))
|
76 |
-
|
77 |
except Exception as e:
|
78 |
-
|
79 |
KNAPSACK = None # Force regeneration
|
80 |
knapsack_exists = False # Ensure generation happens
|
81 |
|
82 |
if not knapsack_exists:
|
83 |
-
|
84 |
if RESIDUE_SET is None:
|
85 |
raise gr.Error("Cannot generate knapsack because ResidueSet failed to load.")
|
86 |
try:
|
@@ -88,7 +99,7 @@ def load_model_and_knapsack():
|
|
88 |
residue_masses_knapsack = dict(RESIDUE_SET.residue_masses.copy())
|
89 |
negative_residues = [k for k, v in residue_masses_knapsack.items() if v <= 0]
|
90 |
if negative_residues:
|
91 |
-
|
92 |
"Excluding from knapsack generation.")
|
93 |
for res in negative_residues:
|
94 |
del residue_masses_knapsack[res]
|
@@ -110,11 +121,11 @@ def load_model_and_knapsack():
|
|
110 |
max_mass=MAX_MASS,
|
111 |
mass_scale=MASS_SCALE,
|
112 |
)
|
113 |
-
|
114 |
KNAPSACK.save(str(KNAPSACK_DIR)) # Save for future runs
|
115 |
-
|
116 |
except Exception as e:
|
117 |
-
|
118 |
gr.Warning("Failed to generate Knapsack. Knapsack Beam Search will not be available. {e}")
|
119 |
KNAPSACK = None # Ensure it's None if generation failed
|
120 |
|
@@ -131,7 +142,7 @@ def create_inference_config(
|
|
131 |
if DEFAULT_CONFIG_PATH.exists():
|
132 |
base_cfg = OmegaConf.load(DEFAULT_CONFIG_PATH)
|
133 |
else:
|
134 |
-
|
135 |
# Create a minimal config if default is missing
|
136 |
base_cfg = OmegaConf.create({
|
137 |
"data_path": None,
|
@@ -217,8 +228,8 @@ def predict_peptides(input_file, decoding_method):
|
|
217 |
raise gr.Error("Please upload a mass spectrometry file.")
|
218 |
|
219 |
input_path = input_file.name # Gradio provides the path in .name
|
220 |
-
|
221 |
-
|
222 |
|
223 |
# Create a temporary file for the output CSV
|
224 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as temp_out:
|
@@ -227,10 +238,10 @@ def predict_peptides(input_file, decoding_method):
|
|
227 |
try:
|
228 |
# 1. Create Config
|
229 |
config = create_inference_config(input_path, output_csv_path, decoding_method)
|
230 |
-
|
231 |
|
232 |
# 2. Load Data using SpectrumDataFrame
|
233 |
-
|
234 |
try:
|
235 |
sdf = SpectrumDataFrame.load(
|
236 |
config.data_path,
|
@@ -247,13 +258,13 @@ def predict_peptides(input_file, decoding_method):
|
|
247 |
lambda row: (row["precursor_charge"] <= max_charge) and (row["precursor_charge"] > 0)
|
248 |
)
|
249 |
if len(sdf) < original_size:
|
250 |
-
|
251 |
|
252 |
if len(sdf) == 0:
|
253 |
raise gr.Error("No valid spectra found in the uploaded file after filtering.")
|
254 |
-
|
255 |
except Exception as e:
|
256 |
-
|
257 |
raise gr.Error(f"Failed to load or process the spectrum file. Error: {e}")
|
258 |
|
259 |
# 3. Prepare Dataset and DataLoader
|
@@ -275,7 +286,7 @@ def predict_peptides(input_file, decoding_method):
|
|
275 |
)
|
276 |
|
277 |
# 4. Select Decoder
|
278 |
-
|
279 |
decoder: Decoder
|
280 |
if config.use_knapsack:
|
281 |
if KNAPSACK is None:
|
@@ -287,7 +298,7 @@ def predict_peptides(input_file, decoding_method):
|
|
287 |
decoder = KnapsackBeamSearchDecoder(model=MODEL, knapsack=KNAPSACK)
|
288 |
elif config.num_beams > 1:
|
289 |
# BeamSearchDecoder is available but not explicitly requested, use Greedy for num_beams=1
|
290 |
-
|
291 |
decoder = GreedyDecoder(model=MODEL, mass_scale=MASS_SCALE)
|
292 |
else:
|
293 |
decoder = GreedyDecoder(
|
@@ -297,10 +308,10 @@ def predict_peptides(input_file, decoding_method):
|
|
297 |
suppressed_residues=config.get("suppressed_residues", None),
|
298 |
disable_terminal_residues_anywhere=config.get("disable_terminal_residues_anywhere", True),
|
299 |
)
|
300 |
-
|
301 |
|
302 |
# 5. Run Prediction Loop (Adapted from instanovo/transformer/predict.py)
|
303 |
-
|
304 |
start_time = time.time()
|
305 |
results_list: list[ScoredSequence | list] = [] # Store ScoredSequence or empty list
|
306 |
|
@@ -325,13 +336,13 @@ def predict_peptides(input_file, decoding_method):
|
|
325 |
return_beam=False # Only get the top prediction for simplicity
|
326 |
)
|
327 |
results_list.extend(batch_predictions) # Should be list[ScoredSequence] or list[list]
|
328 |
-
|
329 |
|
330 |
end_time = time.time()
|
331 |
-
|
332 |
|
333 |
# 6. Format Results
|
334 |
-
|
335 |
output_data = []
|
336 |
# Use sdf index columns + prediction results
|
337 |
index_cols = [col for col in config.index_columns if col in sdf.df.columns]
|
@@ -356,7 +367,7 @@ def predict_peptides(input_file, decoding_method):
|
|
356 |
min_abs_ppm = min(abs(p) for p in delta_mass_list) if delta_mass_list else float('nan')
|
357 |
row_data["delta_mass_ppm"] = f"{min_abs_ppm:.2f}"
|
358 |
except Exception as e:
|
359 |
-
|
360 |
row_data["delta_mass_ppm"] = "N/A"
|
361 |
|
362 |
else:
|
@@ -374,7 +385,7 @@ def predict_peptides(input_file, decoding_method):
|
|
374 |
if col in output_df.columns:
|
375 |
final_display_cols.append(col)
|
376 |
else:
|
377 |
-
|
378 |
|
379 |
# Add any remaining index columns that weren't in display_cols
|
380 |
for col in index_cols:
|
@@ -385,14 +396,14 @@ def predict_peptides(input_file, decoding_method):
|
|
385 |
|
386 |
|
387 |
# 7. Save full results to CSV
|
388 |
-
|
389 |
output_df.write_csv(output_csv_path)
|
390 |
|
391 |
# Return DataFrame for display and path for download
|
392 |
return output_df_display.to_pandas(), output_csv_path
|
393 |
|
394 |
except Exception as e:
|
395 |
-
|
396 |
# Clean up the temporary output file if it exists
|
397 |
if os.path.exists(output_csv_path):
|
398 |
os.remove(output_csv_path)
|
@@ -469,6 +480,10 @@ with gr.Blocks(css=css, theme=gr.themes.Default(primary_hue="blue", secondary_hu
|
|
469 |
""".format(MODEL_ID=MODEL_ID)
|
470 |
)
|
471 |
|
|
|
|
|
|
|
|
|
472 |
# --- Launch the App ---
|
473 |
if __name__ == "__main__":
|
474 |
# Set share=True for temporary public link if running locally
|
|
|
5 |
import time
|
6 |
import polars as pl
|
7 |
import numpy as np
|
8 |
+
import logging
|
9 |
from pathlib import Path
|
10 |
from omegaconf import OmegaConf, DictConfig
|
11 |
+
from gradio_log import Log
|
12 |
|
13 |
# --- InstaNovo Imports ---
|
14 |
try:
|
|
|
42 |
MODEL_CONFIG: DictConfig | None = None
|
43 |
RESIDUE_SET: ResidueSet | None = None
|
44 |
|
45 |
+
# --- Assets ---
|
46 |
gr.set_static_paths(paths=[Path.cwd().absolute()/"assets"])
|
47 |
|
48 |
+
# Logging configuration
|
49 |
+
log_file = "/tmp/instanovo_gradio_log.txt"
|
50 |
+
Path(log_file).touch()
|
51 |
+
|
52 |
+
file_handler = logging.FileHandler(log_file)
|
53 |
+
file_handler.setLevel(logging.DEBUG)
|
54 |
+
|
55 |
+
logger = logging.getLogger("instanovo")
|
56 |
+
logger.setLevel(logging.DEBUG)
|
57 |
|
58 |
def load_model_and_knapsack():
|
59 |
"""Loads the InstaNovo model and generates/loads the knapsack."""
|
60 |
global MODEL, KNAPSACK, MODEL_CONFIG, RESIDUE_SET
|
61 |
if MODEL is not None:
|
62 |
+
logger.info("Model already loaded.")
|
63 |
return
|
64 |
|
65 |
+
logger.info(f"Loading InstaNovo model: {MODEL_ID} to {DEVICE}...")
|
66 |
try:
|
67 |
MODEL, MODEL_CONFIG = InstaNovo.from_pretrained(MODEL_ID)
|
68 |
MODEL.to(DEVICE)
|
69 |
MODEL.eval()
|
70 |
RESIDUE_SET = MODEL.residue_set
|
71 |
+
logger.info("Model loaded successfully.")
|
72 |
except Exception as e:
|
73 |
+
logger.error(f"Error loading model: {e}")
|
74 |
raise gr.Error(f"Failed to load InstaNovo model: {MODEL_ID}. Error: {e}")
|
75 |
|
76 |
# --- Knapsack Handling ---
|
|
|
81 |
)
|
82 |
|
83 |
if knapsack_exists:
|
84 |
+
logger.info(f"Loading pre-generated knapsack from {KNAPSACK_DIR}...")
|
85 |
try:
|
86 |
KNAPSACK = Knapsack.from_file(str(KNAPSACK_DIR))
|
87 |
+
logger.info("Knapsack loaded successfully.")
|
88 |
except Exception as e:
|
89 |
+
logger.info(f"Error loading knapsack: {e}. Will attempt to regenerate.")
|
90 |
KNAPSACK = None # Force regeneration
|
91 |
knapsack_exists = False # Ensure generation happens
|
92 |
|
93 |
if not knapsack_exists:
|
94 |
+
logger.info("Knapsack not found or failed to load. Generating knapsack...")
|
95 |
if RESIDUE_SET is None:
|
96 |
raise gr.Error("Cannot generate knapsack because ResidueSet failed to load.")
|
97 |
try:
|
|
|
99 |
residue_masses_knapsack = dict(RESIDUE_SET.residue_masses.copy())
|
100 |
negative_residues = [k for k, v in residue_masses_knapsack.items() if v <= 0]
|
101 |
if negative_residues:
|
102 |
+
logger.info(f"Warning: Non-positive masses found in residues: {negative_residues}. "
|
103 |
"Excluding from knapsack generation.")
|
104 |
for res in negative_residues:
|
105 |
del residue_masses_knapsack[res]
|
|
|
121 |
max_mass=MAX_MASS,
|
122 |
mass_scale=MASS_SCALE,
|
123 |
)
|
124 |
+
logger.info(f"Knapsack generated. Saving to {KNAPSACK_DIR}...")
|
125 |
KNAPSACK.save(str(KNAPSACK_DIR)) # Save for future runs
|
126 |
+
logger.info("Knapsack saved.")
|
127 |
except Exception as e:
|
128 |
+
logger.info(f"Error generating or saving knapsack: {e}")
|
129 |
gr.Warning("Failed to generate Knapsack. Knapsack Beam Search will not be available. {e}")
|
130 |
KNAPSACK = None # Ensure it's None if generation failed
|
131 |
|
|
|
142 |
if DEFAULT_CONFIG_PATH.exists():
|
143 |
base_cfg = OmegaConf.load(DEFAULT_CONFIG_PATH)
|
144 |
else:
|
145 |
+
logger.info(f"Warning: Default config not found at {DEFAULT_CONFIG_PATH}. Using minimal config.")
|
146 |
# Create a minimal config if default is missing
|
147 |
base_cfg = OmegaConf.create({
|
148 |
"data_path": None,
|
|
|
228 |
raise gr.Error("Please upload a mass spectrometry file.")
|
229 |
|
230 |
input_path = input_file.name # Gradio provides the path in .name
|
231 |
+
logger.info(f"Processing file: {input_path}")
|
232 |
+
logger.info(f"Using decoding method: {decoding_method}")
|
233 |
|
234 |
# Create a temporary file for the output CSV
|
235 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as temp_out:
|
|
|
238 |
try:
|
239 |
# 1. Create Config
|
240 |
config = create_inference_config(input_path, output_csv_path, decoding_method)
|
241 |
+
logger.info(f"Inference Config:\n{OmegaConf.to_yaml(config)}")
|
242 |
|
243 |
# 2. Load Data using SpectrumDataFrame
|
244 |
+
logger.info("Loading spectrum data...")
|
245 |
try:
|
246 |
sdf = SpectrumDataFrame.load(
|
247 |
config.data_path,
|
|
|
258 |
lambda row: (row["precursor_charge"] <= max_charge) and (row["precursor_charge"] > 0)
|
259 |
)
|
260 |
if len(sdf) < original_size:
|
261 |
+
logger.info(f"Warning: Filtered {original_size - len(sdf)} spectra with charge > {max_charge} or <= 0.")
|
262 |
|
263 |
if len(sdf) == 0:
|
264 |
raise gr.Error("No valid spectra found in the uploaded file after filtering.")
|
265 |
+
logger.info(f"Data loaded: {len(sdf)} spectra.")
|
266 |
except Exception as e:
|
267 |
+
logger.info(f"Error loading data: {e}")
|
268 |
raise gr.Error(f"Failed to load or process the spectrum file. Error: {e}")
|
269 |
|
270 |
# 3. Prepare Dataset and DataLoader
|
|
|
286 |
)
|
287 |
|
288 |
# 4. Select Decoder
|
289 |
+
logger.info("Initializing decoder...")
|
290 |
decoder: Decoder
|
291 |
if config.use_knapsack:
|
292 |
if KNAPSACK is None:
|
|
|
298 |
decoder = KnapsackBeamSearchDecoder(model=MODEL, knapsack=KNAPSACK)
|
299 |
elif config.num_beams > 1:
|
300 |
# BeamSearchDecoder is available but not explicitly requested, use Greedy for num_beams=1
|
301 |
+
logger.info(f"Warning: num_beams={config.num_beams} > 1 but only Greedy and Knapsack Beam Search are implemented in this app. Defaulting to Greedy.")
|
302 |
decoder = GreedyDecoder(model=MODEL, mass_scale=MASS_SCALE)
|
303 |
else:
|
304 |
decoder = GreedyDecoder(
|
|
|
308 |
suppressed_residues=config.get("suppressed_residues", None),
|
309 |
disable_terminal_residues_anywhere=config.get("disable_terminal_residues_anywhere", True),
|
310 |
)
|
311 |
+
logger.info(f"Using decoder: {type(decoder).__name__}")
|
312 |
|
313 |
# 5. Run Prediction Loop (Adapted from instanovo/transformer/predict.py)
|
314 |
+
logger.info("Starting prediction...")
|
315 |
start_time = time.time()
|
316 |
results_list: list[ScoredSequence | list] = [] # Store ScoredSequence or empty list
|
317 |
|
|
|
336 |
return_beam=False # Only get the top prediction for simplicity
|
337 |
)
|
338 |
results_list.extend(batch_predictions) # Should be list[ScoredSequence] or list[list]
|
339 |
+
logger.info(f"Processed batch {i+1}/{len(dl)}")
|
340 |
|
341 |
end_time = time.time()
|
342 |
+
logger.info(f"Prediction finished in {end_time - start_time:.2f} seconds.")
|
343 |
|
344 |
# 6. Format Results
|
345 |
+
logger.info("Formatting results...")
|
346 |
output_data = []
|
347 |
# Use sdf index columns + prediction results
|
348 |
index_cols = [col for col in config.index_columns if col in sdf.df.columns]
|
|
|
367 |
min_abs_ppm = min(abs(p) for p in delta_mass_list) if delta_mass_list else float('nan')
|
368 |
row_data["delta_mass_ppm"] = f"{min_abs_ppm:.2f}"
|
369 |
except Exception as e:
|
370 |
+
logger.info(f"Warning: Could not calculate delta mass for prediction {i}: {e}")
|
371 |
row_data["delta_mass_ppm"] = "N/A"
|
372 |
|
373 |
else:
|
|
|
385 |
if col in output_df.columns:
|
386 |
final_display_cols.append(col)
|
387 |
else:
|
388 |
+
logger.info(f"Warning: Expected display column '{col}' not found in results.")
|
389 |
|
390 |
# Add any remaining index columns that weren't in display_cols
|
391 |
for col in index_cols:
|
|
|
396 |
|
397 |
|
398 |
# 7. Save full results to CSV
|
399 |
+
logger.info(f"Saving results to {output_csv_path}...")
|
400 |
output_df.write_csv(output_csv_path)
|
401 |
|
402 |
# Return DataFrame for display and path for download
|
403 |
return output_df_display.to_pandas(), output_csv_path
|
404 |
|
405 |
except Exception as e:
|
406 |
+
logger.info(f"An error occurred during prediction: {e}")
|
407 |
# Clean up the temporary output file if it exists
|
408 |
if os.path.exists(output_csv_path):
|
409 |
os.remove(output_csv_path)
|
|
|
480 |
""".format(MODEL_ID=MODEL_ID)
|
481 |
)
|
482 |
|
483 |
+
# Add logging component
|
484 |
+
with gr.Accordion("Application Logs", open=False):
|
485 |
+
log_display = Log(log_file, dark=True, height=300)
|
486 |
+
|
487 |
# --- Launch the App ---
|
488 |
if __name__ == "__main__":
|
489 |
# Set share=True for temporary public link if running locally
|
pyproject.toml
CHANGED
@@ -6,6 +6,7 @@ readme = "README.md"
|
|
6 |
requires-python = ">=3.12"
|
7 |
dependencies = [
|
8 |
"gradio>=5.23.1",
|
|
|
9 |
"instanovo",
|
10 |
]
|
11 |
|
|
|
6 |
requires-python = ">=3.12"
|
7 |
dependencies = [
|
8 |
"gradio>=5.23.1",
|
9 |
+
"gradio-log>=0.0.8",
|
10 |
"instanovo",
|
11 |
]
|
12 |
|
uv.lock
CHANGED
@@ -675,6 +675,18 @@ wheels = [
|
|
675 |
{ url = "https://files.pythonhosted.org/packages/15/c8/0df7f92c8f1bdf5c244c29de8cd7e33a5931768ddba245526a770bfa18a2/gradio_client-1.8.0-py3-none-any.whl", hash = "sha256:27a3ab5278a44d57d1d05a86de67cec5f7370e540600d11816744a620addb967", size = 322165 },
|
676 |
]
|
677 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
678 |
[[package]]
|
679 |
name = "groovy"
|
680 |
version = "0.1.2"
|
@@ -868,12 +880,14 @@ version = "0.1.0"
|
|
868 |
source = { virtual = "." }
|
869 |
dependencies = [
|
870 |
{ name = "gradio" },
|
|
|
871 |
{ name = "instanovo" },
|
872 |
]
|
873 |
|
874 |
[package.metadata]
|
875 |
requires-dist = [
|
876 |
{ name = "gradio", specifier = ">=5.23.1" },
|
|
|
877 |
{ name = "instanovo", path = "../dtu-denovo-sequencing/dist/instanovo-1.1.0-py3-none-any.whl" },
|
878 |
]
|
879 |
|
|
|
675 |
{ url = "https://files.pythonhosted.org/packages/15/c8/0df7f92c8f1bdf5c244c29de8cd7e33a5931768ddba245526a770bfa18a2/gradio_client-1.8.0-py3-none-any.whl", hash = "sha256:27a3ab5278a44d57d1d05a86de67cec5f7370e540600d11816744a620addb967", size = 322165 },
|
676 |
]
|
677 |
|
678 |
+
[[package]]
|
679 |
+
name = "gradio-log"
|
680 |
+
version = "0.0.8"
|
681 |
+
source = { registry = "https://pypi.org/simple" }
|
682 |
+
dependencies = [
|
683 |
+
{ name = "gradio" },
|
684 |
+
]
|
685 |
+
sdist = { url = "https://files.pythonhosted.org/packages/6a/8d/368e16b93fdd2fc0b601ff648a7786e8551a7b4cc946faec0bdcff33ea4e/gradio_log-0.0.8.tar.gz", hash = "sha256:43d7aeb2651fb3b0583f6c205bdabc4ede50b47b531328174db9b0ca63cccf0c", size = 3457531 }
|
686 |
+
wheels = [
|
687 |
+
{ url = "https://files.pythonhosted.org/packages/4d/9b/5caba8a73175d2c6d8ebd72d92d1e6943f188cb5fff2196593a5f0d0dc1e/gradio_log-0.0.8-py3-none-any.whl", hash = "sha256:ed88db174429cc539c3bd9605891471f69544aafb930c36706a67a073459db48", size = 1220834 },
|
688 |
+
]
|
689 |
+
|
690 |
[[package]]
|
691 |
name = "groovy"
|
692 |
version = "0.1.2"
|
|
|
880 |
source = { virtual = "." }
|
881 |
dependencies = [
|
882 |
{ name = "gradio" },
|
883 |
+
{ name = "gradio-log" },
|
884 |
{ name = "instanovo" },
|
885 |
]
|
886 |
|
887 |
[package.metadata]
|
888 |
requires-dist = [
|
889 |
{ name = "gradio", specifier = ">=5.23.1" },
|
890 |
+
{ name = "gradio-log", specifier = ">=0.0.8" },
|
891 |
{ name = "instanovo", path = "../dtu-denovo-sequencing/dist/instanovo-1.1.0-py3-none-any.whl" },
|
892 |
]
|
893 |
|