Allanatrix committed on
Commit
22b961e
Β·
verified Β·
1 Parent(s): f587a82

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -158
app.py CHANGED
@@ -1,178 +1,78 @@
1
  import gradio as gr
2
- import pandas as pd
3
- import matplotlib.pyplot as plt
4
 
5
- # ─── 1. BENCHMARK DATA ──────────────────────────────────────────────────────────
6
- # Nested dict: Domain β†’ { Model Name β†’ {metric_name: value, …, "SOTA_<metric>": value } }
7
- benchmark_data = {
8
- "Protein Folding": {
9
- "Nexa Bio1 (Secondary)": {
10
- "Accuracy (%)": 71,
11
- "Q3 (%)": 65,
12
- "Q8 (%)": 55,
13
- "TM-score": 0.60,
14
- "SOTA_Accuracy (%)": 85,
15
- "SOTA_TM-score": 0.75
16
- },
17
- "Nexa Bio2 (Tertiary)": {
18
- "Confidence (%)": 90,
19
- "GDT_TS": 0.82,
20
- "Entropy Threshold (%)": 80,
21
- "SOTA_Confidence (%)": 92,
22
- "SOTA_GDT_TS": 0.85
23
- },
24
  },
25
- "Astrophysics": {
26
- "Nexa Astro": {
27
- "Accuracy (%)": 97,
28
- "Macro-F1 (%)": 96,
29
- "ROC-AUC": 0.98,
30
- "SOTA_Accuracy (%)": 96,
31
- "SOTA_ROC-AUC": 0.97
32
- },
33
  },
34
- "Materials Science": {
35
- "Nexa MatSci": {
36
- "MAE (eV)": 0.02,
37
- "RMSE (eV)": 0.03,
38
- "Bandgap Accuracy (%)": 98,
39
- "SOTA_MAE (eV)": 0.03,
40
- "SOTA_Bandgap Accuracy (%)": 95
41
- },
42
  },
43
- "Quantum State Tomography": {
44
- "Nexa QST": {
45
- "Fidelity": 0.80,
46
- "Purity": 1.00,
47
- "Trace Distance": 0.15,
48
- "SOTA_Fidelity": 0.83,
49
- "SOTA_Trace Distance": 0.12
50
- },
51
  },
52
- "Computational Fluid Dynamics": {
53
- "Nexa CFD": {
54
- "Relative L2 Error": 0.015,
55
- "Energy Conservation Loss": 0.005,
56
- "PSNR": 30,
57
- "SSIM": 0.88,
58
- "SOTA_Relative L2 Error": 0.020,
59
- "SOTA_SSIM": 0.85
60
- },
61
  },
62
- "High-Energy Physics": {
63
- "Nexa HEP": {
64
- "ROC-AUC": 0.92,
65
- "Event Accuracy (%)": 90,
66
- "Jet Tagging (%)": 88,
67
- "SOTA_ROC-AUC": 0.93,
68
- "SOTA_Event Accuracy (%)": 89
69
- },
70
  },
71
- "LLM Hypothesis & Methodology": {
72
- "Nexa MOE": {
73
- "Coherence (1–10)": 9.1,
74
- "Novelty (1–10)": 8.6,
75
- "Utility (1–10)": 8.8,
76
- "Expert-Rated SOTA (1–10)": 9.0
77
- },
78
- },
79
- }
80
-
81
- # ─── 2. SECTION DESCRIPTIONS ───────────────────────────────────────────────────
82
- section_descriptions = {
83
- "Protein Folding": """**Protein Folding**
84
- Benchmarks for secondary (Q3/Q8) and tertiary (TM-score) structure prediction.
85
- Nexa Bio1 handles sequence→secondary, Nexa Bio2 handles full 3D fold confidence.""",
86
- "Astrophysics": """**Astrophysics**
87
- Stellar classification and redshift estimation.
88
- Metrics: Accuracy, F1, ROC-AUC against SDSS-Net and astroML baselines.""",
89
- "Materials Science": """**Materials Science**
90
- Property prediction for novel materials (e.g., bandgap, formation energy).
91
- Metrics: MAE/RMSE, bandgap‐prediction accuracy vs. CGCNN, ALIGNN.""",
92
- "Quantum State Tomography": """**Quantum State Tomography**
93
- Reconstruct quantum states from measurement data.
94
- Metrics: Fidelity, Purity, Trace Distance against PINNs and QuNet.""",
95
- "Computational Fluid Dynamics": """**CFD**
96
- Flow field prediction (Navier–Stokes).
97
- Metrics: Relative L2 Error, PSNR/SSIM, Energy Conservation Loss vs. FNO.""",
98
- "High-Energy Physics": """**High-Energy Physics**
99
- Particle classification and signal/background separation.
100
- Metrics: ROC-AUC, event reconstruction accuracy, jet-tagging efficiency.""",
101
- "LLM Hypothesis & Methodology": """**LLM-Based Scientific Reasoning**
102
- Hypothesis and methodology generation.
103
- Metrics scored 1–10 by expert rubric on Coherence, Novelty, and Utility; compared to top academic LLM baselines."""
104
  }
105
 
106
- # ─── 3. PLOTTING FUNCTION ────────────────────────────────────────────────────────
107
- def plot_comparison(category):
108
- data = benchmark_data[category]
109
- fig, ax = plt.subplots(figsize=(7, 4))
110
- bar_width = 0.4
111
- indices = list(range(len(data)))
112
- labels = list(data.keys())
113
 
114
- # collect metrics that aren’t SOTA
115
- for i, model in enumerate(labels):
116
- metrics = data[model]
117
- # extract non-SOTA metrics
118
- non_sota = {k: v for k, v in metrics.items() if not k.startswith("SOTA")}
119
- sota = {k.replace("SOTA_", ""): v for k, v in metrics.items() if k.startswith("SOTA")}
 
 
 
 
 
120
 
121
- # bar positions
122
- pos = i * 2
123
- ax.bar([pos + j*bar_width for j in range(len(non_sota))],
124
- list(non_sota.values()),
125
- width=bar_width, label=f"{model} Metrics")
126
- if sota:
127
- ax.bar([pos + bar_width*len(non_sota) + j*bar_width for j in range(len(sota))],
128
- list(sota.values()),
129
- width=bar_width, alpha=0.7, label=f"{model} SOTA")
130
 
131
- # formatting
132
- ax.set_xticks([i * (2) + bar_width*(len(non_sota)/2) for i in indices])
133
- ax.set_xticklabels(labels, rotation=45, ha='right')
134
- ax.set_ylabel("Value / Score")
135
- ax.set_title(f"{category} β€” Nexa vs. SOTA")
136
- ax.legend(loc="upper right")
137
- plt.tight_layout()
138
- return fig
139
 
140
- # ─── 4. CALLBACK TO RENDER SECTION ─────────────────────────────────────────────
141
- def show_eval(category):
142
- desc = section_descriptions[category]
143
- df = pd.DataFrame(benchmark_data[category]).T
144
- fig = plot_comparison(category)
145
- return desc, df, fig
146
 
147
- # ─── 5. BUILD GRADIO APP ───────────────────────────────────────────────────────
148
- with gr.Blocks(css="""
149
- body { background-color: #f7f9fc; font-family: Arial, sans-serif; }
150
- .gradio-container { max-width: 900px; margin: auto; }
151
- h1, h2, h3 { color: #333; }
152
- """) as app:
153
- gr.Markdown("# πŸ”¬ Nexa Evals Dashboard")
154
- gr.Markdown("A **comprehensive** SciML benchmark framework. Select a domain to view metrics, compare with SOTA, and explore detailed plots and tables.")
155
 
156
  with gr.Row():
157
- with gr.Column(scale=1):
158
- category = gr.Radio(
159
- choices=list(benchmark_data.keys()),
160
- value="Protein Folding",
161
- label="Select Domain / Model Group"
162
- )
163
- with gr.Column(scale=3):
164
- description = gr.Markdown("")
165
- table = gr.Dataframe(headers=["Metric", "Value"], interactive=False)
166
- plot = gr.Plot()
167
 
168
- category.change(
169
- fn=show_eval,
170
- inputs=category,
171
- outputs=[description, table, plot]
172
- )
173
 
174
- # initialize
175
- description.value, table.value, _ = show_eval("Protein Folding")
176
 
177
- # Launch (on Hugging Face the config flags will be auto-managed)
178
- app.launch()
 
1
  import gradio as gr
2
+ import plotly.graph_objs as go
3
+ import json
4
 
5
# Dummy data - replace with real model benchmarks later.
# Domain name -> {model name -> benchmark score in [0, 1]}.
MODEL_EVALS = {
    "Proteins": {
        "Nexa Bio1 (Secondary)": 0.71,
        "Porter6 (Secondary)": 0.8456,
        "DeepCNF (Secondary)": 0.85,
        "AlphaFold2 (Tertiary GDT-TS)": 0.924,
        "Nexa Bio2 (Tertiary)": 0.90,
    },
    "Astro": {
        "Nexa Astro": 0.97,
        "Baseline CNN": 0.89,
    },
    "Materials": {
        "Nexa Materials": 0.9999,
        "Random Forest Baseline": 0.92,
    },
    "QST": {
        "Nexa PIN Model": 0.80,
        "Quantum TomoNet": 0.85,
    },
    "HEP": {
        "Nexa HEP Model": 0.91,
        "CMSNet": 0.94,
    },
    "CFD": {
        "Nexa CFD Model": 0.92,
        "FlowNet": 0.89,
    },
}
35
 
36
def plot_domain(domain):
    """Build a Plotly bar chart of benchmark scores for one domain.

    Args:
        domain: A key of ``MODEL_EVALS`` (e.g. "Proteins", "Astro").

    Returns:
        A ``plotly.graph_objs.Figure`` with one indigo bar per model,
        y-axis fixed to [0, 1] so domains are visually comparable.
    """
    entries = MODEL_EVALS[domain]

    chart = go.Figure()
    chart.add_trace(
        go.Bar(
            x=list(entries.keys()),
            y=list(entries.values()),
            marker_color='indigo',
        )
    )
    chart.update_layout(
        title=f"Model Benchmark Scores — {domain}",
        xaxis_title="Model",
        yaxis_title="Score",
        yaxis_range=[0, 1.0],
        template="plotly_white",
        height=500,
    )
    return chart
51
 
52
def get_model_details(domain):
    """Return the selected domain's raw scores as pretty-printed JSON."""
    scores = MODEL_EVALS[domain]
    return json.dumps(scores, indent=2)
 
 
 
 
 
 
 
54
 
55
def display_eval(domain):
    """Produce the (figure, JSON string) pair shown for a domain selection."""
    return plot_domain(domain), get_model_details(domain)
 
 
 
 
59
 
60
domain_list = list(MODEL_EVALS.keys())

# ── Gradio UI ────────────────────────────────────────────────────────────────
with gr.Blocks(title="Nexa Evals — Scientific ML Benchmark Suite") as demo:
    gr.Markdown("""
    # 🔬 Nexa Evals
    A benchmarking suite comparing Nexa models against SOTA across scientific domains.
    """)

    with gr.Row():
        # Fix: give the dropdown a default selection so the app is usable
        # immediately instead of opening with no domain chosen and empty panes.
        domain = gr.Dropdown(domain_list, value=domain_list[0], label="Select Domain")
        show_btn = gr.Button("Run Evaluation")

    with gr.Row():
        plot_output = gr.Plot(label="Benchmark Plot")
        metrics_output = gr.Code(label="Raw Scores (JSON)", language="json")

    # Clicking the button renders both the bar chart and the raw JSON scores.
    show_btn.click(display_eval, inputs=domain, outputs=[plot_output, metrics_output])

demo.launch()