Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,178 +1,78 @@
|
|
1 |
import gradio as gr
|
2 |
-
import
|
3 |
-
import
|
4 |
|
5 |
-
#
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
"
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
"TM-score": 0.60,
|
14 |
-
"SOTA_Accuracy (%)": 85,
|
15 |
-
"SOTA_TM-score": 0.75
|
16 |
-
},
|
17 |
-
"Nexa Bio2 (Tertiary)": {
|
18 |
-
"Confidence (%)": 90,
|
19 |
-
"GDT_TS": 0.82,
|
20 |
-
"Entropy Threshold (%)": 80,
|
21 |
-
"SOTA_Confidence (%)": 92,
|
22 |
-
"SOTA_GDT_TS": 0.85
|
23 |
-
},
|
24 |
},
|
25 |
-
"
|
26 |
-
"Nexa Astro":
|
27 |
-
|
28 |
-
"Macro-F1 (%)": 96,
|
29 |
-
"ROC-AUC": 0.98,
|
30 |
-
"SOTA_Accuracy (%)": 96,
|
31 |
-
"SOTA_ROC-AUC": 0.97
|
32 |
-
},
|
33 |
},
|
34 |
-
"Materials
|
35 |
-
"Nexa
|
36 |
-
|
37 |
-
"RMSE (eV)": 0.03,
|
38 |
-
"Bandgap Accuracy (%)": 98,
|
39 |
-
"SOTA_MAE (eV)": 0.03,
|
40 |
-
"SOTA_Bandgap Accuracy (%)": 95
|
41 |
-
},
|
42 |
},
|
43 |
-
"
|
44 |
-
"Nexa
|
45 |
-
|
46 |
-
"Purity": 1.00,
|
47 |
-
"Trace Distance": 0.15,
|
48 |
-
"SOTA_Fidelity": 0.83,
|
49 |
-
"SOTA_Trace Distance": 0.12
|
50 |
-
},
|
51 |
},
|
52 |
-
"
|
53 |
-
"Nexa
|
54 |
-
|
55 |
-
"Energy Conservation Loss": 0.005,
|
56 |
-
"PSNR": 30,
|
57 |
-
"SSIM": 0.88,
|
58 |
-
"SOTA_Relative L2 Error": 0.020,
|
59 |
-
"SOTA_SSIM": 0.85
|
60 |
-
},
|
61 |
},
|
62 |
-
"
|
63 |
-
"Nexa
|
64 |
-
|
65 |
-
"Event Accuracy (%)": 90,
|
66 |
-
"Jet Tagging (%)": 88,
|
67 |
-
"SOTA_ROC-AUC": 0.93,
|
68 |
-
"SOTA_Event Accuracy (%)": 89
|
69 |
-
},
|
70 |
},
|
71 |
-
"LLM Hypothesis & Methodology": {
|
72 |
-
"Nexa MOE": {
|
73 |
-
"Coherence (1β10)": 9.1,
|
74 |
-
"Novelty (1β10)": 8.6,
|
75 |
-
"Utility (1β10)": 8.8,
|
76 |
-
"Expert-Rated SOTA (1β10)": 9.0
|
77 |
-
},
|
78 |
-
},
|
79 |
-
}
|
80 |
-
|
81 |
-
# ─── 2. SECTION DESCRIPTIONS ───────────────────────────────────────────────────
|
82 |
-
# Markdown blurb for each benchmark domain, keyed by the domain name that
# show_eval() receives as its `category` argument.
# NOTE(review): the arrow and dash characters below were restored from
# mis-decoded bytes; confirm the exact characters against the original file.
section_descriptions = {
    "Protein Folding": """**Protein Folding**
Benchmarks for secondary (Q3/Q8) and tertiary (TM-score) structure prediction.
Nexa Bio1 handles sequence→secondary, Nexa Bio2 handles full 3D fold confidence.""",
    "Astrophysics": """**Astrophysics**
Stellar classification and redshift estimation.
Metrics: Accuracy, F1, ROC-AUC against SDSS-Net and astroML baselines.""",
    "Materials Science": """**Materials Science**
Property prediction for novel materials (e.g., bandgap, formation energy).
Metrics: MAE/RMSE, bandgap-prediction accuracy vs. CGCNN, ALIGNN.""",
    "Quantum State Tomography": """**Quantum State Tomography**
Reconstruct quantum states from measurement data.
Metrics: Fidelity, Purity, Trace Distance against PINNs and QuNet.""",
    "Computational Fluid Dynamics": """**CFD**
Flow field prediction (Navier–Stokes).
Metrics: Relative L2 Error, PSNR/SSIM, Energy Conservation Loss vs. FNO.""",
    "High-Energy Physics": """**High-Energy Physics**
Particle classification and signal/background separation.
Metrics: ROC-AUC, event reconstruction accuracy, jet-tagging efficiency.""",
    "LLM Hypothesis & Methodology": """**LLM-Based Scientific Reasoning**
Hypothesis and methodology generation.
Metrics scored 1–10 by expert rubric on Coherence, Novelty, and Utility; compared to top academic LLM baselines.""",
}
|
105 |
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
fig, ax = plt.subplots(figsize=(7, 4))
|
110 |
-
bar_width = 0.4
|
111 |
-
indices = list(range(len(data)))
|
112 |
-
labels = list(data.keys())
|
113 |
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
|
|
|
|
|
|
|
|
|
|
120 |
|
121 |
-
|
122 |
-
|
123 |
-
ax.bar([pos + j*bar_width for j in range(len(non_sota))],
|
124 |
-
list(non_sota.values()),
|
125 |
-
width=bar_width, label=f"{model} Metrics")
|
126 |
-
if sota:
|
127 |
-
ax.bar([pos + bar_width*len(non_sota) + j*bar_width for j in range(len(sota))],
|
128 |
-
list(sota.values()),
|
129 |
-
width=bar_width, alpha=0.7, label=f"{model} SOTA")
|
130 |
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
ax.set_title(f"{category} β Nexa vs. SOTA")
|
136 |
-
ax.legend(loc="upper right")
|
137 |
-
plt.tight_layout()
|
138 |
-
return fig
|
139 |
|
140 |
-
|
141 |
-
def show_eval(category):
    """Collect everything the dashboard displays for one benchmark category.

    Args:
        category: Key used to index both ``section_descriptions`` and
            ``benchmark_data``.

    Returns:
        A ``(description, table, figure)`` tuple: the Markdown text for the
        category, a DataFrame built from the category's benchmark entries
        and then transposed, and whatever ``plot_comparison`` returns.

    Raises:
        KeyError: If ``category`` is missing from either lookup table.
    """
    desc = section_descriptions[category]
    # NOTE(review): presumably the transpose yields one row per model and one
    # column per metric — confirm against the shape of benchmark_data.
    df = pd.DataFrame(benchmark_data[category]).T
    fig = plot_comparison(category)
    return desc, df, fig
|
146 |
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
""") as app:
|
153 |
-
gr.Markdown("# π¬ Nexa Evals Dashboard")
|
154 |
-
gr.Markdown("A **comprehensive** SciML benchmark framework. Select a domain to view metrics, compare with SOTA, and explore detailed plots and tables.")
|
155 |
|
156 |
with gr.Row():
|
157 |
-
|
158 |
-
|
159 |
-
choices=list(benchmark_data.keys()),
|
160 |
-
value="Protein Folding",
|
161 |
-
label="Select Domain / Model Group"
|
162 |
-
)
|
163 |
-
with gr.Column(scale=3):
|
164 |
-
description = gr.Markdown("")
|
165 |
-
table = gr.Dataframe(headers=["Metric", "Value"], interactive=False)
|
166 |
-
plot = gr.Plot()
|
167 |
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
outputs=[description, table, plot]
|
172 |
-
)
|
173 |
|
174 |
-
|
175 |
-
description.value, table.value, _ = show_eval("Protein Folding")
|
176 |
|
177 |
-
|
178 |
-
app.launch()
|
|
|
1 |
import gradio as gr
|
2 |
+
import plotly.graph_objs as go
|
3 |
+
import json
|
4 |
|
5 |
+
# Dummy data - replace with real model benchmarks later
|
6 |
+
# Benchmark scores grouped by scientific domain: {domain: {model label: score}}.
# The top-level keys populate the domain dropdown; each inner mapping is
# plotted as one bar per model and dumped verbatim as JSON.
MODEL_EVALS = {
    "Proteins": {
        "Nexa Bio1 (Secondary)": 0.71,
        "Porter6 (Secondary)": 0.8456,
        "DeepCNF (Secondary)": 0.85,
        "AlphaFold2 (Tertiary GDT-TS)": 0.924,
        "Nexa Bio2 (Tertiary)": 0.90,
    },
    "Astro": {
        "Nexa Astro": 0.97,
        "Baseline CNN": 0.89,
    },
    "Materials": {
        "Nexa Materials": 0.9999,
        "Random Forest Baseline": 0.92,
    },
    "QST": {
        "Nexa PIN Model": 0.80,
        "Quantum TomoNet": 0.85,
    },
    "HEP": {
        "Nexa HEP Model": 0.91,
        "CMSNet": 0.94,
    },
    "CFD": {
        "Nexa CFD Model": 0.92,
        "FlowNet": 0.89,
    },
}
|
35 |
|
36 |
+
def plot_domain(domain):
    """Build a bar chart of the benchmark scores recorded for one domain.

    Args:
        domain: Key of ``MODEL_EVALS`` whose models should be plotted.

    Returns:
        A Plotly figure with one indigo bar per model, in the order the
        models appear in ``MODEL_EVALS[domain]``.

    Raises:
        KeyError: If ``domain`` is not a key of ``MODEL_EVALS``.
    """
    domain_scores = MODEL_EVALS[domain]
    models = list(domain_scores)
    scores = list(domain_scores.values())

    fig = go.Figure()
    fig.add_trace(go.Bar(x=models, y=scores, marker_color="indigo"))
    fig.update_layout(
        title=f"Model Benchmark Scores — {domain}",
        xaxis_title="Model",
        yaxis_title="Score",
        # Fixed axis so bars stay visually comparable when switching domains.
        yaxis_range=[0, 1.0],
        template="plotly_white",
        height=500,
    )
    return fig
|
51 |
|
52 |
+
def get_model_details(domain):
    """Return the raw scores of one domain as pretty-printed JSON text.

    Args:
        domain: Key of ``MODEL_EVALS`` to serialize.

    Returns:
        The ``MODEL_EVALS[domain]`` mapping rendered by ``json.dumps`` with a
        two-space indent.

    Raises:
        KeyError: If ``domain`` is not a key of ``MODEL_EVALS``.
    """
    return json.dumps(MODEL_EVALS[domain], indent=2)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
|
55 |
+
def display_eval(domain):
    """Produce both dashboard outputs for the selected domain.

    Args:
        domain: Key of ``MODEL_EVALS`` chosen in the UI.

    Returns:
        A ``(figure, json_text)`` tuple: the chart from ``plot_domain`` and
        the JSON string from ``get_model_details``, in that order.

    Raises:
        KeyError: If ``domain`` is not a key of ``MODEL_EVALS``.
    """
    plot = plot_domain(domain)
    details = get_model_details(domain)
    return plot, details
|
|
|
|
|
|
|
|
|
59 |
|
60 |
+
# Dropdown choices, in the insertion order of MODEL_EVALS.
domain_list = list(MODEL_EVALS)

with gr.Blocks(title="Nexa Evals — Scientific ML Benchmark Suite") as demo:
    gr.Markdown("""
# 🔬 Nexa Evals
A benchmarking suite comparing Nexa models against SOTA across scientific domains.
""")

    with gr.Row():
        # Start with the first domain selected: with no initial value the
        # dropdown can submit None, which is not a key of MODEL_EVALS and
        # makes display_eval raise KeyError on "Run Evaluation".
        domain = gr.Dropdown(
            domain_list,
            value=domain_list[0] if domain_list else None,
            label="Select Domain",
        )
        show_btn = gr.Button("Run Evaluation")

    with gr.Row():
        plot_output = gr.Plot(label="Benchmark Plot")
        metrics_output = gr.Code(label="Raw Scores (JSON)", language="json")

    # display_eval returns (figure, json_text), matching the order of outputs.
    show_btn.click(display_eval, inputs=domain, outputs=[plot_output, metrics_output])

demo.launch()
|
|