Allanatrix committed on
Commit
22b961e
Β·
verified Β·
1 Parent(s): f587a82

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -158
app.py CHANGED
@@ -1,178 +1,78 @@
1
  import gradio as gr
2
- import pandas as pd
3
- import matplotlib.pyplot as plt
4
 
5
- # ─── 1. BENCHMARK DATA ──────────────────────────────────────────────────────────
6
- # Nested dict: Domain β†’ { Model Name β†’ {metric_name: value, …, "SOTA_<metric>": value } }
7
- benchmark_data = {
8
- "Protein Folding": {
9
- "Nexa Bio1 (Secondary)": {
10
- "Accuracy (%)": 71,
11
- "Q3 (%)": 65,
12
- "Q8 (%)": 55,
13
- "TM-score": 0.60,
14
- "SOTA_Accuracy (%)": 85,
15
- "SOTA_TM-score": 0.75
16
- },
17
- "Nexa Bio2 (Tertiary)": {
18
- "Confidence (%)": 90,
19
- "GDT_TS": 0.82,
20
- "Entropy Threshold (%)": 80,
21
- "SOTA_Confidence (%)": 92,
22
- "SOTA_GDT_TS": 0.85
23
- },
24
  },
25
- "Astrophysics": {
26
- "Nexa Astro": {
27
- "Accuracy (%)": 97,
28
- "Macro-F1 (%)": 96,
29
- "ROC-AUC": 0.98,
30
- "SOTA_Accuracy (%)": 96,
31
- "SOTA_ROC-AUC": 0.97
32
- },
33
  },
34
- "Materials Science": {
35
- "Nexa MatSci": {
36
- "MAE (eV)": 0.02,
37
- "RMSE (eV)": 0.03,
38
- "Bandgap Accuracy (%)": 98,
39
- "SOTA_MAE (eV)": 0.03,
40
- "SOTA_Bandgap Accuracy (%)": 95
41
- },
42
  },
43
- "Quantum State Tomography": {
44
- "Nexa QST": {
45
- "Fidelity": 0.80,
46
- "Purity": 1.00,
47
- "Trace Distance": 0.15,
48
- "SOTA_Fidelity": 0.83,
49
- "SOTA_Trace Distance": 0.12
50
- },
51
  },
52
- "Computational Fluid Dynamics": {
53
- "Nexa CFD": {
54
- "Relative L2 Error": 0.015,
55
- "Energy Conservation Loss": 0.005,
56
- "PSNR": 30,
57
- "SSIM": 0.88,
58
- "SOTA_Relative L2 Error": 0.020,
59
- "SOTA_SSIM": 0.85
60
- },
61
  },
62
- "High-Energy Physics": {
63
- "Nexa HEP": {
64
- "ROC-AUC": 0.92,
65
- "Event Accuracy (%)": 90,
66
- "Jet Tagging (%)": 88,
67
- "SOTA_ROC-AUC": 0.93,
68
- "SOTA_Event Accuracy (%)": 89
69
- },
70
  },
71
- "LLM Hypothesis & Methodology": {
72
- "Nexa MOE": {
73
- "Coherence (1–10)": 9.1,
74
- "Novelty (1–10)": 8.6,
75
- "Utility (1–10)": 8.8,
76
- "Expert-Rated SOTA (1–10)": 9.0
77
- },
78
- },
79
- }
80
-
81
- # ─── 2. SECTION DESCRIPTIONS ───────────────────────────────────────────────────
82
- section_descriptions = {
83
- "Protein Folding": """**Protein Folding**
84
- Benchmarks for secondary (Q3/Q8) and tertiary (TM-score) structure prediction.
85
- Nexa Bio1 handles sequence→secondary, Nexa Bio2 handles full 3D fold confidence.""",
86
- "Astrophysics": """**Astrophysics**
87
- Stellar classification and redshift estimation.
88
- Metrics: Accuracy, F1, ROC-AUC against SDSS-Net and astroML baselines.""",
89
- "Materials Science": """**Materials Science**
90
- Property prediction for novel materials (e.g., bandgap, formation energy).
91
- Metrics: MAE/RMSE, bandgap‐prediction accuracy vs. CGCNN, ALIGNN.""",
92
- "Quantum State Tomography": """**Quantum State Tomography**
93
- Reconstruct quantum states from measurement data.
94
- Metrics: Fidelity, Purity, Trace Distance against PINNs and QuNet.""",
95
- "Computational Fluid Dynamics": """**CFD**
96
- Flow field prediction (Navier–Stokes).
97
- Metrics: Relative L2 Error, PSNR/SSIM, Energy Conservation Loss vs. FNO.""",
98
- "High-Energy Physics": """**High-Energy Physics**
99
- Particle classification and signal/background separation.
100
- Metrics: ROC-AUC, event reconstruction accuracy, jet-tagging efficiency.""",
101
- "LLM Hypothesis & Methodology": """**LLM-Based Scientific Reasoning**
102
- Hypothesis and methodology generation.
103
- Metrics scored 1–10 by expert rubric on Coherence, Novelty, and Utility; compared to top academic LLM baselines."""
104
  }
105
 
106
- # ─── 3. PLOTTING FUNCTION ────────────────────────────────────────────────────────
107
- def plot_comparison(category):
108
- data = benchmark_data[category]
109
- fig, ax = plt.subplots(figsize=(7, 4))
110
- bar_width = 0.4
111
- indices = list(range(len(data)))
112
- labels = list(data.keys())
113
 
114
- # collect metrics that aren’t SOTA
115
- for i, model in enumerate(labels):
116
- metrics = data[model]
117
- # extract non-SOTA metrics
118
- non_sota = {k: v for k, v in metrics.items() if not k.startswith("SOTA")}
119
- sota = {k.replace("SOTA_", ""): v for k, v in metrics.items() if k.startswith("SOTA")}
 
 
 
 
 
120
 
121
- # bar positions
122
- pos = i * 2
123
- ax.bar([pos + j*bar_width for j in range(len(non_sota))],
124
- list(non_sota.values()),
125
- width=bar_width, label=f"{model} Metrics")
126
- if sota:
127
- ax.bar([pos + bar_width*len(non_sota) + j*bar_width for j in range(len(sota))],
128
- list(sota.values()),
129
- width=bar_width, alpha=0.7, label=f"{model} SOTA")
130
 
131
- # formatting
132
- ax.set_xticks([i * (2) + bar_width*(len(non_sota)/2) for i in indices])
133
- ax.set_xticklabels(labels, rotation=45, ha='right')
134
- ax.set_ylabel("Value / Score")
135
- ax.set_title(f"{category} β€” Nexa vs. SOTA")
136
- ax.legend(loc="upper right")
137
- plt.tight_layout()
138
- return fig
139
 
140
- # ─── 4. CALLBACK TO RENDER SECTION ─────────────────────────────────────────────
141
- def show_eval(category):
142
- desc = section_descriptions[category]
143
- df = pd.DataFrame(benchmark_data[category]).T
144
- fig = plot_comparison(category)
145
- return desc, df, fig
146
 
147
- # ─── 5. BUILD GRADIO APP ───────────────────────────────────────────────────────
148
- with gr.Blocks(css="""
149
- body { background-color: #f7f9fc; font-family: Arial, sans-serif; }
150
- .gradio-container { max-width: 900px; margin: auto; }
151
- h1, h2, h3 { color: #333; }
152
- """) as app:
153
- gr.Markdown("# πŸ”¬ Nexa Evals Dashboard")
154
- gr.Markdown("A **comprehensive** SciML benchmark framework. Select a domain to view metrics, compare with SOTA, and explore detailed plots and tables.")
155
 
156
  with gr.Row():
157
- with gr.Column(scale=1):
158
- category = gr.Radio(
159
- choices=list(benchmark_data.keys()),
160
- value="Protein Folding",
161
- label="Select Domain / Model Group"
162
- )
163
- with gr.Column(scale=3):
164
- description = gr.Markdown("")
165
- table = gr.Dataframe(headers=["Metric", "Value"], interactive=False)
166
- plot = gr.Plot()
167
 
168
- category.change(
169
- fn=show_eval,
170
- inputs=category,
171
- outputs=[description, table, plot]
172
- )
173
 
174
- # initialize
175
- description.value, table.value, _ = show_eval("Protein Folding")
176
 
177
- # Launch (on Hugging Face the config flags will be auto-managed)
178
- app.launch()
 
1
  import gradio as gr
2
+ import plotly.graph_objs as go
3
+ import json
4
 
5
# Dummy data - replace with real model benchmarks later.
# Domain name -> {model name -> benchmark score in [0, 1]}.
MODEL_EVALS = {
    "Proteins": {
        "Nexa Bio1 (Secondary)": 0.71,
        "Porter6 (Secondary)": 0.8456,
        "DeepCNF (Secondary)": 0.85,
        "AlphaFold2 (Tertiary GDT-TS)": 0.924,
        "Nexa Bio2 (Tertiary)": 0.90,
    },
    "Astro": {
        "Nexa Astro": 0.97,
        "Baseline CNN": 0.89,
    },
    "Materials": {
        "Nexa Materials": 0.9999,
        "Random Forest Baseline": 0.92,
    },
    "QST": {
        "Nexa PIN Model": 0.80,
        "Quantum TomoNet": 0.85,
    },
    "HEP": {
        "Nexa HEP Model": 0.91,
        "CMSNet": 0.94,
    },
    "CFD": {
        "Nexa CFD Model": 0.92,
        "FlowNet": 0.89,
    },
}
35
 
36
def plot_domain(domain):
    """Build a Plotly bar chart of benchmark scores for one domain.

    Args:
        domain: A key of ``MODEL_EVALS`` (e.g. "Proteins", "Astro").

    Returns:
        A ``plotly.graph_objs.Figure`` with one indigo bar per model,
        y-axis fixed to [0, 1] so domains are visually comparable.
    """
    entries = MODEL_EVALS[domain]

    chart = go.Figure()
    chart.add_trace(
        go.Bar(
            x=list(entries.keys()),
            y=list(entries.values()),
            marker_color='indigo',
        )
    )
    chart.update_layout(
        title=f"Model Benchmark Scores — {domain}",
        xaxis_title="Model",
        yaxis_title="Score",
        yaxis_range=[0, 1.0],
        template="plotly_white",
        height=500,
    )
    return chart
51
 
52
def get_model_details(domain):
    """Return the selected domain's raw scores as pretty-printed JSON."""
    scores = MODEL_EVALS[domain]
    return json.dumps(scores, indent=2)
 
 
 
 
 
 
 
54
 
55
def display_eval(domain):
    """Produce the (figure, JSON string) pair shown for a domain selection."""
    return plot_domain(domain), get_model_details(domain)
 
 
 
 
59
 
60
domain_list = list(MODEL_EVALS.keys())

# ── Gradio UI ────────────────────────────────────────────────────────────────
with gr.Blocks(title="Nexa Evals — Scientific ML Benchmark Suite") as demo:
    gr.Markdown("""
    # 🔬 Nexa Evals
    A benchmarking suite comparing Nexa models against SOTA across scientific domains.
    """)

    with gr.Row():
        # Fix: give the dropdown a default selection so the app is usable
        # immediately instead of opening with no domain chosen and empty panes.
        domain = gr.Dropdown(domain_list, value=domain_list[0], label="Select Domain")
        show_btn = gr.Button("Run Evaluation")

    with gr.Row():
        plot_output = gr.Plot(label="Benchmark Plot")
        metrics_output = gr.Code(label="Raw Scores (JSON)", language="json")

    # Clicking the button renders both the bar chart and the raw JSON scores.
    show_btn.click(display_eval, inputs=domain, outputs=[plot_output, metrics_output])

demo.launch()