Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -160,23 +160,30 @@ with gr.Blocks(css="""
|
|
160 |
border-radius: 8px;
|
161 |
padding: 10px;
|
162 |
}
|
|
|
|
|
|
|
163 |
""") as demo:
|
164 |
gr.Markdown("# Head-to-Head Model Evaluation Comparator")
|
165 |
gr.Markdown("""
|
166 |
-
This demo evaluates two models (or one model with two different configs) on a benchmark dataset.
|
167 |
-
|
168 |
-
Available
|
|
|
|
|
|
|
|
|
169 |
""")
|
170 |
|
171 |
# Dataset Selection Section
|
172 |
-
gr.Markdown("## (A) Select Dataset for
|
173 |
|
174 |
with gr.Row():
|
175 |
dataset_dropdown = gr.Dropdown(
|
176 |
choices=["(Select Dataset)", "MMLU-Pro"],
|
177 |
value="(Select Dataset)",
|
178 |
label="Dataset",
|
179 |
-
info="Select a dataset to perform the Head
|
180 |
)
|
181 |
preview_toggle = gr.Button("Show Preview", interactive=False, variant="secondary")
|
182 |
|
|
|
160 |
border-radius: 8px;
|
161 |
padding: 10px;
|
162 |
}
|
163 |
+
h1 {
|
164 |
+
text-align: center;
|
165 |
+
}
|
166 |
""") as demo:
|
167 |
gr.Markdown("# Head-to-Head Model Evaluation Comparator")
|
168 |
gr.Markdown("""
|
169 |
+
This demo evaluates two models (or one model with two different configs), head-to-head, on a benchmark dataset.
|
170 |
+
|
171 |
+
Available Datasets:
|
172 |
+
- [MMLU-Pro](https://huggingface.co/datasets/TIGER-Lab/MMLU-Pro)
|
173 |
+
|
174 |
+
Available Models:
|
175 |
+
- [Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1)
|
176 |
""")
|
177 |
|
178 |
# Dataset Selection Section
|
179 |
+
gr.Markdown("## (A) Select Dataset for Evaluation")
|
180 |
|
181 |
with gr.Row():
|
182 |
dataset_dropdown = gr.Dropdown(
|
183 |
choices=["(Select Dataset)", "MMLU-Pro"],
|
184 |
value="(Select Dataset)",
|
185 |
label="Dataset",
|
186 |
+
info="Select a dataset to perform the Head-to-Head Evaluation on. Available Datasets: [MMLU-Pro](https://huggingface.co/datasets/TIGER-Lab/MMLU-Pro)"
|
187 |
)
|
188 |
preview_toggle = gr.Button("Show Preview", interactive=False, variant="secondary")
|
189 |
|