Update app.py
app.py CHANGED
@@ -130,19 +130,13 @@ def create_leaderboard():
     # Create Gradio interface with a nice theme
     with gr.Blocks(theme=gr.themes.Soft(), title="Financial Model Performance Leaderboard") as demo:
         gr.Markdown(
-            """<div style="text-align: center;"><h1>Financial <span style='color: #e6b800;'>
+            """<div style="text-align: center;"><h1>Financial <span style='color: #e6b800;'>Models</span> <span style='color: #e6b800;'> Performance Leaderboard</span></h1></div>\
             <br>\
             <p>Inspired by the <a href="https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard">🤗 Open LLM Leaderboard</a> and <a href="https://huggingface.co/spaces/optimum/llm-perf-leaderboard">🤗 Open LLM-Perf Leaderboard 🏎️</a>, we evaluate model performance using <a href="https://huggingface.co/papers/2502.06329">FailSafe Long Context QA</a>. This evaluation leverages the <a href="https://huggingface.co/datasets/Writer/FailSafeQA">FailSafeQA dataset</a> to assess how well models handle long-context question answering, ensuring robust and reliable performance in extended-context scenarios.</p>
             """,
             elem_classes="markdown-text",
         )
-
-        <div style="padding: 20px;">
-        <h2>About This Leaderboard</h2>
-        <p>This Financial Model Performance Leaderboard compares the performance of various AI models across robustness and context grounding metrics. The data is sourced from evaluations conducted on February 18, 2025, and reflects the models' ability to handle financial tasks under different conditions.</p>
-        <p>For more information, contact us at <a href="mailto:[email protected]">[email protected]</a>.</p>
-        </div>
-        """)
+
 
         with gr.Row():
             with gr.Column():
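For context on what this hunk fixes: the removed lines were stray HTML sitting outside the `gr.Markdown(...)` call, followed by a dangling `""")`, which is not valid Python. The commit deletes that block and completes the header markup inside the Markdown string instead. Below is a minimal sketch of the corrected pattern; it assumes only that `gradio` is installed and omits the leaderboard tables and the rest of `create_leaderboard()` from app.py.

```python
# Minimal sketch of the pattern this commit restores: every piece of header
# HTML stays inside the triple-quoted string passed to gr.Markdown(), so no
# markup or stray """) is left dangling after the call closes.
import gradio as gr

with gr.Blocks(theme=gr.themes.Soft(), title="Financial Model Performance Leaderboard") as demo:
    gr.Markdown(
        """<div style="text-align: center;"><h1>Financial <span style='color: #e6b800;'>Models</span> <span style='color: #e6b800;'> Performance Leaderboard</span></h1></div>""",
        elem_classes="markdown-text",
    )

if __name__ == "__main__":
    demo.launch()
```

Keeping all markup inside a single string argument also makes it straightforward to reintroduce the removed "About This Leaderboard" text later: it can be appended to the same string or passed to a second `gr.Markdown(...)` call, rather than floating as bare HTML in the module.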