Update my_model/tabs/results.py
my_model/tabs/results.py  CHANGED  (+19 -1)
@@ -3,9 +3,27 @@ from my_model.results.demo import ResultDemonstrator
 from my_model.config import evaluation_config as config
 
 
-def run_demo():
+def run_demo() -> None:
     """
     Run the interactive Streamlit demo for visualizing model evaluation results and analysis.
+
+    This function initializes the ResultDemonstrator class and sets up an interactive interface
+    where users can choose to view either evaluation results & analysis or evaluation samples.
+    Based on the user's selection, different aspects of the evaluation are displayed, such as
+    main & ablation results, results per question category, or the impact of prompt length on performance.
+
+    Interface Elements:
+        - Evaluation Results & Analysis
+            - Main & Ablation Results
+            - Results per Question Category
+            - Prompt Length (token count) Impact on Performance
+            - Select Model Size
+            - Select Score Type
+        - Evaluation Samples
+            - Generate Random Samples
+
+    Returns:
+        None
     """
 
     demo = ResultDemonstrator()  # Instantiate the ResultDemonstrator class
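For readers who want to see how the interface outlined in the new docstring might be wired up, below is a minimal sketch using Streamlit's sidebar widgets. The diff itself only shows the docstring and the ResultDemonstrator instantiation, so everything else here is an assumption for illustration: the method names show_analysis and show_random_samples, as well as the model-size and score-type option lists, are hypothetical placeholders and not part of the actual ResultDemonstrator API.

# Hypothetical sketch only: ResultDemonstrator's real methods are not shown in this
# diff, so show_analysis/show_random_samples and the option lists are placeholders.
import streamlit as st

from my_model.results.demo import ResultDemonstrator


def run_demo() -> None:
    demo = ResultDemonstrator()  # Instantiate the ResultDemonstrator class

    # Top-level choice between the two views named in the docstring.
    view = st.sidebar.radio(
        "Choose a view",
        ["Evaluation Results & Analysis", "Evaluation Samples"],
    )

    if view == "Evaluation Results & Analysis":
        analysis = st.sidebar.radio(
            "Analysis",
            [
                "Main & Ablation Results",
                "Results per Question Category",
                "Prompt Length (token count) Impact on Performance",
            ],
        )
        model_size = st.sidebar.selectbox("Select Model Size", ["7b", "13b"])  # assumed options
        score_type = st.sidebar.selectbox("Select Score Type", ["Exact Match", "VQA Score"])  # assumed options
        demo.show_analysis(analysis, model_size=model_size, score_type=score_type)  # placeholder method
    else:
        if st.button("Generate Random Samples"):
            demo.show_random_samples()  # placeholder method

Keeping the widget setup inside run_demo() and delegating the actual plotting and sample rendering to ResultDemonstrator keeps this tab thin, which is consistent with the single instantiation line visible in the diff.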