Spaces:
Sleeping
Sleeping
Update my_model/tabs/results.py
Browse files
- my_model/tabs/results.py +19 -1
my_model/tabs/results.py
CHANGED
|
@@ -3,9 +3,27 @@ from my_model.results.demo import ResultDemonstrator
|
|
| 3 |
from my_model.config import evaluation_config as config
|
| 4 |
|
| 5 |
|
| 6 |
-
def run_demo():
|
| 7 |
"""
|
| 8 |
Run the interactive Streamlit demo for visualizing model evaluation results and analysis.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
"""
|
| 10 |
|
| 11 |
demo = ResultDemonstrator() # Instantiate the ResultDemonstrator class
|
|
|
|
| 3 |
from my_model.config import evaluation_config as config
|
| 4 |
|
| 5 |
|
| 6 |
+
def run_demo()-> None:
|
| 7 |
"""
|
| 8 |
Run the interactive Streamlit demo for visualizing model evaluation results and analysis.
|
| 9 |
+
|
| 10 |
+
This function initializes the ResultDemonstrator class and sets up an interactive interface
|
| 11 |
+
where users can choose to view either evaluation results & analysis or evaluation samples.
|
| 12 |
+
Based on the user's selection, different aspects of the evaluation are displayed, such as
|
| 13 |
+
main & ablation results, results per question category, or the impact of prompt length on performance.
|
| 14 |
+
|
| 15 |
+
Interface Elements:
|
| 16 |
+
- Evaluation Results & Analysis
|
| 17 |
+
- Main & Ablation Results
|
| 18 |
+
- Results per Question Category
|
| 19 |
+
- Prompt Length (token count) Impact on Performance
|
| 20 |
+
- Select Model Size
|
| 21 |
+
- Select Score Type
|
| 22 |
+
- Evaluation Samples
|
| 23 |
+
- Generate Random Samples
|
| 24 |
+
|
| 25 |
+
Returns:
|
| 26 |
+
None
|
| 27 |
"""
|
| 28 |
|
| 29 |
demo = ResultDemonstrator() # Instantiate the ResultDemonstrator class
|