Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from my_model.results.demo import ResultDemonstrator | |
| from my_model.config import evaluation_config as config | |
def run_demo() -> None:
    """
    Render the interactive Streamlit page for browsing model evaluation output.

    The page is split into two columns. The narrow left column holds the
    controls: a radio button choosing between "Evaluation Results & Analysis"
    and "Evaluation Samples", followed by the widgets relevant to that choice.
    The wide right column shows whatever the controls select, by delegating to
    a ResultDemonstrator instance:

    * main & ablation results,
    * results per question category,
    * prompt length (token count) vs. score plots, one expander per entry of
      config.MODEL_CONFIGURATIONS, for the chosen model and score type,
    * three samples, shown only when the "Generate Random Samples" button is pressed.

    Returns:
        None
    """

    # Option labels are compared against the widget return values below, so
    # each one is spelled out exactly once.
    results_section = "Evaluation Results & Analysis"
    samples_section = "Evaluation Samples"
    main_results = "Main & Ablation Results"
    per_category = "Results per Question Category"
    prompt_length = "Prompt Length (token count) Impact on Performance"

    demonstrator = ResultDemonstrator()
    controls_col, display_col = st.columns([1, 4])

    # Left column: selection widgets.
    with controls_col:
        section_options = [results_section, samples_section]
        section_type = st.radio("Select Evaluation Aspect", section_options)

        if section_type == results_section:
            # The analysis selector defaults to the prompt-length view (index 2).
            analysis_options = [main_results, per_category, prompt_length]
            analysis_type = st.radio("Select Type", analysis_options, index=2)

            # Model and score selectors only exist for the prompt-length view.
            if analysis_type == prompt_length:
                model_name = st.radio("Select Model Size", config.MODEL_NAMES)
                score_name = st.radio("Select Score Type", ["VQA Score", "Exact Match"])
        elif section_type == samples_section:
            samples_button = st.button("Generate Random Samples")

    # Right column: output for the current selection. Each name read here was
    # bound in the left column under the same condition that guards its use.
    with display_col:
        if section_type == samples_section:
            if samples_button:
                demonstrator.show_samples(3)
        elif section_type == results_section:
            if analysis_type == main_results:
                demonstrator.display_main_results()
            elif analysis_type == per_category:
                demonstrator.display_ablation_results_per_question_category()
            elif analysis_type == prompt_length:
                for configuration in config.MODEL_CONFIGURATIONS:
                    with st.expander(configuration):
                        demonstrator.plot_token_count_vs_scores(configuration, model_name, score_name)