Spaces:

NIRAJz
/

LMVal-Multi-Metric-LLM-Evaluation

Sleeping

App Files Community

NIRAJz committed on Sep 4

Commit

3d4bd94

verified ·

1 Parent(s): 4d6c43c

Update app.py

Browse files

Files changed (1) hide show

app.py +99 -51

app.py CHANGED Viewed

@@ -46,6 +46,43 @@ if "evaluation_params" not in st.session_state:
 if "show_results" not in st.session_state:
     st.session_state.show_results = False
 def run_evaluation_sync(request: EvaluationRequest):
     """Run evaluation synchronously with proper event loop handling"""
     try:
@@ -481,23 +518,6 @@ def build_request_object(questions: List[str], ground_truths: List[str], model_r
     return request
-def read_json_file(uploaded_file):
-    """Read JSON file with proper error handling for Spaces"""
-    try:
-        # For Spaces environment, use file uploader content directly
-        if hasattr(uploaded_file, 'getvalue'):
-            content = uploaded_file.getvalue()
-            if isinstance(content, bytes):
-                content = content.decode('utf-8')
-            return json.loads(content)
-        else:
-            # For local files
-            with open(uploaded_file, 'r', encoding='utf-8') as f:
-                return json.load(f)
-    except Exception as e:
-        st.error(f"Error reading JSON file: {e}")
-        return None
 def main():
     st.title("🤖 LMVal: Multi-Metric LLM Evaluation")
     st.markdown("Advanced RAG pipeline evaluation using LangGraph and Groq/OpenAI")
@@ -639,44 +659,72 @@ def main():
             if uploaded_file is not None:
                 try:
-                    # Use the new file reading function
-                    data = read_json_file(uploaded_file)
-                    if data:
-                        # Handle different JSON structures
-                        if isinstance(data, dict):
-                            # Standard format with separate arrays
-                            questions_list = data.get("questions", [])
-                            truths_list = data.get("ground_truths", [])
-                            responses_list = data.get("model_responses", [])
-                            contexts_list = data.get("contexts", [])
-                        elif isinstance(data, list):
-                            # List of question objects
-                            for item in data:
-                                if isinstance(item, dict):
-                                    questions_list.append(item.get("question", ""))
-                                    truths_list.append(item.get("ground_truth", ""))
-                                    responses_list.append(item.get("model_response", ""))
-                                    contexts_list.append(item.get("context", ""))
-                        if questions_list:
-                            st.success(f"Loaded {len(questions_list)} items from JSON")
-                            # Show preview
-                            with st.expander("Preview loaded data"):
-                                preview_data = {
-                                    "questions": questions_list[:3] + ["..."] if len(questions_list) > 3 else questions_list,
-                                    "ground_truths": truths_list[:3] + ["..."] if len(truths_list) > 3 else truths_list,
-                                    "model_responses": responses_list[:3] + ["..."] if responses_list and len(responses_list) > 3 else responses_list,
-                                    "contexts": contexts_list[:3] + ["..."] if contexts_list and len(contexts_list) > 3 else contexts_list
-                                }
-                                st.json(preview_data)
-                        else:
-                            st.warning("No valid data found in the JSON file")
                 except Exception as e:
                     st.error(f"Error processing JSON file: {e}")
         # Run evaluation button
         run_button = st.button("▶️ Run Evaluation", use_container_width=True,
                               disabled=st.session_state.evaluation_in_progress)
@@ -852,7 +900,7 @@ def main():
                         st.rerun()
             # Clear all history button
-            if st.button("Clear All History ", use_container_width=True, type="secondary"):
                 st.session_state.evaluation_history = []
                 st.success("All history cleared")
                 st.rerun()

 if "show_results" not in st.session_state:
     st.session_state.show_results = False
+def is_running_on_spaces():
+    """Check if we're running on Hugging Face Spaces"""
+    return os.environ.get('SPACES_APP_TYPE') is not None
+def create_sample_data():
+    """Create sample data for demonstration"""
+    return {
+        "questions": [
+            "What is the capital of France?",
+            "How does photosynthesis work?",
+            "What is the theory of relativity?",
+            "What is the main ingredient in guacamole?",
+            "Who developed the theory of relativity?"
+        ],
+        "ground_truths": [
+            "The capital of France is Paris.",
+            "Photosynthesis is the process by which plants convert sunlight into energy.",
+            "The theory of relativity was developed by Albert Einstein.",
+            "The main ingredient in guacamole is avocado.",
+            "Albert Einstein developed the theory of relativity."
+        ],
+        "model_responses": [
+            "Paris is the capital city of France.",
+            "Plants use sunlight to create energy through photosynthesis.",
+            "Einstein developed the theory of relativity.",
+            "The main ingredient in guacamole is tomato.",
+            "Isaac Newton developed the theory of relativity."
+        ],
+        "contexts": [
+            "France is a country in Western Europe with Paris as its capital.",
+            "Photosynthesis is a biological process used by plants to create energy.",
+            "Albert Einstein was a physicist who developed the theory of relativity.",
+            "Guacamole is an avocado-based dip first developed in Mexico.",
+            "Albert Einstein was a German-born theoretical physicist who developed the theory of relativity."
+        ]
+    }
 def run_evaluation_sync(request: EvaluationRequest):
     """Run evaluation synchronously with proper event loop handling"""
     try:
     return request
 def main():
     st.title("🤖 LMVal: Multi-Metric LLM Evaluation")
     st.markdown("Advanced RAG pipeline evaluation using LangGraph and Groq/OpenAI")
             if uploaded_file is not None:
                 try:
+                    # Read content directly from the uploaded file
+                    content = uploaded_file.getvalue()
+                    if isinstance(content, bytes):
+                        content = content.decode('utf-8')
+                    data = json.loads(content)
+                    # Handle different JSON structures
+                    questions_list = []
+                    truths_list = []
+                    responses_list = []
+                    contexts_list = []
+                    if isinstance(data, dict):
+                        # Standard format with separate arrays
+                        questions_list = data.get("questions", [])
+                        truths_list = data.get("ground_truths", [])
+                        responses_list = data.get("model_responses", [])
+                        contexts_list = data.get("contexts", [])
+                    elif isinstance(data, list):
+                        # List of question objects
+                        for item in data:
+                            if isinstance(item, dict):
+                                questions_list.append(item.get("question", ""))
+                                truths_list.append(item.get("ground_truth", ""))
+                                responses_list.append(item.get("model_response", ""))
+                                contexts_list.append(item.get("context", ""))
+                    if questions_list:
+                        st.success(f"Loaded {len(questions_list)} items from JSON")
+                        # Show preview
+                        with st.expander("Preview loaded data"):
+                            preview_data = {
+                                "questions": questions_list[:3] + ["..."] if len(questions_list) > 3 else questions_list,
+                                "ground_truths": truths_list[:3] + ["..."] if len(truths_list) > 3 else truths_list,
+                                "model_responses": responses_list[:3] + ["..."] if responses_list and len(responses_list) > 3 else responses_list,
+                                "contexts": contexts_list[:3] + ["..."] if contexts_list and len(contexts_list) > 3 else contexts_list
+                            }
+                            st.json(preview_data)
+                    else:
+                        st.warning("No valid data found in the JSON file")
                 except Exception as e:
                     st.error(f"Error processing JSON file: {e}")
+        # Add sample data button for Spaces
+        if is_running_on_spaces() and not questions_list:
+            if st.button("📋 Load Sample Data", help="Load sample data for testing"):
+                sample_data = create_sample_data()
+                questions_list = sample_data["questions"]
+                truths_list = sample_data["ground_truths"]
+                responses_list = sample_data["model_responses"]
+                contexts_list = sample_data["contexts"]
+                st.success("Sample data loaded successfully!")
+                # Show preview
+                with st.expander("Preview sample data"):
+                    st.json({
+                        "questions": questions_list,
+                        "ground_truths": truths_list,
+                        "model_responses": responses_list,
+                        "contexts": contexts_list
+                    })
         # Run evaluation button
         run_button = st.button("▶️ Run Evaluation", use_container_width=True,
                               disabled=st.session_state.evaluation_in_progress)
                         st.rerun()
             # Clear all history button
+            if st.button("Clear All History", use_container_width=True, type="secondary"):
                 st.session_state.evaluation_history = []
                 st.success("All history cleared")
                 st.rerun()