youssefleb committed on
Commit
762edf7
·
verified ·
1 Parent(s): a4f27db

Update agent_logic.py

Browse files
Files changed (1) hide show
  1. agent_logic.py +87 -46
agent_logic.py CHANGED
@@ -1,4 +1,4 @@
1
- # agent_logic.py (Milestone 5 - FINAL & ROBUST + LOGGING + NATURAL TEXT + ALLOWLIST FILTER)
2
  import asyncio
3
  from typing import AsyncGenerator, Dict, Optional
4
  import json
@@ -12,7 +12,6 @@ import re
12
  from personas import PERSONAS_DATA
13
  import config
14
  from utils import load_prompt
15
- # Removed extract_json_str as we no longer need to parse the solution
16
  from mcp_servers import AgentCalibrator, BusinessSolutionEvaluator, get_llm_response
17
  from self_correction import SelfCorrector
18
 
@@ -20,9 +19,6 @@ CLASSIFIER_SYSTEM_PROMPT = load_prompt(config.PROMPT_FILES["classifier"])
20
  HOMOGENEOUS_MANAGER_PROMPT = load_prompt(config.PROMPT_FILES["manager_homogeneous"])
21
  HETEROGENEOUS_MANAGER_PROMPT = load_prompt(config.PROMPT_FILES["manager_heterogeneous"])
22
 
23
- # --- METRIC BOUNCER (Allowlist) ---
24
- # We map any variation of the key to the canonical internal name.
25
- # If a key isn't in here, it gets dropped.
26
  METRIC_MAPPING = {
27
  "novelty": "Novelty",
28
  "usefulness": "Usefulness_Feasibility",
@@ -42,6 +38,7 @@ class Baseline_Single_Agent:
42
  self.gemini_client = api_clients.get("Gemini")
43
  async def solve(self, problem: str, persona_prompt: str):
44
  if not self.gemini_client: raise ValueError("Single_Agent requires a Google/Gemini client.")
 
45
  return await get_llm_response("Gemini", self.gemini_client, persona_prompt, problem)
46
 
47
  class Baseline_Static_Homogeneous:
@@ -55,14 +52,19 @@ class Baseline_Static_Homogeneous:
55
  user_prompt = f"As an expert Implementer, generate a detailed plan for this problem: {problem}"
56
 
57
  tasks = [get_llm_response(llm, client, system_prompt, user_prompt) for llm, client in self.api_clients.items()]
58
- responses = await asyncio.gather(*tasks)
 
 
 
59
 
60
  manager_system_prompt = HOMOGENEOUS_MANAGER_PROMPT
61
  reports_str = "\n\n".join(f"Report from Team Member {i+1}:\n{resp}" for i, resp in enumerate(responses))
62
-
63
  manager_user_prompt = f"Original Problem: {problem}\n\n{reports_str}\n\nPlease synthesize these reports into one final, comprehensive solution."
64
 
65
- return await get_llm_response("Gemini", self.gemini_client, manager_system_prompt, manager_user_prompt)
 
 
 
66
 
67
  class Baseline_Static_Heterogeneous:
68
  def __init__(self, api_clients: dict):
@@ -83,14 +85,19 @@ class Baseline_Static_Heterogeneous:
83
  user_prompt = f"As the team's '{role}', provide your unique perspective on how to solve this problem: {problem}"
84
  tasks.append(get_llm_response(llm_name, client, system_prompt, user_prompt))
85
 
86
- responses = await asyncio.gather(*tasks)
 
 
87
 
88
  manager_system_prompt = HETEROGENEOUS_MANAGER_PROMPT
89
  reports_str = "\n\n".join(f"Report from {team_plan[role]['llm']} (as {role}):\n{resp}" for (role, resp) in zip(team_plan.keys(), responses))
90
 
91
  manager_user_prompt = f"Original Problem: {problem}\n\n{reports_str}\n\nPlease synthesize these specialist reports into one final, comprehensive solution."
92
 
93
- return await get_llm_response("Gemini", self.gemini_client, manager_system_prompt, manager_user_prompt)
 
 
 
94
 
95
  class StrategicSelectorAgent:
96
  def __init__(self, api_keys: Dict[str, Optional[str]]):
@@ -124,30 +131,56 @@ class StrategicSelectorAgent:
124
 
125
  if "ERROR:" in CLASSIFIER_SYSTEM_PROMPT: raise FileNotFoundError(CLASSIFIER_SYSTEM_PROMPT)
126
 
127
- async def _classify_problem(self, problem: str) -> AsyncGenerator[str, None]:
128
- yield "Classifying problem archetype (live)..."
129
- classification = await get_llm_response("Gemini", self.api_clients["Gemini"], CLASSIFIER_SYSTEM_PROMPT, problem)
130
- classification = classification.strip().replace("\"", "")
131
- yield f"Diagnosis: {classification}"
132
 
133
  async def solve(self, problem: str) -> AsyncGenerator[str, None]:
134
- # --- 1. Initialize Logging ---
135
  run_id = str(uuid.uuid4())[:8]
 
 
 
 
 
 
 
 
 
136
  debug_log = {
137
  "run_id": run_id,
138
  "timestamp": datetime.datetime.now().isoformat(),
139
  "problem": problem,
140
  "classification": "",
141
- "trace": []
 
142
  }
143
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  try:
145
- classification_generator = self._classify_problem(problem)
146
- classification = ""
147
- async for status_update in classification_generator:
148
- yield status_update
149
- if "Diagnosis: " in status_update:
150
- classification = status_update.split(": ")[-1]
151
 
152
  debug_log["classification"] = classification
153
 
@@ -159,7 +192,7 @@ class StrategicSelectorAgent:
159
  v_fitness_json = {}
160
  scores = {}
161
 
162
- # --- MAIN LOOP (Self-Correction) ---
163
  for i in range(2):
164
  current_problem = problem
165
  if i > 0:
@@ -176,20 +209,30 @@ class StrategicSelectorAgent:
176
 
177
  # --- DEPLOY ---
178
  default_persona = PERSONAS_DATA[config.DEFAULT_PERSONA_KEY]["description"]
 
179
 
180
  if classification == "Direct_Procedure" or classification == "Holistic_Abstract_Reasoning":
181
  if i == 0: yield "Deploying: Baseline Single Agent (Simplicity Hypothesis)..."
182
- solution_draft = await self.single_agent.solve(current_problem, default_persona)
 
183
 
184
  elif classification == "Local_Geometric_Procedural":
185
  if i == 0: yield "Deploying: Static Homogeneous Team (Expert Anomaly)..."
186
- solution_draft = await self.homo_team.solve(current_problem, default_persona)
 
187
 
188
  elif classification == "Cognitive_Labyrinth":
189
  if i == 0:
190
  yield "Deploying: Static Heterogeneous Team (Cognitive Diversity)..."
191
- team_plan, calibration_errors, calib_details = await self.calibrator.calibrate_team(current_problem)
192
 
 
 
 
 
 
 
 
 
193
  debug_log["trace"].append({
194
  "step_type": "calibration",
195
  "details": calib_details,
@@ -204,11 +247,16 @@ class StrategicSelectorAgent:
204
  yield f"Calibration complete. Best Team: {json.dumps({k: v['llm'] for k, v in team_plan.items()})}"
205
  self.current_team_plan = team_plan
206
 
207
- solution_draft = await self.hetero_team.solve(current_problem, self.current_team_plan)
 
208
 
209
  else:
210
  if i == 0: yield f"Diagnosis '{classification}' is unknown. Defaulting to Single Agent."
211
- solution_draft = await self.single_agent.solve(current_problem, default_persona)
 
 
 
 
212
 
213
  if "Error generating response" in solution_draft:
214
  raise Exception(f"The specialist team failed to generate a solution. Error: {solution_draft}")
@@ -218,32 +266,25 @@ class StrategicSelectorAgent:
218
  # --- EVALUATE ---
219
  yield "Evaluating draft (live)..."
220
 
221
- v_fitness_json = await self.evaluator.evaluate(current_problem, solution_draft)
 
222
 
223
- # --- Safety Check for List ---
224
  if isinstance(v_fitness_json, list):
225
  if len(v_fitness_json) > 0 and isinstance(v_fitness_json[0], dict):
226
  v_fitness_json = v_fitness_json[0]
227
  else:
228
  v_fitness_json = {}
229
 
230
- # --- ROBUST NORMALIZATION WITH ALLOWLIST FILTER ---
231
  normalized_fitness = {}
232
  if isinstance(v_fitness_json, dict):
233
  for k, v in v_fitness_json.items():
234
- # 1. Map fuzzy keys to canonical keys
235
  canonical_key = None
236
  clean_k = k.lower().strip()
237
-
238
- # Check exact match or known variation
239
- if clean_k in METRIC_MAPPING:
240
- canonical_key = METRIC_MAPPING[clean_k]
241
-
242
- # If we couldn't map it to a valid metric, SKIP IT.
243
- if not canonical_key:
244
- continue
245
 
246
- # 2. Extract Score Value
247
  if isinstance(v, dict):
248
  score_value = v.get('score')
249
  justification_value = v.get('justification', str(v))
@@ -251,14 +292,11 @@ class StrategicSelectorAgent:
251
  score_value = v[0].get('score')
252
  justification_value = v[0].get('justification', str(v[0]))
253
  else:
254
- # Flat value case
255
  score_value = v
256
  justification_value = "Score extracted directly."
257
 
258
- # 3. Clean Score (handle "4/5" strings)
259
  if isinstance(score_value, str):
260
  try:
261
- # Looks for the first number in the string
262
  match = re.search(r'\d+', score_value)
263
  score_value = int(match.group()) if match else 0
264
  except:
@@ -271,7 +309,6 @@ class StrategicSelectorAgent:
271
 
272
  normalized_fitness[canonical_key] = {'score': score_value, 'justification': justification_value}
273
  else:
274
- # Fallback for total failure
275
  normalized_fitness = {k: {'score': 0, 'justification': "Invalid JSON structure"} for k in ["Novelty", "Usefulness_Feasibility", "Flexibility", "Elaboration", "Cultural_Appropriateness"]}
276
 
277
  v_fitness_json = normalized_fitness
@@ -296,6 +333,10 @@ class StrategicSelectorAgent:
296
  yield "--- Max correction loops reached. Accepting best effort. ---"
297
 
298
  # --- FINALIZE ---
 
 
 
 
299
  await asyncio.sleep(0.5)
300
  yield "Milestone 5 Complete. Self-Correction loop is live."
301
 
 
1
+ # agent_logic.py (Milestone 5 - FINAL & ROBUST + LOGGING + COST TRACKING)
2
  import asyncio
3
  from typing import AsyncGenerator, Dict, Optional
4
  import json
 
12
  from personas import PERSONAS_DATA
13
  import config
14
  from utils import load_prompt
 
15
  from mcp_servers import AgentCalibrator, BusinessSolutionEvaluator, get_llm_response
16
  from self_correction import SelfCorrector
17
 
 
19
  HOMOGENEOUS_MANAGER_PROMPT = load_prompt(config.PROMPT_FILES["manager_homogeneous"])
20
  HETEROGENEOUS_MANAGER_PROMPT = load_prompt(config.PROMPT_FILES["manager_heterogeneous"])
21
 
 
 
 
22
  METRIC_MAPPING = {
23
  "novelty": "Novelty",
24
  "usefulness": "Usefulness_Feasibility",
 
38
  self.gemini_client = api_clients.get("Gemini")
39
  async def solve(self, problem: str, persona_prompt: str):
40
  if not self.gemini_client: raise ValueError("Single_Agent requires a Google/Gemini client.")
41
+ # Returns (text, usage)
42
  return await get_llm_response("Gemini", self.gemini_client, persona_prompt, problem)
43
 
44
  class Baseline_Static_Homogeneous:
 
52
  user_prompt = f"As an expert Implementer, generate a detailed plan for this problem: {problem}"
53
 
54
  tasks = [get_llm_response(llm, client, system_prompt, user_prompt) for llm, client in self.api_clients.items()]
55
+ results = await asyncio.gather(*tasks)
56
+
57
+ responses = [r[0] for r in results]
58
+ usages = [r[1] for r in results]
59
 
60
  manager_system_prompt = HOMOGENEOUS_MANAGER_PROMPT
61
  reports_str = "\n\n".join(f"Report from Team Member {i+1}:\n{resp}" for i, resp in enumerate(responses))
 
62
  manager_user_prompt = f"Original Problem: {problem}\n\n{reports_str}\n\nPlease synthesize these reports into one final, comprehensive solution."
63
 
64
+ final_text, final_usage = await get_llm_response("Gemini", self.gemini_client, manager_system_prompt, manager_user_prompt)
65
+ usages.append(final_usage)
66
+
67
+ return final_text, usages
68
 
69
  class Baseline_Static_Heterogeneous:
70
  def __init__(self, api_clients: dict):
 
85
  user_prompt = f"As the team's '{role}', provide your unique perspective on how to solve this problem: {problem}"
86
  tasks.append(get_llm_response(llm_name, client, system_prompt, user_prompt))
87
 
88
+ results = await asyncio.gather(*tasks)
89
+ responses = [r[0] for r in results]
90
+ usages = [r[1] for r in results]
91
 
92
  manager_system_prompt = HETEROGENEOUS_MANAGER_PROMPT
93
  reports_str = "\n\n".join(f"Report from {team_plan[role]['llm']} (as {role}):\n{resp}" for (role, resp) in zip(team_plan.keys(), responses))
94
 
95
  manager_user_prompt = f"Original Problem: {problem}\n\n{reports_str}\n\nPlease synthesize these specialist reports into one final, comprehensive solution."
96
 
97
+ final_text, final_usage = await get_llm_response("Gemini", self.gemini_client, manager_system_prompt, manager_user_prompt)
98
+ usages.append(final_usage)
99
+
100
+ return final_text, usages
101
 
102
  class StrategicSelectorAgent:
103
  def __init__(self, api_keys: Dict[str, Optional[str]]):
 
131
 
132
  if "ERROR:" in CLASSIFIER_SYSTEM_PROMPT: raise FileNotFoundError(CLASSIFIER_SYSTEM_PROMPT)
133
 
134
+ # Removed unused _classify_problem generator for a cleaner code structure in solve()
 
 
 
 
135
 
136
  async def solve(self, problem: str) -> AsyncGenerator[str, None]:
 
137
  run_id = str(uuid.uuid4())[:8]
138
+
139
+ # Initialize Financial Tracking
140
+ financial_report = {
141
+ "calibration_cost": 0.0,
142
+ "generation_cost": 0.0,
143
+ "total_cost": 0.0,
144
+ "usage_breakdown": []
145
+ }
146
+
147
  debug_log = {
148
  "run_id": run_id,
149
  "timestamp": datetime.datetime.now().isoformat(),
150
  "problem": problem,
151
  "classification": "",
152
+ "trace": [],
153
+ "financial_report": financial_report
154
  }
155
 
156
+ # Helper to add usage and calculate cost
157
+ def add_usage(usage_list):
158
+ if isinstance(usage_list, dict): usage_list = [usage_list]
159
+
160
+ current_step_cost = 0.0
161
+ for u in usage_list:
162
+ financial_report["usage_breakdown"].append(u)
163
+
164
+ # Lookup pricing
165
+ model_name = u.get("model", "Gemini")
166
+ # Default to 0 if model not found in config
167
+ pricing = config.PRICING.get(model_name, {"input": 0, "output": 0})
168
+
169
+ # Calculate Cost: (Tokens / 1M) * Price
170
+ cost = (u.get("input", 0) / 1_000_000 * pricing["input"]) + \
171
+ (u.get("output", 0) / 1_000_000 * pricing["output"])
172
+
173
+ financial_report["total_cost"] += cost
174
+ current_step_cost += cost
175
+ return current_step_cost
176
+
177
  try:
178
+ yield "Classifying problem archetype (live)..."
179
+ # Get classification and its usage
180
+ classification, cls_usage = await get_llm_response("Gemini", self.api_clients["Gemini"], CLASSIFIER_SYSTEM_PROMPT, problem)
181
+ classification = classification.strip().replace("\"", "")
182
+ yield f"Diagnosis: {classification}"
183
+ add_usage(cls_usage)
184
 
185
  debug_log["classification"] = classification
186
 
 
192
  v_fitness_json = {}
193
  scores = {}
194
 
195
+ # --- MAIN LOOP ---
196
  for i in range(2):
197
  current_problem = problem
198
  if i > 0:
 
209
 
210
  # --- DEPLOY ---
211
  default_persona = PERSONAS_DATA[config.DEFAULT_PERSONA_KEY]["description"]
212
+ current_usages = [] # Track usage for this specific generation step
213
 
214
  if classification == "Direct_Procedure" or classification == "Holistic_Abstract_Reasoning":
215
  if i == 0: yield "Deploying: Baseline Single Agent (Simplicity Hypothesis)..."
216
+ solution_draft, u = await self.single_agent.solve(current_problem, default_persona)
217
+ current_usages.append(u)
218
 
219
  elif classification == "Local_Geometric_Procedural":
220
  if i == 0: yield "Deploying: Static Homogeneous Team (Expert Anomaly)..."
221
+ solution_draft, u_list = await self.homo_team.solve(current_problem, default_persona)
222
+ current_usages.extend(u_list)
223
 
224
  elif classification == "Cognitive_Labyrinth":
225
  if i == 0:
226
  yield "Deploying: Static Heterogeneous Team (Cognitive Diversity)..."
 
227
 
228
+ # --- UNPACK 4 VALUES FROM CALIBRATOR ---
229
+ # (Plan, Errors, Details, UsageStats)
230
+ team_plan, calibration_errors, calib_details, calib_usage = await self.calibrator.calibrate_team(current_problem)
231
+
232
+ # Track Calibration Cost explicitly
233
+ calib_step_cost = add_usage(calib_usage)
234
+ financial_report["calibration_cost"] += calib_step_cost
235
+
236
  debug_log["trace"].append({
237
  "step_type": "calibration",
238
  "details": calib_details,
 
247
  yield f"Calibration complete. Best Team: {json.dumps({k: v['llm'] for k, v in team_plan.items()})}"
248
  self.current_team_plan = team_plan
249
 
250
+ solution_draft, u_list = await self.hetero_team.solve(current_problem, self.current_team_plan)
251
+ current_usages.extend(u_list)
252
 
253
  else:
254
  if i == 0: yield f"Diagnosis '{classification}' is unknown. Defaulting to Single Agent."
255
+ solution_draft, u = await self.single_agent.solve(current_problem, default_persona)
256
+ current_usages.append(u)
257
+
258
+ # Add generation usage to total
259
+ add_usage(current_usages)
260
 
261
  if "Error generating response" in solution_draft:
262
  raise Exception(f"The specialist team failed to generate a solution. Error: {solution_draft}")
 
266
  # --- EVALUATE ---
267
  yield "Evaluating draft (live)..."
268
 
269
+ v_fitness_json, eval_usage = await self.evaluator.evaluate(current_problem, solution_draft)
270
+ add_usage(eval_usage)
271
 
272
+ # Safety Check
273
  if isinstance(v_fitness_json, list):
274
  if len(v_fitness_json) > 0 and isinstance(v_fitness_json[0], dict):
275
  v_fitness_json = v_fitness_json[0]
276
  else:
277
  v_fitness_json = {}
278
 
279
+ # Normalization with Allowlist
280
  normalized_fitness = {}
281
  if isinstance(v_fitness_json, dict):
282
  for k, v in v_fitness_json.items():
 
283
  canonical_key = None
284
  clean_k = k.lower().strip()
285
+ if clean_k in METRIC_MAPPING: canonical_key = METRIC_MAPPING[clean_k]
286
+ if not canonical_key: continue
 
 
 
 
 
 
287
 
 
288
  if isinstance(v, dict):
289
  score_value = v.get('score')
290
  justification_value = v.get('justification', str(v))
 
292
  score_value = v[0].get('score')
293
  justification_value = v[0].get('justification', str(v[0]))
294
  else:
 
295
  score_value = v
296
  justification_value = "Score extracted directly."
297
 
 
298
  if isinstance(score_value, str):
299
  try:
 
300
  match = re.search(r'\d+', score_value)
301
  score_value = int(match.group()) if match else 0
302
  except:
 
309
 
310
  normalized_fitness[canonical_key] = {'score': score_value, 'justification': justification_value}
311
  else:
 
312
  normalized_fitness = {k: {'score': 0, 'justification': "Invalid JSON structure"} for k in ["Novelty", "Usefulness_Feasibility", "Flexibility", "Elaboration", "Cultural_Appropriateness"]}
313
 
314
  v_fitness_json = normalized_fitness
 
333
  yield "--- Max correction loops reached. Accepting best effort. ---"
334
 
335
  # --- FINALIZE ---
336
+ # Calculate Generation Cost (Total - Calibration)
337
+ # This captures initial generation + any re-generations + evaluations
338
+ financial_report["generation_cost"] = financial_report["total_cost"] - financial_report["calibration_cost"]
339
+
340
  await asyncio.sleep(0.5)
341
  yield "Milestone 5 Complete. Self-Correction loop is live."
342