youssefleb committed on
Commit
3b50dc6
·
verified ·
1 Parent(s): 762edf7

Update visuals.py

Browse files
Files changed (1) hide show
  1. visuals.py +43 -20
visuals.py CHANGED
@@ -10,22 +10,18 @@ def create_progress_chart(log_data):
10
  if not log_data or "trace" not in log_data:
11
  return None
12
 
13
- # Filter for "attempt" steps to get the scores
14
  attempts = [step for step in log_data["trace"] if step["step_type"] == "attempt"]
15
  if not attempts:
16
  return None
17
 
18
- # Define the standard criteria
19
  categories = ["Novelty", "Usefulness_Feasibility", "Flexibility", "Elaboration", "Cultural_Appropriateness"]
20
 
21
  fig = go.Figure()
22
 
23
  for i, attempt in enumerate(attempts):
24
  scores = attempt.get("scores", {})
25
- # Handle cases where scores might be missing (default to 0)
26
  values = [scores.get(cat, 0) for cat in categories]
27
 
28
- # Close the loop for radar chart (repeat first value at the end)
29
  values += [values[0]]
30
  radar_categories = categories + [categories[0]]
31
 
@@ -44,7 +40,7 @@ def create_progress_chart(log_data):
44
  polar=dict(
45
  radialaxis=dict(
46
  visible=True,
47
- range=[0, 5] # Scores are always 0-5
48
  )),
49
  showlegend=True,
50
  title="Evolution of Solution Quality"
@@ -58,24 +54,15 @@ def create_calibration_table(log_data):
58
  if not log_data or "trace" not in log_data:
59
  return None
60
 
61
- # Find the calibration step in the log
62
  calibration_step = next((step for step in log_data["trace"] if step["step_type"] == "calibration"), None)
63
-
64
- # If no calibration happened (e.g. Single Agent mode), return None
65
  if not calibration_step or "details" not in calibration_step:
66
  return None
67
 
68
- # details is a list of dicts: {'role': 'Plant', 'llm': 'Gemini', 'score': {...}}
69
  details = calibration_step["details"]
70
-
71
  data = []
72
  for item in details:
73
  role = item["role"]
74
  model = item["llm"]
75
-
76
- # The 'score' field contains the full evaluation object.
77
- # We need to extract the specific numeric score relevant to that Role.
78
- # Plant -> Novelty, Implementer -> Feasibility, Monitor -> Cultural
79
  score_data = item.get("score", {})
80
 
81
  score = 0
@@ -89,12 +76,48 @@ def create_calibration_table(log_data):
89
 
90
  data.append({"Role": role, "Model": model, "Score": score})
91
 
92
- if not data:
93
- return None
94
 
95
  df = pd.DataFrame(data)
96
-
97
- # Pivot the table: Rows = Roles, Columns = Models, Values = Scores
98
- # This creates a nice comparison matrix
99
  pivot_df = df.pivot(index="Role", columns="Model", values="Score").reset_index()
100
- return pivot_df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  if not log_data or "trace" not in log_data:
11
  return None
12
 
 
13
  attempts = [step for step in log_data["trace"] if step["step_type"] == "attempt"]
14
  if not attempts:
15
  return None
16
 
 
17
  categories = ["Novelty", "Usefulness_Feasibility", "Flexibility", "Elaboration", "Cultural_Appropriateness"]
18
 
19
  fig = go.Figure()
20
 
21
  for i, attempt in enumerate(attempts):
22
  scores = attempt.get("scores", {})
 
23
  values = [scores.get(cat, 0) for cat in categories]
24
 
 
25
  values += [values[0]]
26
  radar_categories = categories + [categories[0]]
27
 
 
40
  polar=dict(
41
  radialaxis=dict(
42
  visible=True,
43
+ range=[0, 5]
44
  )),
45
  showlegend=True,
46
  title="Evolution of Solution Quality"
 
54
  if not log_data or "trace" not in log_data:
55
  return None
56
 
 
57
  calibration_step = next((step for step in log_data["trace"] if step["step_type"] == "calibration"), None)
 
 
58
  if not calibration_step or "details" not in calibration_step:
59
  return None
60
 
 
61
  details = calibration_step["details"]
 
62
  data = []
63
  for item in details:
64
  role = item["role"]
65
  model = item["llm"]
 
 
 
 
66
  score_data = item.get("score", {})
67
 
68
  score = 0
 
76
 
77
  data.append({"Role": role, "Model": model, "Score": score})
78
 
79
+ if not data: return None
 
80
 
81
  df = pd.DataFrame(data)
 
 
 
82
  pivot_df = df.pivot(index="Role", columns="Model", values="Score").reset_index()
83
+ return pivot_df
def create_cost_summary(log_data):
    """Build a Markdown "Financial Intelligence Report" from a run log.

    Args:
        log_data: Run log dict. Cost data is read from the optional
            ``"financial_report"`` key, which may contain ``total_cost``,
            ``calibration_cost``, ``generation_cost`` (floats, USD) and
            ``usage_breakdown`` (list of dicts with ``model``, ``input``,
            ``output`` keys) — TODO confirm schema against the logger.

    Returns:
        A Markdown string summarizing costs, token usage, and model calls,
        or a short "no data" notice when no financial report is present.
    """
    if not log_data or "financial_report" not in log_data:
        return "**No Financial Data Available**"

    fin = log_data["financial_report"]
    total = fin.get("total_cost", 0)
    calib = fin.get("calibration_cost", 0)
    gen = fin.get("generation_cost", 0)

    # Fetch the breakdown once and aggregate tokens + per-model call counts
    # in a single pass (the original re-fetched and re-iterated it 3 times).
    usage = fin.get("usage_breakdown", [])
    total_input = 0
    total_output = 0
    models_used = {}
    for u in usage:
        total_input += u.get("input", 0)
        total_output += u.get("output", 0)
        model = u.get("model", "Unknown")
        models_used[model] = models_used.get(model, 0) + 1

    model_str = ", ".join(f"{name} ({calls} calls)" for name, calls in models_used.items())

    md = f"""
### 💰 Financial Intelligence Report

| **Category** | **Cost (USD)** | **Details** |
| :--- | :--- | :--- |
| **Total Investment** | **${total:.6f}** | **Total execution cost** |
| Calibration Phase | ${calib:.6f} | Auditing models to pick the best team |
| Solution Phase | ${gen:.6f} | Drafting, refining, and judging |

---
**Operational Metrics:**
* **Total Tokens:** {total_input + total_output:,} ({total_input:,} in / {total_output:,} out)
* **Models Deployed:** {model_str}
"""
    return md