feat: Improve MCP tool vs resource descriptions to guide client selection
Updated all tool and resource docstrings to clearly distinguish use cases:
**Tools (AI-Powered):**
- analyze_leaderboard: 'Answer questions about the leaderboard...'
- debug_trace: 'Answer questions about traces...'
- estimate_cost: 'Answer questions about costs...'
- Explicitly list example questions they handle
- Direct users AWAY from resources for questions
**Resources (Raw Data Only):**
- leaderboard://{repo}: '[RAW DATA ONLY]' prefix
- trace://{trace_id}/{repo}: '[RAW DATA ONLY]' prefix
- cost://model/{model_name}: '[RAW DATA ONLY]' prefix
- Clear warning: 'DO NOT USE THIS for questions...'
- Redirect to appropriate tool for analysis
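
For reference, the three resource URI templates expand as in the small sketch below; the concrete values are the defaults and examples quoted in the docstrings, and the exact URI rendering is an assumption based on the templates.

```python
# Sketch of how the resource URI templates expand (rendering assumed from the
# templates; repo/trace_id/model values are the defaults/examples in the docstrings).
repo = "kshitijthakkar/smoltrace-leaderboard"
trace_id = "trace_abc123"
model_name = "openai/gpt-4"

print(f"leaderboard://{repo}")       # leaderboard://kshitijthakkar/smoltrace-leaderboard
print(f"trace://{trace_id}/{repo}")  # trace://trace_abc123/kshitijthakkar/smoltrace-leaderboard
print(f"cost://model/{model_name}")  # cost://model/openai/gpt-4
```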
This fixes an issue where the question 'Which model is leading?' incorrectly called
the leaderboard resource instead of the analyze_leaderboard tool.
MCP clients should now correctly choose:
- Tools for questions/insights → AI-powered analysis
- Resources for data access → Raw JSON without analysis
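
To illustrate why the wording matters, here is a toy, description-driven selector. It is only a sketch: real MCP clients typically let the LLM match intent against these descriptions, and everything below except the two abbreviated description strings is hypothetical.

```python
# Toy sketch only - not the actual MCP client logic. It shows how description wording
# steers selection: questions match the tool that says "Answer questions...", plain
# data pulls match the resource flagged "[RAW DATA ONLY]". The two description strings
# are abbreviated from the new docstrings; the routing heuristic itself is invented.
ENDPOINTS = {
    "tool:analyze_leaderboard": "Answer questions about the leaderboard with AI-powered analysis and insights.",
    "resource:leaderboard://{repo}": "[RAW DATA ONLY] Get raw leaderboard data in JSON format - NO analysis or insights.",
}

def pick_endpoint(user_input: str) -> str:
    """Route questions to the analysis tool and raw data requests to the resource."""
    text = user_input.strip().lower()
    wants_analysis = text.endswith("?") or text.startswith(("which", "what", "why", "how"))
    for name, description in ENDPOINTS.items():
        if wants_analysis and description.startswith("Answer questions"):
            return name
        if not wants_analysis and description.startswith("[RAW DATA ONLY]"):
            return name
    return "tool:analyze_leaderboard"  # default to analysis when intent is unclear

print(pick_endpoint("Which model is leading?"))         # tool:analyze_leaderboard
print(pick_endpoint("fetch the raw leaderboard json"))  # resource:leaderboard://{repo}
```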
mcp_tools.py (+72, -24)
@@ -31,11 +31,20 @@ async def analyze_leaderboard(
     gemini_api_key: Optional[str] = None
 ) -> str:
     """
-
+    Answer questions about the leaderboard with AI-powered analysis and insights.

-
-
-
+    USE THIS TOOL when you need to:
+    - Answer questions like "Which model is leading?", "What's the best model for cost?"
+    - Get intelligent insights about top performers and trends
+    - Compare models and understand trade-offs
+    - Get recommendations based on leaderboard data
+
+    DO NOT use the leaderboard:// resource for questions - use this tool instead!
+    The resource only returns raw JSON data without any analysis.
+
+    This tool uses Google Gemini 2.5 Pro to provide intelligent analysis of
+    agent evaluation results, including top performers, trends, cost/performance
+    trade-offs, and actionable recommendations.

     Args:
         leaderboard_repo (str): HuggingFace dataset repository containing leaderboard data. Default: "kshitijthakkar/smoltrace-leaderboard"

@@ -129,11 +138,20 @@ async def debug_trace(
     gemini_api_key: Optional[str] = None
 ) -> str:
     """
-
+    Answer questions about agent traces with AI-powered debugging and analysis.
+
+    USE THIS TOOL when you need to:
+    - Answer questions like "Why did this fail?", "What took the most time?", "Why was X called?"
+    - Debug agent execution traces and understand what happened
+    - Identify bottlenecks and performance issues
+    - Get explanations about agent behavior

-
-
-
+    DO NOT use the trace:// resource for questions - use this tool instead!
+    The resource only returns raw OTEL JSON data without any analysis.
+
+    This tool uses Google Gemini 2.5 Pro to analyze OpenTelemetry trace data and
+    provide intelligent debugging insights, step-by-step breakdowns, and answers
+    to specific questions about execution flow.

     Args:
         trace_id (str): Unique identifier for the trace to analyze (e.g., "trace_abc123")

@@ -226,11 +244,20 @@ async def estimate_cost(
     gemini_api_key: Optional[str] = None
 ) -> str:
     """
-
+    Answer questions about evaluation costs with AI-powered estimates and recommendations.
+
+    USE THIS TOOL when you need to:
+    - Answer questions like "How much will this cost?", "What's the cheapest option?"
+    - Get cost predictions for running evaluations
+    - Compare costs between different models or hardware
+    - Get optimization recommendations to reduce costs
+
+    DO NOT use the cost:// resource for estimates - use this tool instead!
+    The resource only returns raw pricing tables without calculations.

-    This tool
-
-
+    This tool uses Google Gemini 2.5 Pro to calculate LLM API costs, HuggingFace
+    Jobs compute costs, CO2 emissions, and provide intelligent cost breakdowns with
+    optimization recommendations.

     Args:
         model (str): Model identifier in litellm format (e.g., "openai/gpt-4", "meta-llama/Llama-3.1-8B")

@@ -637,17 +664,24 @@ async def get_dataset(
 @gr.mcp.resource("leaderboard://{repo}")
 def get_leaderboard_data(repo: str = "kshitijthakkar/smoltrace-leaderboard", hf_token: Optional[str] = None) -> str:
     """
-    Get raw leaderboard data
+    [RAW DATA ONLY] Get raw leaderboard data in JSON format - NO analysis or insights.

-
-
+    ⚠️ DO NOT USE THIS for questions like "Which model is leading?" or "What's the best model?"
+    Instead, use the analyze_leaderboard TOOL which provides AI-powered insights.
+
+    This resource is ONLY for:
+    - Getting raw JSON data when you need to process it yourself
+    - Low-level data access for custom analysis
+    - Direct dataset retrieval without AI interpretation
+
+    For questions, insights, recommendations, or analysis → use analyze_leaderboard tool instead!

     Args:
         repo (str): HuggingFace dataset repository name. Default: "kshitijthakkar/smoltrace-leaderboard"
         hf_token (Optional[str]): HuggingFace token for dataset access. If None, uses HF_TOKEN environment variable.

     Returns:
-        str: JSON string containing
+        str: Raw JSON string containing all evaluation runs without any analysis
     """
     try:
         # Use user-provided token or fall back to environment variable

@@ -673,10 +707,17 @@ def get_leaderboard_data(repo: str = "kshitijthakkar/smoltrace-leaderboard", hf_
 @gr.mcp.resource("trace://{trace_id}/{repo}")
 def get_trace_data(trace_id: str, repo: str, hf_token: Optional[str] = None) -> str:
     """
-    Get raw trace data
+    [RAW DATA ONLY] Get raw OpenTelemetry trace data in JSON format - NO analysis.
+
+    ⚠️ DO NOT USE THIS for questions like "Why did this fail?" or "What took the most time?"
+    Instead, use the debug_trace TOOL which provides AI-powered debugging and insights.

-    This resource
-
+    This resource is ONLY for:
+    - Getting raw OTEL span data when you need to process it yourself
+    - Low-level trace access for custom analysis
+    - Direct dataset retrieval without AI interpretation
+
+    For debugging, questions, or analysis → use debug_trace tool instead!

     Args:
         trace_id (str): Unique identifier for the trace (e.g., "trace_abc123")

@@ -684,7 +725,7 @@ def get_trace_data(trace_id: str, repo: str, hf_token: Optional[str] = None) ->
         hf_token (Optional[str]): HuggingFace token for dataset access. If None, uses HF_TOKEN environment variable.

     Returns:
-        str: JSON string containing
+        str: Raw JSON string containing OpenTelemetry spans without any analysis
     """
     try:
         # Use user-provided token or fall back to environment variable

@@ -727,16 +768,23 @@ def get_trace_data(trace_id: str, repo: str, hf_token: Optional[str] = None) ->
 @gr.mcp.resource("cost://model/{model_name}")
 def get_cost_data(model_name: str) -> str:
     """
-    Get
+    [RAW DATA ONLY] Get raw pricing data for a model in JSON format - NO estimates or analysis.
+
+    ⚠️ DO NOT USE THIS for questions like "How much will this cost?" or "What's the best value?"
+    Instead, use the estimate_cost TOOL which provides AI-powered cost estimates and recommendations.
+
+    This resource is ONLY for:
+    - Getting raw pricing tables when you need to process them yourself
+    - Looking up base rates for models and hardware
+    - Direct price data retrieval without calculations

-
-    helping users understand evaluation costs.
+    For cost estimates, predictions, or recommendations → use estimate_cost tool instead!

     Args:
         model_name (str): Model identifier (e.g., "openai/gpt-4", "meta-llama/Llama-3.1-8B")

     Returns:
-        str: JSON string
+        str: Raw JSON string with pricing rates without any cost estimation
     """
     # Cost database
     llm_costs = {
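
To make the raw-rates vs. computed-estimate distinction concrete: the cost:// resource only returns pricing rows like the ones in llm_costs, while estimate_cost does the arithmetic and adds recommendations. Below is a minimal sketch of that kind of arithmetic, with placeholder rates and field names, not the real llm_costs entries.

```python
# Minimal sketch, not the actual implementation: the cost:// resource returns only
# per-token rates like these (placeholder values and keys, not the real llm_costs
# table), while the estimate_cost tool performs the calculation and adds analysis.
raw_rates = {
    "input_per_1m_tokens": 2.50,    # USD per 1M prompt tokens (placeholder)
    "output_per_1m_tokens": 10.00,  # USD per 1M completion tokens (placeholder)
}

def estimate_llm_api_cost(prompt_tokens: int, completion_tokens: int, rates: dict) -> float:
    """The kind of arithmetic the tool layers on top of the raw rates."""
    return (
        prompt_tokens / 1_000_000 * rates["input_per_1m_tokens"]
        + completion_tokens / 1_000_000 * rates["output_per_1m_tokens"]
    )

# e.g. 200k prompt tokens + 50k completion tokens across an evaluation run
print(f"${estimate_llm_api_cost(200_000, 50_000, raw_rates):.2f}")  # $1.00
```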
|