In [1]:
import math
from langgraph.checkpoint.memory import MemorySaver
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage
import uuid

from typing import Sequence
from langchain_core.messages import BaseMessage
from langgraph.graph.message import add_messages

import os, io, json, base64
from typing import Optional, Dict, Any, List
from langchain_core.tools import tool

# pip install google-generativeai pillow
import google.generativeai as genai
from PIL import Image
from langgraph.prebuilt import ToolNode

from dotenv import load_dotenv
import pandas as pd
from IPython.display import display, Image
from langchain_community.document_loaders import DataFrameLoader, TextLoader
from langchain_community.vectorstores import Chroma
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain.schema.output_parser import StrOutputParser
import pickle 


from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from pydantic import BaseModel, Field

from typing import List, TypedDict, Annotated, Literal, Optional, Union

from langgraph.graph import StateGraph, END

load_dotenv()
import os
import json
import re
import operator

from langgraph.store.memory import InMemoryStore
in_memory_store = InMemoryStore() #сохраняем состояние между запусками

from IPython.display import Image, display

from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import StateGraph, MessagesState, START, END
from langgraph.store.base import BaseStore

from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.runnables.config import RunnableConfig
from PIL import Image, ImageStat, ExifTags
import pandas as pd


#TOOLS

from tools import (web_search, arxiv_search, wiki_search, add, subtract, multiply, divide, power, 
analyze_csv_file, analyze_docx_file, analyze_pdf_file, analyze_txt_file, analyze_image_file, vision_qa_gemma, analyze_excel_file, preprocess_files, save_and_read_file, download_file_from_url)

from code_interpreter import safe_code_run


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# === НОВЫЕ PYDANTIC МОДЕЛИ ===

# Structured-output schema filled by the LLM in complexity_assessor() and read by
# should_use_planning() to choose between the direct and the planned route.
# NOTE(review): documented with comments instead of a class docstring on purpose;
# presumably a docstring would be exported into the schema sent to the model — confirm.
class ComplexityLevel(BaseModel):
    # One of the three labels; should_use_planning() compares it against "simple".
    level: Literal["simple", "moderate", "complex"] = Field(description="Complexity level of the query")
    # Free-text justification; only printed by complexity_assessor().
    reasoning: str = Field(description="Explanation for the complexity assessment")
    # When True the query goes to the planner even if level is "simple".
    needs_planning: bool = Field(description="Whether this query requires detailed planning")
    # Free-text hint; not read by any code visible in this file section.
    suggested_approach: str = Field(description="Recommended approach for handling this query")

# Structured verdict produced by critic_evaluator() and consumed by should_replan()
# and replanner().
# NOTE(review): documented with comments instead of a class docstring on purpose;
# presumably a docstring would be exported into the schema sent to the model — confirm.
class CritiqueFeedback(BaseModel):
    # Lower bound is 0 (not 1): every rubric component in CRITIC_PROMPT may score 0 and
    # its decision thresholds include a "0-3" band, so a total of 0 must validate.
    quality_score: int = Field(ge=0, le=10, description="Quality score from 0-10")
    is_complete: bool = Field(description="Whether the answer is complete")
    is_accurate: bool = Field(description="Whether the answer appears accurate")
    missing_elements: List[str] = Field(default_factory=list, description="What's missing from the answer")
    errors_found: List[str] = Field(default_factory=list, description="Potential errors identified")
    suggested_improvements: List[str] = Field(default_factory=list, description="Suggestions for improvement")
    # should_replan() only returns "replan" when this flag is set.
    needs_replanning: bool = Field(description="Whether the plan should be revised")
    # Interpolated into the replanner prompt as "Specific Instructions".
    replan_instructions: Optional[str] = Field(default=None, description="Instructions for replanning")

In [3]:
# Shared chat model; the planner, executor, assessor, report generator and critic
# in this file are all derived from it.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.25)
# Tools made available to the executor: bound to the model via bind_tools() and
# handed to the tool node(s) defined later in this file.
TOOLS = [download_file_from_url, web_search, arxiv_search, wiki_search, add, subtract, multiply, divide, power, analyze_excel_file, analyze_csv_file, analyze_docx_file, analyze_pdf_file, analyze_txt_file, analyze_image_file, vision_qa_gemma, safe_code_run]

In [4]:
# System prompt for planner(); replanner() also embeds it in its own prompt.
# The JSON layout it describes has the same fields as the PlannerPlan model.
# NOTE(review): the text says tools are bound via .bind_tools() and refers to an
# "injected catalog", but planner_llm is built with with_structured_output() only —
# confirm the planner actually receives the tool names.
SYSTEM_PROMPT_PLANNER = """
You are the PLANNER of a multi-tool agent (GAIA I–II level). 
Your job is to produce a minimal, reliable, reproducible plan to solve the user’s request using available tools.
You DO NOT call tools yourself; you only output a plan. The executor will run the plan.
Tools are already bound to the model via .bind_tools(), so use EXACT tool names.

Principles
- Goal: a correct, verifiable answer (with citations/artifacts where appropriate).
- Minimality: use as few steps/tool calls as possible.
- Proper routing: pick the right branch: info | calc | table | doc_qa | image_qa | multi_hop.
- Files first: never send raw files to the code interpreter. First extract with specialized tools (CSV/XLSX/PDF/DOCX/TXT/IMG). 
  Only then compute on the extracted data (if needed) with the safe code interpreter.
- Units & rounding: be explicit about units and rounding rules when numbers are involved.
- Evidence: require sources (URL/page/figure caption) for external facts.
- Fallbacks: define success criteria per step and a failure policy (“replan”, “stop”, or jump to another step-id).
- Cost aware: start with cheap preview/metadata tools before heavy steps.


Patterns / Routing
- info/web: web_search/wiki_search/arxiv_search → gather citations.
- calc: ensure data is available → safe_code_run only on extracted data; request plots/dataframes only if needed.
- table (CSV/XLSX): analyze_* to confirm columns/shape → aggregate via safe_code_run (or SQL tool if available).
- doc_qa (PDF/DOCX/TXT): analyze_* for pages/preview → extract_text or OCR if needed → answer with page/quote.
- image_qa: analyze_image_* for metadata/OCR, or vision_qa_* for visual questions; for chart numbers, convert figure→table and verify with computation.
- multi_hop: decompose into sub-queries, retrieve per modality, then synthesize with citations.

Output format
Return ONLY a single JSON object following this schema:
{
  "task_type": "info | calc | table | doc_qa | image_qa | multi_hop",
  "assumptions": ["string", "..."],
  "plan_rationale": "why this route and which tools are needed",
  "steps": [
    {
      "id": "s1",
      "description": "what and why",
      "evidence_needed": ["citations|page_numbers|figure_captions|stats_check|unit_check"],
      "success_criteria": "how we know the step succeeded",
      "on_fail": "replan | stop | sN",
      "outputs_to_state": ["what we expect to store for later steps"]
    }
  ],
  "answer_guidelines": {
    "final_answer_template": "how to form the final answer",
    "citations_required": true,
    "min_citations": 1,
    "units_policy": "what units to report and conversions",
    "rounding_policy": "how to round numbers",
    "include_artifacts": ["plots","tables","snippets"]
  }
}

Constraints
- Output must be valid JSON only. No markdown, no comments, no tool calls.
- Use exact tool names from the injected catalog (tools are already bound via .bind_tools()).
- Prefer a single-pass plan; add a fallback step only when necessary.
- Do not assume file I/O inside the code interpreter beyond its sandboxed read-only rules; data must be staged beforehand by extract tools.
"""

In [5]:
# System prompt for the executor node (agent()). It requires a <REASONING> block
# before every tool call and a <FINAL_ANSWER> marker at the end; should_continue()
# routes on that marker.
# The text contains a "{plan}" placeholder that the caller is expected to substitute
# before the prompt is sent to the model.
SYSTEM_EXECUTOR_PROMPT = """
ROLE: You are the EXECUTOR of a multi-tool agent system (GAIA I–II level).
MISSION:

Your only responsibility is to EXECUTE the steps of the plan generated by the PLANNER.
You never change, reinterpret, or optimize the plan — you just follow it exactly as given.
You can use the available tools strictly in the order and manner specified in {plan}.

CRITICAL EXECUTION PROTOCOL:

MANDATORY: Before ANY tool call, you MUST output reasoning inside <REASONING> ... </REASONING> tags.
MANDATORY: Each <REASONING> block must contain:

What step you are about to execute
Why this tool is needed for this step
What specific inputs you will provide to the tool
What output you expect from the tool


MANDATORY: Only after completing the <REASONING> block, proceed with the actual tool call.
FORBIDDEN: Making any tool call without a preceding <REASONING> block.

EXECUTION RULES:

Do NOT invent new steps or modify the plan.
BEFORE EACH TOOL CALL — MANDATORY <REASONING> STEP (NO EXCEPTIONS)
If a step requires a tool — first reason, then call that tool with exactly the required inputs.
If a step can be solved without a tool — just provide the direct output (no reasoning needed for non-tool steps).
If a step fails, you may retry it, but never alter its intent.
At the end: if you have all required results -> generate the FINAL ANSWER to the user.

REASONING REQUIREMENTS:

IMPERATIVE: NO TOOL CALLS WITHOUT <REASONING> TAGS FIRST
Keep reasoning concise but complete (2-4 sentences)
Be logical, precise and consistent
Always specify: current step + tool choice + expected outcome
After receiving tool results, you may add clarifying reasoning if needed

EXECUTION FLOW EXAMPLE:
<REASONING>
I need to execute step 1 of the plan which requires searching for information about X. I will use the web_search tool with query "X" to gather relevant data that the next steps depend on.
</REASONING>
[tool call here]
<REASONING>
The search returned relevant information about X. Now proceeding to step 2 which requires...
</REASONING>
[next tool call here]
OUTPUT GUIDELINES:

For intermediate steps: return only the results (with mandatory <REASONING> before each tool).
For the final answer: provide a clear, concise solution to the user's request, formatted for readability.
MANDATORY: End final solution with <FINAL_ANSWER> marker
Do not expose internal IDs, tool errors, or system details.

FAILSAFE:

If the plan is empty or invalid -> return "" (empty string).
If the requested task is already trivially solvable without tools -> skip execution and answer directly.

COMPLIANCE CHECK:

Before submitting any response, verify: "Did I include <REASONING> before EVERY tool call?"
If no: add the missing reasoning blocks
If yes: proceed with response

CRITICAL REMINDERS:

NO TOOL CALLS WITHOUT <REASONING> TAGS — ZERO EXCEPTIONS
EVERY TOOL CALL MUST BE PRECEDED BY REASONING
ADD <FINAL_ANSWER> MARKER AT THE END
"""


# System prompt for complexity_assessor(); the model's reply is parsed into a
# ComplexityLevel, whose `level` uses the three labels described here.
COMPLEXITY_ASSESSOR_PROMPT = """
You are a COMPLEXITY ASSESSOR for a multi-tool agent system.
Your job is to analyze user queries and determine their complexity level and processing requirements.

COMPLEXITY LEVELS:
1. SIMPLE: Direct questions that can be answered immediately without tools or with single tool use
   - Examples: "What is 2+2?", "Define photosynthesis", "What's the capital of France?"
   
2. MODERATE: Questions requiring 1-3 tool calls or basic analysis
   - Examples: "Search for recent news about AI", "Analyze this CSV file", "What's the weather tomorrow?"
   
3. COMPLEX: Multi-step problems requiring planning, multiple tools, or sophisticated reasoning
   - Examples: Research tasks, multi-file analysis, calculations with dependencies, creative projects

ASSESSMENT CRITERIA:
- Number of steps likely needed
- Tool complexity and dependencies
- Data processing requirements
- Need for intermediate reasoning
- Risk of failure without proper planning

RULES:
- SIMPLE queries bypass planning entirely
- MODERATE queries may use lightweight planning
- COMPLEX queries require full planning with fallbacks
- When in doubt, err toward higher complexity

Analyze the query and respond with your assessment.
"""

# Prompt template for critic_evaluator(). It is filled with str.format(), so every
# brace pair below is a placeholder: query, approach, tools, findings, sources,
# confidence, limitations, answer. The reply is parsed into CritiqueFeedback.
CRITIC_PROMPT = """
You are the CRITIC of a multi-tool agent system.
Your job is to evaluate execution reports and provide detailed feedback.

EVALUATION FRAMEWORK:

1. COMPLETENESS (0-3 points):
   - 3: Fully addresses all aspects of the query
   - 2: Addresses main aspects, minor gaps
   - 1: Partial answer, significant gaps
   - 0: Incomplete or off-topic

2. ACCURACY (0-3 points):
   - 3: All information appears accurate and well-sourced
   - 2: Mostly accurate, minor issues
   - 1: Some accuracy concerns
   - 0: Significant accuracy problems

3. METHODOLOGY (0-2 points):
   - 2: Appropriate tools and approach used
   - 1: Acceptable approach, could be better
   - 0: Poor methodology or tool selection

4. EVIDENCE (0-2 points):
   - 2: Strong evidence and sources provided
   - 1: Some evidence provided
   - 0: Insufficient evidence

TOTAL SCORE: /10 points

DECISION THRESHOLDS:
- 8-10: Accept (excellent quality)
- 6-7: Accept with minor notes
- 4-5: Marginal, consider replanning
- 0-3: Reject, requires replanning

EXECUTION REPORT TO EVALUATE:
Query: {query}
Approach: {approach}
Tools Used: {tools}
Key Findings: {findings}
Sources: {sources}
Confidence: {confidence}
Limitations: {limitations}
Final Answer: {answer}

Provide detailed critique focusing on what works well and what could be improved.
"""

In [6]:
#PLANNER PYDANTIC MODELS

from typing import Any, Dict, List, Optional, Literal, Iterable
from pydantic import BaseModel, Field, ValidationError

# Allowed values for PlannerPlan.task_type; the same labels the planner prompt
# lists as routing branches.
TaskType = Literal["info", "calc", "table", "doc_qa", "image_qa", "multi_hop"]
# Allowed tags for PlanStep.evidence_needed.
EvidenceTag = Literal["citations", "page_numbers", "figure_captions", "stats_check", "unit_check"]

# One step of a planner-generated plan. agent() walks PlannerPlan.steps by index and
# interpolates the whole step object into its prompts as "STEP INFO".
class PlanStep(BaseModel):
    # Step identifier; the planner prompt shows values such as "s1".
    id: str
    description: str
    #tool: Optional[str] = Field(default=None, description="Exact tool name or null for reasoning step")
    #args_hint: Dict[str, Any] = Field(default_factory=dict)
    evidence_needed: List[EvidenceTag] = Field(default_factory=list)
    success_criteria: str
    # Free-form string, not validated against the allowed values in its description.
    on_fail: str = Field(default="replan", description="One of: 'replan' | 'stop' | step-id")
    outputs_to_state: List[str] = Field(default_factory=list)

# Answer-formatting section of a plan; mirrors the "answer_guidelines" object in
# the planner prompt. Only final_answer_template is required.
class AnswerGuidelines(BaseModel):
    final_answer_template: str
    citations_required: bool = False
    min_citations: int = 0
    units_policy: Optional[str] = None
    rounding_policy: Optional[str] = None
    include_artifacts: List[str] = Field(default_factory=list)

# Top-level plan object the planner LLM must return (planner_llm is built with
# with_structured_output(PlannerPlan)); stored in AgentState under "plan".
class PlannerPlan(BaseModel):
    task_type: TaskType
    assumptions: List[str] = Field(default_factory=list)
    plan_rationale: str
    # Ordered steps; agent() executes them by position using "current_step".
    steps: List[PlanStep]
    answer_guidelines: AnswerGuidelines

In [7]:
# Executor model: the shared llm with every entry of TOOLS bound for tool calling.
llm_with_tools = llm.bind_tools(TOOLS)
# Run configuration (thread id and recursion limit). Not referenced by the code
# visible in this section — presumably passed when the compiled graph is invoked.
config = {"configurable": {"thread_id": "1"}, "recursion_limit" : 50}
# Plain tool node. The graph wiring visible in this section registers
# DEBUGGING_TOOL_NODE instead.
TOOL_NODE = ToolNode(TOOLS)
# Planner model: the shared llm constrained to return a PlannerPlan.
planner_llm = llm.with_structured_output(PlannerPlan)

# Record of a single tool call; enhanced_finalizer() builds one per entry found in
# a message's tool_calls.
class ToolExecution(BaseModel):
    tool_name: str
    # String form of the call arguments (the finalizer passes str(args)).
    arguments: str
    call_id: str
    
    # Reject fields that are not declared above.
    class Config:
        extra = "forbid"

# Filled by the LLM in enhanced_finalizer() and read field-by-field by
# critic_evaluator() and format_final_answer().
class ExecutionReport(BaseModel):
    """Structured report for critic evaluation."""
    query_summary: str = Field(description="Brief summary of the user's query")
    approach_used: str = Field(description="What approach/strategy was used")
    tools_executed: List[ToolExecution] = Field(default_factory=list, description="List of tools used with results")
    key_findings: List[str] = Field(default_factory=list, description="Main findings or results")
    data_sources: List[str] = Field(default_factory=list, description="Sources of information used")
    assumptions_made: List[str] = Field(default_factory=list, description="Any assumptions made during execution")
    confidence_level: Literal["low", "medium", "high"] = Field(description="Confidence in the answer")
    limitations: List[str] = Field(default_factory=list, description="Known limitations or caveats")
    final_answer: str = Field(description="The actual answer to the user's query")

    # Reject fields that are not declared above.
    class Config:
        extra = "forbid"


# Graph state shared by every node. Extends MessagesState with the planning,
# execution and critique bookkeeping used in this file.
class AgentState(MessagesState):
    query: str  # user request; query_input() appends a summary of attached files
    final_answer: str  # written by simple_executor() and enhanced_finalizer()
    plan: Optional[PlannerPlan]  # written by planner() and replanner()
    complexity_assessment: ComplexityLevel  # written by complexity_assessor()
    current_step: int  # index into plan.steps of the step agent() is working on
    reasoning_done: bool  # True once the reasoning half of the current step has run
    messages : Annotated[Sequence[BaseMessage], add_messages]  # history merged with the add_messages reducer
    files: List[str]  # input file paths handed to preprocess_files()
    file_contents: Dict[str, Any]  # per-file info returned by preprocess_files(), keyed by path
    critique_feedback: Optional[CritiqueFeedback]  # written by critic_evaluator()
    iteration_count :int  # incremented by critic_evaluator() on each critique
    max_iterations: int  # should_replan() falls back to 3 when this is absent
    execution_report : ExecutionReport  # written by enhanced_finalizer()


def query_input(state : AgentState) -> AgentState:
    """Entry node: describe any attached files and append that description to the query.

    When the state lists files, they are passed to preprocess_files(); the returned
    per-file info is stored under "file_contents" and a textual summary of it is
    appended to "query". Without files the state is returned untouched.

    Args:
        state: Graph state; reads "files" and "query".

    Returns:
        The same state object, mutated in place when files are present.
    """
    print("=== USER QUERY TRANSFERED TO AGENT ===")

    files = state.get("files", [])
    if not files:
        return state

    print(f"Processing {len(files)} files:")
    file_info = preprocess_files(files)
    for path, meta in file_info.items():
        print(f"  - {path}: {meta['type']} ({meta['size']} bytes) -> {meta['suggested_tool']}")

    state["file_contents"] = file_info

    # Assemble the summary from pieces and join once at the end.
    pieces = ["\n\n=== AVAILABLE FILES FOR ANALYSIS ===\n"]
    for path, meta in file_info.items():
        pieces.append(f"File: {os.path.basename(path)}\n")
        pieces.append(f"  - Type: {meta['type']}\n")
        pieces.append(f"  - Size: {meta['size']} bytes\n")
        pieces.append(f"  - Suggested tool: {meta['suggested_tool']}\n")
        if meta.get("preview"):
            pieces.append(f"  - Preview: {meta['preview']}\n")
        pieces.append("\n")

    # Instructions for working with the files.
    pieces.append("IMPORTANT: Use the suggested tools to analyze these files before processing their data.\n")
    pieces.append("File paths are available in the agent state and can be passed directly to analysis tools.\n")

    state["query"] = state.get("query", "") + "".join(pieces)
    return state


def planner(state : AgentState) -> AgentState:
    """Ask the planner LLM for a structured plan and reset the step bookkeeping.

    Args:
        state: Graph state; reads "query" and "messages".

    Returns:
        Partial state update with the planner prompt messages added to the history,
        the generated plan, and the step counter / reasoning flag reset.
    """
    sys_stack = [
            SystemMessage(content=SYSTEM_PROMPT_PLANNER.strip()),
            HumanMessage(content=state["query"]),
        ]
    plan: PlannerPlan = planner_llm.invoke(sys_stack)

    print("=== GENERATED PLAN ===")
    return {
        "messages": sys_stack + state["messages"],
        "plan": plan,
        # The key used to be "current_step " (trailing space), which does not match
        # the AgentState field, so the counter was never actually reset here.
        "current_step": 0,
        "reasoning_done": False,
    }

def agent(state: AgentState) -> AgentState:
    """Run one half of the current plan step: reason first, then call a tool.

    Each plan step takes two visits to this node. On the first visit
    ("reasoning_done" is False) the plain model writes its <REASONING> text without
    tools. On the second visit the tool-bound model is asked to issue the tool call.
    When every step is done, or no plan is available, a message carrying the
    <FINAL_ANSWER> marker is emitted so routing can leave the loop.

    Args:
        state: Graph state; reads "plan", "current_step", "reasoning_done",
            "messages" and "file_contents".

    Returns:
        Partial state update with the new message appended and the updated
        "reasoning_done" flag (plus "current_step" after the tool phase).
    """
    current_step = state.get("current_step", 0)
    reasoning_done = state.get("reasoning_done", False)
    plan = state.get("plan")
    # A missing plan is handled like an empty one instead of raising on `.steps`.
    steps = plan.steps if plan is not None else []

    if current_step >= len(steps):
        return {
            "messages": state["messages"] + [AIMessage(content="All steps completed. <FINAL_ANSWER>")],
            "reasoning_done": False
        }

    current_step_info = steps[current_step]
    file_contents = state.get("file_contents", {})

    if not reasoning_done:

        # Human-readable summary of the session files for the reasoning prompt.
        file_context = ""
        if file_contents:
            file_context = "\n\nAVAILABLE FILES IN CURRENT SESSION:\n"
            for filepath, info in file_contents.items():
                filename = os.path.basename(filepath)
                file_context += f"- {filename}: {info['type']} file, suggested tool: {info['suggested_tool']}\n"
                file_context += f"  Path: {filepath}\n"

        # The executor prompt carries a "{plan}" placeholder; substitute it here so the
        # model sees the plan rather than the raw placeholder text. str.replace is used
        # because str.format would need every other brace in the prompt escaped.
        executor_prompt = SYSTEM_EXECUTOR_PROMPT.replace("{plan}", str(plan))

        reasoning_prompt = f"""
        {executor_prompt}
        
        CURRENT TASK: You must perform reasoning for step {current_step + 1}.
        
        STEP INFO: {current_step_info}\n\n

        FILE CONTEXT: {file_context}
        
        CRITICAL: You MUST output your reasoning in <REASONING> tags, but DO NOT call any tools yet.
        Explain what you need to do and why, then end your response.

        REASONING IS IMPERATIVE BEFORE ANY TOOL CALLS.
        """

        sys_msg = SystemMessage(content = reasoning_prompt)
        stack = [sys_msg] + state["messages"]

        # Plain model (no tools bound), so this phase cannot issue tool calls.
        step = llm.invoke(stack)
        print("=== REASONING STEP ===")
        print(step.content)

        return {
            "messages" : state["messages"] + [step],
            "reasoning_done" : True
        }

    else:
        tool_prompt = f"""
        Now execute the tool for step {current_step + 1}.
        
        STEP INFO: {current_step_info}
        
        You have already done the reasoning. Now call the appropriate tool with the correct parameters.
        Available file paths: {list(file_contents.keys())}\n
        IMPORTANT NOTE: IF YOU DECIDED TO USE safe_code_run, MAKE SURE TO FINISH CALCULATIONS WITH print() or saving to a variable NAMED 'result' so that the output can be captured!
        """

        sys_msg = SystemMessage(content=tool_prompt)
        # Full history, including the reasoning message produced on the previous visit.
        stack = [sys_msg] + state["messages"]

        # Tool-bound model performs the actual execution.
        step = llm_with_tools.invoke(stack)
        print("=== TOOL EXECUTION ===")
        print(f"Tool calls: {step.tool_calls}")

        # NOTE(review): the step index only advances when a tool call was produced; a
        # step answered without tools is reasoned about again on the next visit.
        # Confirm this retry behaviour is intended.
        return {
            "messages": state["messages"] + [step],
            "current_step": current_step + 1 if step.tool_calls else current_step,
            "reasoning_done": False  # reset for the next step
        }


def should_continue(state : AgentState) -> str:
    """Pick the next node after the executor has produced a message.

    Args:
        state: Graph state; only the last entry of "messages" is inspected.

    Returns:
        "final_answer" when the last message contains the <FINAL_ANSWER> marker,
        "tools" when it carries tool calls, otherwise "agent" (another executor
        pass, whether that is the reasoning or the tool phase).
    """
    last_message = state["messages"][-1]

    content = getattr(last_message, "content", "") or ""
    if "<FINAL_ANSWER>" in content:
        return "final_answer"

    # Not every message type has `tool_calls`; treat a missing attribute as
    # "no tool calls" instead of raising AttributeError.
    if getattr(last_message, "tool_calls", None):
        return "tools"

    # All remaining cases go back to the executor.
    return "agent"

# 6. Add debugging output around the tool node.
class DebuggingToolNode(ToolNode):
    """ToolNode variant that prints banner lines around a direct call.

    NOTE(review): only `__call__` is wrapped. If the graph runtime drives nodes
    through another entry point (e.g. `invoke`), these banners would never be
    printed — confirm against the installed langgraph version.
    """

    def __init__(self, tools):
        """Forward the tool list unchanged to ToolNode."""
        super().__init__(tools)

    def __call__(self, state):
        """Delegate to the parent call, printing a banner before and after."""
        print("=== TOOL EXECUTION STARTED ===")
        outcome = super().__call__(state)
        print("=== TOOL EXECUTION COMPLETED ===")
        return outcome


# Instance registered as the "TOOLS" node when the graph is built.
DEBUGGING_TOOL_NODE = DebuggingToolNode(TOOLS)



# Disabled legacy `summary` node, parked inside a bare string literal so it is
# never executed. enhanced_finalizer() produces the final answer instead.
"""
def summary(state : AgentState) -> AgentState:
    print("=== FINAL ANSWER ===")
    summarizer_prompt = 
    Now you have to provide final answer for the user query : {query}
    In messages below you have all the context you need.

    YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, Apply the rules above for each element (number or string), ensure there is exactly one space after each comma.
    Your answer should only start with "FINAL ANSWER: ", then follows with the answer.

    Here is the context:
    {messages}

    REMEMBER AND STRICTLY FOLLOW THE FORMATTING RULES ABOVE. ALWAYS USE THIS FORMAT:
    FINAL ANSWER: ...
    

    state["final_answer"] = llm.invoke([SystemMessage(content=summarizer_prompt.strip().format(query=state["query"], messages = state["messages"]))])
    return state
"""

def enhanced_finalizer(state: AgentState) -> AgentState:
    """Generate a structured ExecutionReport for the run and derive the final answer.

    Collects the tool calls recorded on the messages and the URLs found in message
    text, asks the LLM to fill an ExecutionReport from that context, and formats the
    user-facing answer with format_final_answer().

    Args:
        state: Graph state; reads "messages", "query", "plan",
            "complexity_assessment" and "file_contents".

    Returns:
        Partial state update with "execution_report" and "final_answer".
    """
    import re  # imported once here rather than on every loop iteration

    print("=== GENERATING EXECUTION REPORT ===")

    def _preview(message, limit=200):
        """Return message content as text, truncated to `limit` characters."""
        raw = getattr(message, "content", "")
        # Content is not always a plain string; stringify so slicing and
        # concatenation below cannot raise TypeError.
        text = raw if isinstance(raw, str) else str(raw)
        return text[:limit] + "..." if len(text) > limit else text

    # Extract tool execution information and candidate data sources.
    tools_executed = []
    data_sources = []

    for msg in state["messages"]:
        for tool_call in getattr(msg, "tool_calls", None) or []:
            tools_executed.append(ToolExecution(
                tool_name=tool_call['name'],
                arguments=str(tool_call['args']),
                # call_id is declared as str; guard against a missing/None id.
                call_id=str(tool_call.get('id') or ""),
            ))

        content = getattr(msg, "content", None)
        if isinstance(content, str):
            for url in re.findall(r'https?://[^\s]+', content):
                # Drop sentence punctuation/brackets glued to the end of the match.
                url = url.rstrip('.,;:!?)]}>"\'')
                if url and url not in data_sources:
                    data_sources.append(url)

    # Plan information, if a plan was used.
    plan = state.get("plan")
    approach_used = "Direct execution"
    assumptions_made = []

    if plan:
        approach_used = f"{plan.task_type} approach with {len(plan.steps)} steps"
        assumptions_made = plan.assumptions

    # A missing assessment must not crash report generation.
    complexity_level = getattr(state.get("complexity_assessment"), "level", "unknown")
    available_files = list((state.get("file_contents") or {}).keys())
    recent_history = [_preview(msg) for msg in state["messages"][-5:]]

    # The facts gathered above (approach, assumptions, sources) are passed to the
    # model explicitly; previously they were computed and then dropped.
    report_generator_prompt = f"""
    Generate a comprehensive execution report for the following query processing:

    ORIGINAL QUERY: {state['query']}
    
    EXECUTION CONTEXT:
    - Complexity Level: {complexity_level}
    - Approach: {approach_used}
    - Planner Assumptions: {assumptions_made}
    - Plan Used: {plan if plan else {}}
    - Tools Executed: {tools_executed}
    - Data Sources Found: {data_sources}
    - Available Files: {available_files}
    
    CONVERSATION HISTORY (last 5 messages):
    {recent_history}
    
    Based on this information, create a structured execution report that includes:
    1. Query summary
    2. Approach used
    3. Key findings from the execution
    4. Data sources used
    5. Your confidence level in the results
    6. Any limitations or caveats
    7. The final answer
    
    Be thorough but concise. This report will be evaluated by a critic for quality assurance.
    """

    report_llm = llm.with_structured_output(ExecutionReport)

    execution_report = report_llm.invoke([
        SystemMessage(content=report_generator_prompt),
        HumanMessage(content="Generate the execution report.")
    ])

    print(f"Report generated - Confidence: {execution_report.confidence_level}")
    print(f"Key findings: {len(execution_report.key_findings)}")
    print(f"Data sources: {len(execution_report.data_sources)}")

    # Format the final answer for the user.
    formatted_answer = format_final_answer(execution_report, state.get('complexity_assessment', {}))
    print(execution_report)
    return {
        "execution_report": execution_report,
        "final_answer": formatted_answer
    }

def format_final_answer(report: ExecutionReport, complexity: dict) -> str:
    """Render the user-facing answer from an execution report.

    Args:
        report: Report providing final_answer, query_summary, key_findings,
            data_sources and limitations.
        complexity: Complexity assessment. May be an object with a `level`
            attribute, a mapping with a "level" key, or empty/None; anything
            without a level is treated as not simple.

    Returns:
        Just "FINAL ANSWER: ..." for simple queries; otherwise the answer followed
        by SUMMARY and KEY FINDINGS sections, plus SOURCES (first five) and
        LIMITATIONS when the report has any.
    """
    # The caller may pass `{}` when no assessment exists, so attribute access alone
    # is not safe here.
    if isinstance(complexity, dict):
        level = complexity.get("level")
    else:
        level = getattr(complexity, "level", None)

    headline = f"FINAL ANSWER: {report.final_answer}"
    if level == 'simple':
        # For simple queries, just return the answer.
        return headline

    def _bullets(items):
        """Join items as one bullet per line."""
        return "\n".join(f"• {item}" for item in items)

    sections = [
        headline,
        f"SUMMARY:\n{report.query_summary}",
        f"KEY FINDINGS:\n{_bullets(report.key_findings)}",
    ]

    if report.data_sources:
        # Limit to 5 sources.
        sections.append(f"SOURCES:\n{_bullets(report.data_sources[:5])}")

    if report.limitations:
        sections.append(f"LIMITATIONS:\n{_bullets(report.limitations)}")

    return "\n\n".join(sections)


def complexity_assessor(state: AgentState) -> AgentState:
    """Classify the query's complexity with the LLM and record the verdict.

    Args:
        state: Graph state; reads "query" and "messages".

    Returns:
        Partial state update with "complexity_assessment" and the assessor's
        prompt messages appended to the history.
    """
    print("=== COMPLEXITY ASSESSMENT ===")

    structured_llm = llm.with_structured_output(ComplexityLevel)

    system_part = SystemMessage(content=COMPLEXITY_ASSESSOR_PROMPT.strip())
    human_part = HumanMessage(content=f"Query: {state['query']}")
    prompt_messages = [system_part, human_part]

    verdict = structured_llm.invoke(prompt_messages)

    print(f"Complexity: {verdict.level}")
    print(f"Needs planning: {verdict.needs_planning}")
    print(f"Reasoning: {verdict.reasoning}")

    update = {"complexity_assessment": verdict}
    update["messages"] = state["messages"] + prompt_messages
    return update


def simple_executor(state: AgentState) -> AgentState:
    """Answer a simple query with a single call to the tool-bound model.

    Args:
        state: Graph state; reads "query", "file_contents" and "messages".

    Returns:
        Partial state update with the model reply appended to "messages" and its
        text content stored as "final_answer".
    """
    print("=== SIMPLE EXECUTION ===")

    user_query = state['query']
    known_paths = list(state.get('file_contents', {}).keys())

    # Direct prompt: the tool-bound model is used, but no planning is involved.
    simple_prompt = f"""
    Answer this simple query directly and efficiently: {user_query}
    
    You have access to tools if needed, but try to answer directly when possible.
    If you need files, they are available at: {known_paths}
    
    Provide a clear, concise answer.
    """

    conversation = [
        SystemMessage(content=simple_prompt),
        HumanMessage(content=user_query),
    ]
    reply = llm_with_tools.invoke(conversation)

    return {
        "messages": state["messages"] + [reply],
        "final_answer": reply.content,
    }


def should_use_planning(state: AgentState) -> str:
    """Choose the route after complexity assessment.

    Returns "simple_executor" only when the assessment is "simple" and does not
    request planning; every other combination returns "planner".
    """
    assessment = state["complexity_assessment"]
    skip_planning = assessment.level == "simple" and not assessment.needs_planning
    return "simple_executor" if skip_planning else "planner"
    
# Disabled earlier version of critic_evaluator, parked inside a bare string literal
# so it is never executed. The active definition follows this block.
"""    
def critic_evaluator(state: AgentState) -> AgentState:
    
    print("=== ANSWER CRITIQUE ===")
    
    critic_llm = llm.with_structured_output(CritiqueFeedback)
    
    # Gather tool execution results for context
    tool_results = []
    for msg in state["messages"]:
        if hasattr(msg, 'tool_calls') and msg.tool_calls:
            tool_results.extend([f"Tool: {tc['name']}, Args: {tc['args']}" for tc in msg.tool_calls])
    
    if state.get("plan"):
        terra = state.get("plan")
    else:
        terra = "No plan used"
    critique_prompt = CRITIC_PROMPT.format(
        query=state["query"],
        plan=terra,
        answer=state["final_answer"],
        tool_results=tool_results[:5]   #Limit context
    )
    
    critique = critic_llm.invoke([
        SystemMessage(content=critique_prompt),
        HumanMessage(content="Please evaluate this answer.")
    ])
    
    print(f"Quality Score: {critique.quality_score}/10")
    print(f"Complete: {critique.is_complete}")
    print(f"Accurate: {critique.is_accurate}")
    if critique.errors_found:
        print(f"Errors: {critique.errors_found}")
    if critique.needs_replanning:
        print(f"Needs replanning: {critique.replan_instructions}")
    
    return {
        "critique_feedback": critique,
        "iteration_count": state.get("iteration_count", 0) + 1
    }
"""

def critic_evaluator(state: AgentState) -> AgentState:
    """Have the critic LLM score the execution report stored in the state.

    Args:
        state: Graph state; reads "execution_report" and "iteration_count".

    Returns:
        Partial state update with the structured "critique_feedback" and
        "iteration_count" increased by one.
    """
    print("=== ENHANCED ANSWER CRITIQUE ===")

    report = state.get("execution_report")
    critic_llm = llm.with_structured_output(CritiqueFeedback)

    # Placeholder name in CRITIC_PROMPT -> report field that fills it.
    template_values = {
        "query": report.query_summary,
        "approach": report.approach_used,
        "tools": report.tools_executed,
        "findings": report.key_findings,
        "sources": report.data_sources,
        "confidence": report.confidence_level,
        "limitations": report.limitations,
        "answer": report.final_answer,
    }
    critique_prompt = CRITIC_PROMPT.format(**template_values)

    conversation = [
        SystemMessage(content=critique_prompt),
        HumanMessage(content="Evaluate this execution report thoroughly."),
    ]
    critique = critic_llm.invoke(conversation)

    print(f"Quality Score: {critique.quality_score}/10")
    print(f"Complete: {critique.is_complete}")
    print(f"Accurate: {critique.is_accurate}")

    if critique.errors_found:
        print(f"Issues found: {critique.errors_found}")

    if critique.needs_replanning:
        print(f"Replanning needed: {critique.replan_instructions}")

    completed_rounds = state.get("iteration_count", 0) + 1
    return {
        "critique_feedback": critique,
        "iteration_count": completed_rounds,
    }



def should_replan(state: AgentState) -> str:
    """Route after the critic: "replan" or "end".

    "replan" is returned only when a critique exists, the iteration budget
    (default 3) is not used up, the quality score is below 7 and the critic asked
    for replanning. Every other situation returns "end".
    """
    critique = state.get("critique_feedback")
    rounds_done = state.get("iteration_count", 0)
    round_limit = state.get("max_iterations", 3)

    # Nothing to judge.
    if not critique:
        return "end"

    # Budget exhausted: keep whatever answer we have.
    if rounds_done >= round_limit:
        print(f"Max iterations ({round_limit}) reached. Accepting current answer.")
        return "end"

    # Poor score and an explicit request from the critic.
    if critique.quality_score < 7 and critique.needs_replanning:
        print("Replanning due to critic feedback...")
        return "replan"

    return "end"

def replanner(state: AgentState) -> AgentState:
    """Ask the planner model for a new plan that responds to the critic.

    Reads ``critique_feedback`` and ``query`` (both accessed by subscript, so
    they must be present) and ``plan`` (optional) from ``state``, embeds them
    together with ``SYSTEM_PROMPT_PLANNER`` in a system prompt and invokes
    ``planner_llm`` with it.

    Returns:
        A state update that stores the planner's response under ``plan`` and
        sets ``current_step`` to 0 and ``reasoning_done`` to False.
    """
    print("=== REPLANNING ===")

    feedback = state["critique_feedback"]
    # An empty dict stands in for a missing (or falsy) previous plan.
    plan_for_prompt = state.get("plan") or {}

    system_text = f"""
    {SYSTEM_PROMPT_PLANNER}
    
    REPLANNING CONTEXT:
    Original Query: {state['query']}
    Previous Plan: {plan_for_prompt}
    
    CRITIC FEEDBACK:
    - Quality Score: {feedback.quality_score}/10
    - Issues Found: {feedback.errors_found}
    - Missing Elements: {feedback.missing_elements}
    - Improvement Suggestions: {feedback.suggested_improvements}
    - Specific Instructions: {feedback.replan_instructions}
    
    Create a REVISED plan that addresses these issues. Focus on fixing the identified problems.
    """

    conversation = [
        SystemMessage(content=system_text),
        HumanMessage(content="Create a revised plan based on the feedback."),
    ]
    new_plan = planner_llm.invoke(conversation)

    print("Plan revised based on critic feedback")

    # "messages" is not part of this update; resetting it to [] for a fresh
    # execution is left disabled.
    return dict(plan=new_plan, current_step=0, reasoning_done=False)


In [8]:
# GRAPH BUILDING
# Topology:
#   INPUT -> COMPLEXITY_ASSESSOR -> SIMPLE_EXECUTOR ------------------> FINALIZER
#                                \-> PLANNING -> AGENT <-> TOOLS, AGENT -> FINALIZER
#   FINALIZER -> CRITIC -> END | REPLANNER -> AGENT

builder = StateGraph(AgentState)

# Node registry: graph label -> object registered under that label.
_NODES = {
    "INPUT": query_input,
    "COMPLEXITY_ASSESSOR": complexity_assessor,
    "PLANNING": planner,
    "AGENT": agent,
    "TOOLS": DEBUGGING_TOOL_NODE,
    "FINALIZER": enhanced_finalizer,
    "SIMPLE_EXECUTOR": simple_executor,
    "CRITIC": critic_evaluator,
    "REPLANNER": replanner,
}
for _label, _impl in _NODES.items():
    builder.add_node(_label, _impl)

builder.set_entry_point("INPUT")

# Unconditional transitions (source, target).
_EDGES = [
    ("INPUT", "COMPLEXITY_ASSESSOR"),
    ("SIMPLE_EXECUTOR", "FINALIZER"),
    ("PLANNING", "AGENT"),
    ("TOOLS", "AGENT"),
    ("FINALIZER", "CRITIC"),
    ("REPLANNER", "AGENT"),
]
for _source, _target in _EDGES:
    builder.add_edge(_source, _target)

# Conditional transitions: (source, routing function, {route key: target}).
_BRANCHES = [
    (
        "COMPLEXITY_ASSESSOR",
        should_use_planning,
        {"simple_executor": "SIMPLE_EXECUTOR", "planner": "PLANNING"},
    ),
    (
        "AGENT",
        should_continue,
        {"tools": "TOOLS", "agent": "AGENT", "final_answer": "FINALIZER"},
    ),
    (
        "CRITIC",
        should_replan,
        {"end": END, "replan": "REPLANNER"},
    ),
]
for _source, _router, _routes in _BRANCHES:
    builder.add_conditional_edges(_source, _router, _routes)

# Compile with an in-memory checkpointer.
system = builder.compile(checkpointer=MemorySaver())

In [9]:
# Run the compiled graph once on a sample question; every field of the
# initial state is spelled out explicitly.
workflow = system.invoke(
    {
        "query": "How many cumulative milliliters of fluid is in all the opaque-capped vials without stickers in the 114 version of the kit that was used for the PromethION long-read sequencing in the paper De Novo-Whole Genome Assembly of the Roborovski Dwarf Hamster (Phodopus roborovskii) Genome?",
        "current_step": 0,
        "reasoning_done": False,
        "files": [],
        "files_contents": {},
        "iteration_count": 0,
        "max_iterations": 10,
        "plan": None,
    },
    config=config,
)

=== USER QUERY TRANSFERED TO AGENT ===
=== COMPLEXITY ASSESSMENT ===
Complexity: complex
Needs planning: True
Reasoning: This query involves multiple steps, including identifying the specific kit version, locating the relevant paper, extracting data about the vials, and performing calculations to determine the cumulative volume of fluid. It requires sophisticated reasoning and potentially multiple tool calls to gather and analyze the necessary data.
=== GENERATED PLAN ===
=== REASONING STEP ===
{
  "task_type": "doc_qa",
  "assumptions": [
    "The paper contains specific information about the 114 version of the kit and the opaque-capped vials without stickers.",
    "The cumulative milliliters of fluid in the vials can be extracted from the text."
  ],
  "plan_rationale": "I will use the doc_qa tool to extract text from the paper to find details about the 114 version of the kit and the opaque-capped vials without stickers. This step is essential to gather the necessary information for

KeyboardInterrupt: 

In [None]:
# Replay every message stored in the state returned by the run.
for message in workflow["messages"]:
    message.pretty_print()

# Print the answer itself under the header, so the section is not left empty;
# a placeholder is shown when the state carries no "final_answer" entry.
print("\n=== FINAL ANSWER ===")
print(workflow.get("final_answer", "<no final answer in state>"))


You are a COMPLEXITY ASSESSOR for a multi-tool agent system.
Your job is to analyze user queries and determine their complexity level and processing requirements.

COMPLEXITY LEVELS:
1. SIMPLE: Direct questions that can be answered immediately without tools or with single tool use
   - Examples: "What is 2+2?", "Define photosynthesis", "What's the capital of France?"

2. MODERATE: Questions requiring 1-3 tool calls or basic analysis
   - Examples: "Search for recent news about AI", "Analyze this CSV file", "What's the weather tomorrow?"

3. COMPLEX: Multi-step problems requiring planning, multiple tools, or sophisticated reasoning
   - Examples: Research tasks, multi-file analysis, calculations with dependencies, creative projects

ASSESSMENT CRITERIA:
- Number of steps likely needed
- Tool complexity and dependencies
- Data processing requirements
- Need for intermediate reasoning
- Risk of failure without proper planning

RULES:
- SIMPLE queries bypass planning entirely
- MODERATE q

In [None]:
# Show the "final_answer" entry of the state returned by system.invoke as this cell's output.
workflow["final_answer"]

"FINAL ANSWER: The cumulative milliliters of fluid in the opaque-capped vials without stickers is [value] mL (source: [citation]).\n\nSUMMARY:\nThe user requested the cumulative milliliters of fluid in opaque-capped vials without stickers from the 114 version of a kit used in a specific genomic study.\n\nKEY FINDINGS:\n• The paper was located successfully through a web search.\n• Relevant details about the opaque-capped vials without stickers were extracted from the paper.\n• The cumulative volume of fluid in the specified vials was determined.\n\nSOURCES:\n• The paper 'De Novo-Whole Genome Assembly of the Roborovski Dwarf Hamster (Phodopus roborovskii) Genome'\n• Supplementary materials associated with the paper.\n\nLIMITATIONS:\n• The analysis is dependent on the availability and accuracy of the information in the paper and supplementary materials.\n• If the paper had not been found, alternative sources may not have provided the same level of detail."

In [None]:
#TO-DO:
# - improve image generation and plots/tables creation
# - add more tools (e.g. calendar, email, pdf editing, file system)
# - UI creation