File size: 9,705 Bytes
5ab87e0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
# single_question_recall.py
from __future__ import annotations
import re
import os
from typing import Any, Dict, Optional

from re_call import ReCall
from transformers import AutoTokenizer


import re
from typing import Optional, Any, Dict, Tuple, List

def _extract_answer_boxed(s: str) -> Optional[str]:
    """
    Return the content of the *last* \\boxed{...} or \\fbox{...} in `s`,
    with proper matching of nested braces. Escaped braces (\\{, \\}) are ignored
    for counting. If no balanced block is found, returns None.
    """
    def _iter_box_like_spans(text: str):
        # Find openings for \boxed{ and \fbox{
        openings: List[Tuple[str, int, int]] = []
        for m in re.finditer(r'\\boxed\s*\{', text):
            openings.append(("boxed", m.start(), m.end()))
        for m in re.finditer(r'\\fbox\s*\{', text):
            openings.append(("fbox", m.start(), m.end()))
        openings.sort(key=lambda x: x[1])
        # For each opening, scan forward to find its matching closing brace
        for kind, start, open_end in openings:
            depth = 1
            i = open_end
            n = len(text)
            while i < n:
                ch = text[i]
                # Skip escaped characters: backslash escapes the next char (including { or })
                if ch == '\\' and i + 1 < n:
                    i += 2
                    continue
                if ch == '{':
                    depth += 1
                elif ch == '}':
                    depth -= 1
                    if depth == 0:
                        # content is text[open_end:i]
                        yield (kind, start, open_end, i)
                        break
                i += 1

    last_content: Optional[str] = None
    for _, _start, open_end, close_idx in _iter_box_like_spans(s):
        last_content = s[open_end:close_idx]  # keep the *last* one

    return last_content.strip() if last_content is not None else None


def _extract_answer_tagged(s: str) -> Optional[str]:
    answer_tag_re = re.compile(r"<answer>(.*?)</answer>", re.S)
    m = answer_tag_re.findall(s)
    return m[-1].strip() if m else None


def _parse_answer_from_transcript(transcript: str) -> Optional[str]:
    """
    Extract the final answer from a model transcript.

    Prefers balanced \\boxed{...}/\\fbox{...} content, then the last
    <answer>...</answer> block. Returns None when neither is present.

    NOTE: a previous raw-tail fallback (last 200 chars) was intentionally
    disabled; the return annotation is Optional[str] to reflect that —
    the old `-> str` annotation was wrong once the fallback was removed.
    """
    return (
        _extract_answer_boxed(transcript)
        or _extract_answer_tagged(transcript)
    )


# --- main API: recall only ---
def answer_question_recall(
    question: str,
    *,
    model_url: Optional[str] = None,         # thinker endpoint; falls back to $HF_MODEL_URL
    executor_url: Optional[str] = None,      # tool executor; falls back to $HOST_SERPER_URL
    tokenizer_dir: str = "./tokenizer-info",
    temperature: float = 0.6,
    max_new_tokens: int = 40960,
    top_p: float = 0.95,
    search_env: str = "from search_api import search_urls, open_url, search_and_parse_query, query_url",
    func_schemas: Optional[List[Dict[str, Any]]] = None,
    deepseek_name: str = "deepseek-ai/DeepSeek-R1",
    old_prompt: Optional[str] = None,
    deepresearch_on: bool = True,
    summary_llm: str = "gpt-4.1-mini",
    ):
    """
    Run a single question through ReCall as a generator.

    Yields (tag, payload) tuples while the agent works:
      ("assistant_resp", (text, tool_calls)),
      ("tool_results", (results,)),
      ("end", (chat_str,)).
    The final answer is *returned* as ("answer", (answer,)), i.e. it is
    delivered to the caller via StopIteration.value when the generator
    is exhausted.

    Raises:
      KeyError: if `executor_url`/`model_url` are omitted and the
        HOST_SERPER_URL / HF_MODEL_URL environment variables are unset.
    """
    # BUGFIX: the default schema list used to be a mutable default argument
    # (shared across all calls — the classic Python pitfall). It is now
    # built fresh per call when the caller does not supply one.
    if func_schemas is None:
        func_schemas = [
            {
                "name": "search_urls",
                "description": "Google search and return links to web-pages with a brief snippet given a text query",
                "parameters": {
                    "type": "object",
                    "properties": {"query": {"type": "string"}, "top_k": {"type": "integer", "default": 10}},
                    "required": ["query"],
                },
            },
            {
                "name": "query_url",
                "description": "Visit webpage and return evidence based retrival for the provided goal",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "url": {"type": "string", "description": "The URL of the webpage to visit. Must be a single URL"},
                        "goal": {"type": "string", "description": "The specific information goal for visiting webpage"},
                    },
                    "required": ["url", "goal"],
                },
            },
        ]

    if executor_url is None:
        executor_url = os.environ["HOST_SERPER_URL"]

    if model_url is None:
        model_url = os.environ["HF_MODEL_URL"]

    # 1) tokenizer (REQUIRED by ReCall.run)
    tok = AutoTokenizer.from_pretrained(tokenizer_dir, trust_remote_code=True)

    # 2) build agent
    agent = ReCall(executor_url=executor_url)

    last_out = ""  # latest assistant text; parsed for the final answer below

    # 3) call the correct entrypoint
    if model_url == deepseek_name:
        # some setups use a special deepseek path that returns (transcript, tool_calls)
        out = agent.run_deepseek(
            env=search_env,
            func_schemas=func_schemas,
            question=question,
            model_name=deepseek_name,
            temperature=temperature,
            max_tokens=max_new_tokens,
            top_p=top_p,
        )
        transcript, tool_calls, chat = _normalize_out(out, expect_chat=False)
        last_out = transcript
    else:
        # standard ReCall.run MUST receive the tokenizer
        agent_generator = agent.run(
            env=search_env,
            func_schemas=func_schemas,
            question=question,
            model_url=model_url,
            temperature=temperature,
            max_new_tokens=max_new_tokens,
            tokenizer=tok,  # <- fixes the "missing tokenizer" error
            top_p=top_p,
            # When old_prompt is not None, ReCall appends the question to that
            # raw chat-history string instead of starting a fresh prompt.
            old_prompt=old_prompt,
            deepresearch_on=deepresearch_on,
            summary_llm=summary_llm,
        )

        # Forward every event to our caller, remembering the latest
        # assistant response so the final answer can be parsed from it.
        while True:
            try:
                tag, out = next(agent_generator)
                if tag == "assistant_resp":
                    last_out = out[0]
                yield tag, out
                if tag == "end":
                    break
            except StopIteration as e:
                # ReCall returned instead of yielding "end": its return value
                # carries the whole conversation as one raw string, including
                # special tokens such as "<|im_start|>system\n", "<|im_end|>",
                # "<tool_response>", "\n</tool_response>\n", etc.
                chat_str: str = e.value[1][0]
                yield "end", (chat_str,)
                break

    # 4) parse final answer from the last assistant message
    answer = _parse_answer_from_transcript(last_out)

    return "answer", (answer,)


def _normalize_out(out: Any, expect_chat: bool) -> tuple[str, Any, Any]:
    """
    Normalize ReCall outputs to (transcript, tool_calls, chat)
    Handles:
      - (transcript, tool_calls, chat)
      - (transcript, tool_calls)
      - "transcript"
      - {"transcript": ..., "tool_calls": ..., "chat": ...} variants
    """
    transcript, tool_calls, chat = "", None, None

    if isinstance(out, tuple):
        if len(out) == 3:
            transcript, tool_calls, chat = out
        elif len(out) == 2:
            transcript, tool_calls = out
        elif len(out) == 1:
            transcript = out[0]
        else:
            transcript = str(out[-1])
    elif isinstance(out, dict):
        transcript = out.get("transcript") or out.get("output") or out.get("response") or ""
        tool_calls = out.get("tool_calls")
        chat = out.get("chat")
    else:
        transcript = str(out)

    # Some implementations return None/empty; keep things predictable
    if chat is None and expect_chat is False:
        chat = None
    return transcript, tool_calls, chat


# quick demo
if __name__ == "__main__":
    old_prompt = None

    answer_generator = answer_question_recall(
        "What is the most popular restraunt in kolkata?",
        old_prompt=old_prompt,
    )

    final_chat_str = ""

    # Drain the generator, printing each event as it arrives. The final
    # answer may arrive either as an explicit ("answer", ...) yield or as
    # the generator's return value (StopIteration.value).
    # NOTE: the try body is narrowed to only the next() call — the original
    # wrapped the whole loop body, which could have masked unrelated errors.
    # (A block of long-dead commented-out result printing was removed.)
    while True:
        try:
            tag, out = next(answer_generator)
        except StopIteration as e:
            print(f"FINAL ANSWER:\n{e.value[1][0]}\n\n")
            break

        if tag == "assistant_resp":
            assistant_text, tool_calls = out
            print(f"ASSISTANT RESPONSE:\n{assistant_text}\n\n")
            print("TOOL CALLS:\n")
            for tool_call in tool_calls:
                print(f"{tool_call}")
            print("\n")
        elif tag == "tool_results":
            results = out[0]
            print("TOOL RESULTS:\n")
            for result in results:
                print(f"{result}")
            print("\n")
        elif tag == "end":
            print(f"{'='*20}\nASSISTANT RESPONSE ENDED\n{'='*20}\n\n")
            final_chat_str = out[0]
        elif tag == "answer":
            answer = out[0]
            print(f"FINAL ANSWER:\n{answer}\n\n")
            break

    print(f"{'='*20}\nEND\n{'='*20}\n\n\nFINAL CHAT STRING:\n{final_chat_str}\n\n")