Spaces:

MCP-1st-Birthday
/

Easy-Financial-Report

Running

File size: 21,223 Bytes

abf5292

import gradio as gr
import requests
import json
import os

MCP_SPACE = "JC321/EasyReportsMCPServer"
MCP_URL = "https://jc321-easyreportsmcpserver.hf.space"

# Request headers sent with every HTTP call to the MCP server
HEADERS = {
    "Content-Type": "application/json",
    "User-Agent": "SEC-Query-Assistant/1.0 (jtyxabc@gmail.com)"
}

# Numeric display formatting
def format_value(value, value_type="money"):
    """
    Render a number for display with two decimal places.

    ``None`` and zero are shown as "N/A". Otherwise the layout depends on
    ``value_type``:
      - "money":  dollar sign prefix and a "B" suffix
      - "eps":    dollar sign prefix only
      - anything else ("number"): the bare number
    """
    if value is None or value == 0:
        return "N/A"

    amount = f"{value:.2f}"
    if value_type == "money":
        return f"${amount}B"
    if value_type == "eps":
        return f"${amount}"
    # Plain number: no currency symbol, no unit.
    return amount

def normalize_cik(cik):
    """
    Normalize a CIK to its zero-padded string form.

    The input is converted to a string, every non-digit character is
    dropped, and the remainder is left-padded with zeros to a width of 10.
    Returns None when the input is falsy or contains no digits.
    """
    if not cik:
        return None

    # Keeping only digits also removes dashes and spaces.
    digits = ''.join(filter(str.isdigit, str(cik)))
    if not digits:
        return None
    return digits.zfill(10)

def _decode_mcp_content(content):
    """
    Decode the first item of an MCP ``content`` list.

    Returns the JSON-decoded ``text`` of the first item, the raw ``text`` when
    it is not valid JSON (or is not a string), and an empty dict when the
    list is empty or malformed or the text is missing.
    """
    if not isinstance(content, (list, tuple)) or not content:
        return {}

    first_item = content[0]
    if not isinstance(first_item, dict):
        return {}

    text_content = first_item.get("text", "{}")
    if text_content is None:
        return {}

    try:
        return json.loads(text_content)
    except (json.JSONDecodeError, TypeError):
        # Not JSON (or already decoded): hand the payload back unchanged.
        return text_content


def parse_mcp_response(response_data):
    """
    Parse an MCP protocol response into plain Python data.

    Supported shapes:
    1. {"result": {"content": [{"type": "text", "text": "{...}"}]}}
    2. {"content": [{"type": "text", "text": "{...}"}]}
    3. Any other value, which is returned unchanged.

    For shapes 1 and 2 only the first content item is used: its ``text`` is
    JSON-decoded, or returned verbatim when it is not valid JSON. Empty or
    malformed content yields an empty dict instead of raising.
    """
    if not isinstance(response_data, dict):
        return response_data

    # Shape 1: content nested under "result". The isinstance guard keeps a
    # null or non-dict "result" from raising.
    result = response_data.get("result")
    if isinstance(result, dict) and "content" in result:
        return _decode_mcp_content(result["content"])

    # Shape 2: content at the top level.
    if "content" in response_data:
        return _decode_mcp_content(response_data["content"])

    # Shape 3: already plain data.
    return response_data

# MCP tool definitions
def create_mcp_tools():
    """Return a freshly built list of MCP tool definitions (one tool)."""
    company_name_schema = {
        "type": "string",
        "description": "Company name or stock symbol (e.g., Apple, NVIDIA, AAPL)",
    }
    query_type_schema = {
        "type": "string",
        "enum": ["Latest Financial Data", "3-Year Trends", "5-Year Trends"],
        "description": "Type of financial query",
    }
    parameters = {
        "type": "object",
        "properties": {
            "company_name": company_name_schema,
            "query_type": query_type_schema,
        },
        "required": ["company_name", "query_type"],
    }
    financial_query_tool = {
        "name": "query_financial_data",
        "description": "Query SEC financial data for US listed companies",
        "parameters": parameters,
    }
    return [financial_query_tool]

# Tool dispatcher
def execute_tool(tool_name, **kwargs):
    """
    Run the MCP tool called ``tool_name`` with the given keyword arguments.

    Only "query_financial_data" is recognised; it receives the
    ``company_name`` and ``query_type`` keyword arguments (None when absent).
    Any other name yields an "Unknown tool" message string.
    """
    if tool_name != "query_financial_data":
        return f"Unknown tool: {tool_name}"

    company = kwargs.get("company_name")
    kind = kwargs.get("query_type")
    return query_financial_data(company, kind)
# Hyperlink construction
def create_source_link(source_form, source_url=None):
    """
    Wrap a source form name in a Markdown link when a URL is available.

    The form is returned unchanged when it is empty or 'N/A', or when
    ``source_url`` is empty or 'N/A'; otherwise the result is
    ``[source_form](source_url)``.
    """
    form_is_usable = bool(source_form) and source_form != 'N/A'
    url_is_usable = bool(source_url) and source_url != 'N/A'

    if form_is_usable and url_is_usable:
        return f"[{source_form}]({source_url})"
    # No link possible: show the bare form text.
    return source_form

def _call_mcp_tool(tool_name, arguments, timeout=30):
    """POST a ``tools/call`` request to the MCP server and return the HTTP response."""
    return requests.post(
        f"{MCP_URL}/message",
        json={
            "method": "tools/call",
            "params": {
                "name": tool_name,
                "arguments": arguments
            }
        },
        headers=HEADERS,
        timeout=timeout
    )


def _period_sort_key(entry):
    """
    Sort key for period rows, meant to be used with ``reverse=True``.

    Produces newest year first and, within a year, the annual row before the
    quarters in descending order: FY2024, 2024Q3, 2024Q2, 2024Q1, FY2023.
    """
    period = str(entry.get('period') or '')
    # Drop an optional "FY" prefix so the year is always the first 4 chars.
    label = period[2:] if period.startswith('FY') else period
    year = label[:4] if len(label) >= 4 else '0000'

    if 'Q' in label:
        q_pos = label.index('Q') + 1
        q_char = label[q_pos:q_pos + 1]
        # A missing or non-numeric quarter sorts after every real quarter.
        quarter = int(q_char) if q_char.isdecimal() else 0
        return (year, 0, quarter)

    # Annual rows outrank every quarter of the same year under reverse=True.
    return (year, 1, 0)


def _trend_rows(metrics):
    """Return the dict entries of ``metrics['data']``, or [] when absent or malformed."""
    raw_rows = metrics.get('data')
    if not isinstance(raw_rows, list):
        return []
    return [row for row in raw_rows if isinstance(row, dict)]


def _dedupe_and_sort_periods(rows):
    """Drop rows repeating a (period, source_form) pair, then sort newest first."""
    seen = set()
    unique_rows = []
    for row in rows:
        key = (row.get('period', 'N/A'), row.get('source_form', 'N/A'))
        if key not in seen:
            seen.add(key)
            unique_rows.append(row)
    return sorted(unique_rows, key=_period_sort_key, reverse=True)


def _no_trend_data_message(metrics):
    """Build the error text shown when a metrics payload has no usable rows."""
    return f"❌ No data returned from MCP Server\n\nDebug: metrics keys = {list(metrics.keys())}\n\nFull response: {json.dumps(metrics, indent=2, ensure_ascii=False)[:1000]}"


def _fetch_trend_metrics(cik, years, result):
    """
    Call ``extract_financial_metrics`` and validate the reply.

    Returns ``(metrics, text)``. On success ``metrics`` is the parsed dict and
    ``text`` is ``result`` with the debug lines appended. On failure
    ``metrics`` is None and ``text`` is the complete error message to return.
    """
    metrics_resp = _call_mcp_tool(
        "extract_financial_metrics", {"cik": cik, "years": years}, timeout=60
    )

    # Debug: HTTP status of the metrics call.
    result += f"\n**Debug Info ({years}-Year)**:\n- HTTP Status: {metrics_resp.status_code}\n"

    if metrics_resp.status_code != 200:
        return None, result + f"❌ Server Error: HTTP {metrics_resp.status_code}\n\n{metrics_resp.text[:500]}"

    try:
        metrics_result = metrics_resp.json()
        # Debug: size and preview of the raw response.
        result += f"- Raw Response Length: {len(metrics_resp.text)} chars\n"
        result += f"- Response Preview: {metrics_resp.text[:200]}...\n\n"

        metrics = parse_mcp_response(metrics_result)

        # Debug: type and shape of the parsed payload.
        result += f"- Parsed Type: {type(metrics).__name__}\n"
        if isinstance(metrics, dict):
            raw_rows = metrics.get('data')
            row_count = len(raw_rows) if isinstance(raw_rows, list) else 0
            result += f"- Parsed Keys: {list(metrics.keys())}\n"
            result += f"- Periods: {metrics.get('periods', 'N/A')}\n"
            result += f"- Data Length: {row_count}\n\n"
    except (ValueError, KeyError, json.JSONDecodeError) as e:
        return None, result + f"❌ JSON Parse Error: {str(e)}\n\nResponse: {metrics_resp.text[:500]}"

    if isinstance(metrics, dict) and metrics.get("error"):
        return None, result + f"❌ {metrics['error']}"

    if not isinstance(metrics, dict):
        return None, result + f"❌ Invalid response format\n\nDebug: {str(metrics)[:500]}"

    return metrics, result


def _latest_report(cik, result):
    """Append the latest fiscal-year figures for ``cik`` to ``result``."""
    data_resp = _call_mcp_tool("get_latest_financial_data", {"cik": cik}, timeout=30)

    if data_resp.status_code != 200:
        return result + f"❌ Server Error: HTTP {data_resp.status_code}\n\n{data_resp.text[:500]}"

    try:
        data = parse_mcp_response(data_resp.json())
    except (ValueError, KeyError, json.JSONDecodeError) as e:
        return result + f"❌ JSON Parse Error: {str(e)}\n\n{data_resp.text[:500]}"

    # A non-JSON text payload comes back as a string; report it rather than
    # failing on .get() below.
    if not isinstance(data, dict):
        return result + f"❌ Invalid response format\n\nDebug: {str(data)[:500]}"

    if data.get("error"):
        return result + f"❌ {data['error']}"

    result += f"## Fiscal Year {data.get('period', 'N/A')}\n\n"

    # Amounts are scaled to billions; missing values become 0, which
    # format_value renders as "N/A".
    total_revenue = (data.get('total_revenue') or 0) / 1e9
    net_income = (data.get('net_income') or 0) / 1e9
    eps = data.get('earnings_per_share') or 0
    opex = (data.get('operating_expenses') or 0) / 1e9
    ocf = (data.get('operating_cash_flow') or 0) / 1e9

    result += f"- **Total Revenue**: {format_value(total_revenue)}\n"
    result += f"- **Net Income**: {format_value(net_income)}\n"
    result += f"- **Earnings Per Share**: {format_value(eps, 'eps')}\n"
    result += f"- **Operating Expenses**: {format_value(opex)}\n"
    result += f"- **Operating Cash Flow**: {format_value(ocf)}\n"

    # Link the source form using the URL supplied by the backend, if any.
    source_form = data.get('source_form', 'N/A')
    source_url = data.get('source_url', None)
    result += f"- **Source Form**: {create_source_link(source_form, source_url)}\n"
    return result


def _three_year_report(cik, result):
    """Append a Markdown table of 3-year annual and quarterly metrics to ``result``."""
    metrics, result = _fetch_trend_metrics(cik, 3, result)
    if metrics is None:
        return result

    result += f"## 3-Year Financial Trends ({metrics.get('periods', 0)} periods)\n\n"

    rows = _trend_rows(metrics)
    if not rows:
        return result + _no_trend_data_message(metrics)

    rows = _dedupe_and_sort_periods(rows)

    result += "| Period | Revenue (B) | Net Income (B) | EPS | Operating Expenses (B) | Operating Cash Flow (B) | Source Form |\n"
    result += "|--------|-------------|----------------|-----|------------------------|-------------------------|-------------|\n"

    for m in rows:
        period = str(m.get('period') or 'N/A')
        rev = (m.get('total_revenue') or 0) / 1e9
        inc = (m.get('net_income') or 0) / 1e9
        eps_val = m.get('earnings_per_share') or 0
        opex = (m.get('operating_expenses') or 0) / 1e9
        ocf = (m.get('operating_cash_flow') or 0) / 1e9
        source_form = m.get('source_form', 'N/A')
        source_url = m.get('source_url', None)

        # Quarterly periods are shown as-is; annual ones get a single "FY"
        # prefix (never doubled).
        if 'Q' in period:
            display_period = period
        else:
            display_period = period if period.startswith('FY') else f"FY{period}"

        source_link = create_source_link(source_form, source_url)

        result += f"| {display_period} | {format_value(rev)} | {format_value(inc)} | {format_value(eps_val, 'eps')} | {format_value(opex)} | {format_value(ocf)} | {source_link} |\n"

    return result


def _five_year_rows(cik, result):
    """
    Return the deduplicated, newest-first list of 5-year metric rows.

    On failure the return value is instead an error string built on ``result``.
    """
    metrics, result = _fetch_trend_metrics(cik, 5, result)
    if metrics is None:
        return result

    rows = _trend_rows(metrics)
    if not rows:
        return result + _no_trend_data_message(metrics)

    unique_data = _dedupe_and_sort_periods(rows)
    print(f'5年数据：:{unique_data}')
    return unique_data


def _filings_report(cik, result):
    """Append a Markdown table of up to 50 company filings to ``result``."""
    filings_resp = _call_mcp_tool("get_company_filings", {"cik": cik, "limit": 50}, timeout=60)

    if filings_resp.status_code != 200:
        return result + f"❌ Server Error: HTTP {filings_resp.status_code}\n\n{filings_resp.text[:500]}"

    try:
        filings_data = parse_mcp_response(filings_resp.json())
    except (ValueError, KeyError, json.JSONDecodeError) as e:
        return result + f"❌ JSON Parse Error: {str(e)}\n\n{filings_resp.text[:500]}"

    if isinstance(filings_data, dict) and filings_data.get("error"):
        return result + f"❌ {filings_data['error']}"

    if isinstance(filings_data, dict):
        filings = filings_data.get('filings') or []
    else:
        filings = filings_data

    # Anything other than a list (e.g. a plain-text payload) cannot be tabulated.
    if not isinstance(filings, list):
        return result + f"❌ Invalid response format\n\nDebug: {str(filings_data)[:500]}"
    filings = [filing for filing in filings if isinstance(filing, dict)]

    result += f"## Company Filings ({len(filings)} records)\n\n"
    result += "| Form Type | Filing Date | Accession Number | Primary Document |\n"
    result += "|-----------|-------------|------------------|------------------|\n"

    for filing in filings:
        form_type = filing.get('form_type', 'N/A')
        filing_date = filing.get('filing_date', 'N/A')
        accession_num = filing.get('accession_number', 'N/A')
        primary_doc = filing.get('primary_document', 'N/A')
        filing_url = filing.get('filing_url', None)

        # Link both cells to the backend-supplied URL when there is one.
        if filing_url and filing_url != 'N/A':
            form_link = f"[{form_type}]({filing_url})"
            primary_doc_link = f"[{primary_doc}]({filing_url})"
        else:
            form_link = form_type
            primary_doc_link = primary_doc

        result += f"| {form_link} | {filing_date} | {accession_num} | {primary_doc_link} |\n"

    return result


def query_financial_data(company_name, query_type):
    """
    Look up a company on the MCP server and fetch the requested data.

    Args:
        company_name: Company name or stock symbol to search for.
        query_type: "Latest", "3-Year", "5-Year" or "Filings". The long names
            "Latest Financial Data", "3-Year Trends" and "5-Year Trends" are
            accepted as aliases. Any other value yields only the company header.

    Returns:
        A Markdown string (report or error message) for most paths. The
        5-Year query returns a list of metric dicts sorted newest first, and a
        non-200 reply to the company search returns an empty list.
    """

    if not company_name:
        return "Please enter a company name or stock symbol"

    # Map the English query type to the internal (Chinese) identifier.
    query_type_mapping = {
        "Latest": "最新财务数据",
        "3-Year": "3年趋势",
        "5-Year": "5年趋势",
        "Filings": "公司报表列表",
        # Long-form names, so tool-style callers hit the same branches.
        "Latest Financial Data": "最新财务数据",
        "3-Year Trends": "3年趋势",
        "5-Year Trends": "5年趋势",
    }
    internal_query_type = query_type_mapping.get(query_type, query_type)

    try:
        # Step 1: resolve the company via advanced_search_company.
        search_resp = _call_mcp_tool(
            "advanced_search_company", {"company_input": company_name}, timeout=30
        )

        print(f"搜索公司：{company_name}，search_resp.status_code: {search_resp.status_code}\nSearch Response: {search_resp.text}")

        if search_resp.status_code != 200:
            print(f"❌ Server Error: HTTP {search_resp.status_code}\n\nResponse: {search_resp.text[:500]}")
            # NOTE(review): this path returns an empty list, not an error
            # string; kept because callers may rely on it. Confirm.
            return []

        try:
            company = parse_mcp_response(search_resp.json())
        except (ValueError, KeyError, json.JSONDecodeError) as e:
            return f"❌ JSON Parse Error: {str(e)}\n\nResponse: {search_resp.text[:500]}"

        if isinstance(company, dict) and company.get("error"):
            return f"❌ Error: {company['error']}"

        # A plain-text or list payload has no company fields to read.
        if not isinstance(company, dict):
            return f"❌ Invalid response format\n\nDebug: {str(company)[:500]}"

        # The search result carries cik, name and ticker.
        display_name = company.get('name', 'Unknown')
        ticker = company.get('ticker', 'N/A')

        result = f"# {display_name}\n\n"
        result += f"**Stock Symbol**: {ticker}\n"
        result += "\n---\n\n"

        # Step 2: normalize the CIK before passing it to the data tools.
        cik = normalize_cik(company.get('cik'))
        if not cik:
            return result + f"❌ Error: Invalid CIK from company search\n\nDebug: company data = {json.dumps(company, indent=2)}"

        # Step 3: dispatch on the query type.
        if internal_query_type == "最新财务数据":
            return _latest_report(cik, result)
        if internal_query_type == "3年趋势":
            return _three_year_report(cik, result)
        if internal_query_type == "5年趋势":
            return _five_year_rows(cik, result)
        if internal_query_type == "公司报表列表":
            return _filings_report(cik, result)

        # Unrecognised query type: only the company header is returned.
        return result

    except requests.exceptions.RequestException as e:
        return f"❌ Network Error: {str(e)}\n\nMCP Server: {MCP_URL}"
    except Exception as e:
        import traceback
        return f"❌ Unexpected Error: {str(e)}\n\nTraceback:\n{traceback.format_exc()}"