Spaces:

tomvaillant
/

graphics-llm

Running

File size: 4,784 Bytes

7114af0

"""Utility functions for the Datawrapper MCP server."""

import json
import os

import pandas as pd


def get_api_token() -> str:
    """Get the Datawrapper API token from environment."""
    api_token = os.environ.get("DATAWRAPPER_ACCESS_TOKEN")
    if not api_token:
        raise ValueError(
            "DATAWRAPPER_ACCESS_TOKEN environment variable is required. "
            "Get your token from https://app.datawrapper.de/account/api-tokens"
        )
    return api_token


def json_to_dataframe(data: str | list | dict) -> pd.DataFrame:
    """Convert JSON data to a pandas DataFrame.

    Args:
        data: One of:
            - File path to CSV or JSON file (e.g., "/path/to/data.csv")
            - List of records: [{"col1": val1, "col2": val2}, ...]
            - Dict of arrays: {"col1": [val1, val2], "col2": [val3, val4]}
            - JSON string in either format above

    Returns:
        pandas DataFrame

    Examples:
        >>> json_to_dataframe("/tmp/data.csv")
        >>> json_to_dataframe("/tmp/data.json")
        >>> json_to_dataframe([{"a": 1, "b": 2}, {"a": 3, "b": 4}])
        >>> json_to_dataframe({"a": [1, 3], "b": [2, 4]})
        >>> json_to_dataframe('[{"a": 1, "b": 2}]')
    """
    if isinstance(data, str):
        # Check if it's a file path that exists
        if os.path.isfile(data):
            if data.endswith(".csv"):
                return pd.read_csv(data)
            elif data.endswith(".json"):
                with open(data) as f:
                    file_data = json.load(f)
                # Recursively process the loaded JSON data
                return json_to_dataframe(file_data)
            else:
                raise ValueError(
                    f"Unsupported file type: {data}\n\n"
                    "Supported file types:\n"
                    "  - .csv (CSV files)\n"
                    "  - .json (JSON files containing list of dicts or dict of arrays)"
                )

        # Check if it looks like CSV content (not a file path)
        if "\n" in data and "," in data and not data.strip().startswith(("[", "{")):
            raise ValueError(
                "CSV strings are not supported. Please save to a file first.\n\n"
                "Options:\n"
                "  1. Save CSV to a file and pass the file path\n"
                '  2. Parse CSV to list of dicts: [{"col": val}, ...]\n'
                '  3. Parse CSV to dict of arrays: {"col": [vals]}\n\n'
                "Example:\n"
                '  data = [{"year": 2020, "value": 100}, {"year": 2021, "value": 150}]'
            )

        # Try to parse as JSON string
        try:
            data = json.loads(data)
        except json.JSONDecodeError as e:
            raise ValueError(
                f"Invalid JSON string: {e}\n\n"
                "Expected one of:\n"
                "  1. File path: '/path/to/data.csv' or '/path/to/data.json'\n"
                '  2. JSON string: \'[{"year": 2020, "value": 100}, ...]\'\n'
                '  3. JSON string: \'{"year": [2020, 2021], "value": [100, 150]}\''
            )

    if isinstance(data, list):
        if not data:
            raise ValueError(
                "Data list is empty. Please provide at least one row of data."
            )
        if not all(isinstance(item, dict) for item in data):
            raise ValueError(
                "List format must contain dictionaries.\n\n"
                "Expected format:\n"
                '  [{"year": 2020, "value": 100}, {"year": 2021, "value": 150}]\n\n'
                f"Got: {type(data[0]).__name__} in list"
            )
        # List of records: [{"col1": val1, "col2": val2}, ...]
        return pd.DataFrame(data)
    elif isinstance(data, dict):
        if not data:
            raise ValueError(
                "Data dict is empty. Please provide at least one column of data."
            )
        # Check if it's a dict of arrays (all values should be lists)
        if not all(isinstance(v, list) for v in data.values()):
            raise ValueError(
                "Dict format must have lists as values.\n\n"
                "Expected format:\n"
                '  {"year": [2020, 2021], "value": [100, 150]}\n\n'
                f"Got dict with values of type: {[type(v).__name__ for v in data.values()]}"
            )
        # Dict of arrays: {"col1": [val1, val2], "col2": [val3, val4]}
        return pd.DataFrame(data)
    else:
        raise ValueError(
            f"Unsupported data type: {type(data).__name__}\n\n"
            "Data must be one of:\n"
            '  1. List of dicts: [{"year": 2020, "value": 100}, ...]\n'
            '  2. Dict of arrays: {"year": [2020, 2021], "value": [100, 150]}\n'
            "  3. JSON string in either format above"
        )