File size: 4,784 Bytes
7114af0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
"""Utility functions for the Datawrapper MCP server."""

import json
import os

import pandas as pd


def get_api_token() -> str:
    """Return the Datawrapper API token configured in the environment.

    The token is read from the ``DATAWRAPPER_ACCESS_TOKEN`` environment
    variable.

    Returns:
        The non-empty token string.

    Raises:
        ValueError: If the variable is unset or set to an empty string.
    """
    token = os.getenv("DATAWRAPPER_ACCESS_TOKEN")
    if token:
        return token

    # Unset and empty are treated the same: there is nothing usable to send.
    message = (
        "DATAWRAPPER_ACCESS_TOKEN environment variable is required. "
        "Get your token from https://app.datawrapper.de/account/api-tokens"
    )
    raise ValueError(message)


def json_to_dataframe(data: str | list | dict) -> pd.DataFrame:
    """Convert JSON data to a pandas DataFrame.

    Args:
        data: One of:
            - File path to CSV or JSON file (e.g., "/path/to/data.csv")
            - List of records: [{"col1": val1, "col2": val2}, ...]
            - Dict of arrays: {"col1": [val1, val2], "col2": [val3, val4]}
            - JSON string in either format above

    Returns:
        pandas DataFrame

    Examples:
        >>> json_to_dataframe("/tmp/data.csv")
        >>> json_to_dataframe("/tmp/data.json")
        >>> json_to_dataframe([{"a": 1, "b": 2}, {"a": 3, "b": 4}])
        >>> json_to_dataframe({"a": [1, 3], "b": [2, 4]})
        >>> json_to_dataframe('[{"a": 1, "b": 2}]')
    """
    if isinstance(data, str):
        # Check if it's a file path that exists
        if os.path.isfile(data):
            if data.endswith(".csv"):
                return pd.read_csv(data)
            elif data.endswith(".json"):
                with open(data) as f:
                    file_data = json.load(f)
                # Recursively process the loaded JSON data
                return json_to_dataframe(file_data)
            else:
                raise ValueError(
                    f"Unsupported file type: {data}\n\n"
                    "Supported file types:\n"
                    "  - .csv (CSV files)\n"
                    "  - .json (JSON files containing list of dicts or dict of arrays)"
                )

        # Check if it looks like CSV content (not a file path)
        if "\n" in data and "," in data and not data.strip().startswith(("[", "{")):
            raise ValueError(
                "CSV strings are not supported. Please save to a file first.\n\n"
                "Options:\n"
                "  1. Save CSV to a file and pass the file path\n"
                '  2. Parse CSV to list of dicts: [{"col": val}, ...]\n'
                '  3. Parse CSV to dict of arrays: {"col": [vals]}\n\n'
                "Example:\n"
                '  data = [{"year": 2020, "value": 100}, {"year": 2021, "value": 150}]'
            )

        # Try to parse as JSON string
        try:
            data = json.loads(data)
        except json.JSONDecodeError as e:
            raise ValueError(
                f"Invalid JSON string: {e}\n\n"
                "Expected one of:\n"
                "  1. File path: '/path/to/data.csv' or '/path/to/data.json'\n"
                '  2. JSON string: \'[{"year": 2020, "value": 100}, ...]\'\n'
                '  3. JSON string: \'{"year": [2020, 2021], "value": [100, 150]}\''
            )

    if isinstance(data, list):
        if not data:
            raise ValueError(
                "Data list is empty. Please provide at least one row of data."
            )
        if not all(isinstance(item, dict) for item in data):
            raise ValueError(
                "List format must contain dictionaries.\n\n"
                "Expected format:\n"
                '  [{"year": 2020, "value": 100}, {"year": 2021, "value": 150}]\n\n'
                f"Got: {type(data[0]).__name__} in list"
            )
        # List of records: [{"col1": val1, "col2": val2}, ...]
        return pd.DataFrame(data)
    elif isinstance(data, dict):
        if not data:
            raise ValueError(
                "Data dict is empty. Please provide at least one column of data."
            )
        # Check if it's a dict of arrays (all values should be lists)
        if not all(isinstance(v, list) for v in data.values()):
            raise ValueError(
                "Dict format must have lists as values.\n\n"
                "Expected format:\n"
                '  {"year": [2020, 2021], "value": [100, 150]}\n\n'
                f"Got dict with values of type: {[type(v).__name__ for v in data.values()]}"
            )
        # Dict of arrays: {"col1": [val1, val2], "col2": [val3, val4]}
        return pd.DataFrame(data)
    else:
        raise ValueError(
            f"Unsupported data type: {type(data).__name__}\n\n"
            "Data must be one of:\n"
            '  1. List of dicts: [{"year": 2020, "value": 100}, ...]\n'
            '  2. Dict of arrays: {"year": [2020, 2021], "value": [100, 150]}\n'
            "  3. JSON string in either format above"
        )