File size: 6,544 Bytes
dbc8c36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
"""
Graph rendering utilities for Collatz trees using Graphviz.

Main entry point:

    render_collatz_tree_graphviz(df_edges, ...)

It takes a DataFrame of edges with columns ["Source", "Target"]
and renders an image using Graphviz (PNG by default; see the
``image_format`` parameter). The function returns the absolute path
to the generated image file, which is convenient for Gradio's image
components.
"""

from __future__ import annotations

from pathlib import Path
from typing import Optional, Union

import pandas as pd
from graphviz import Digraph


# Anything accepted as a filesystem location: a plain string or a pathlib.Path.
PathLike = Union[str, Path]


def _node_label(value: object) -> str:
    """Return the Graphviz node name used for one edge endpoint.

    Integers and integer-valued floats (e.g. ``4.0``) are written without a
    decimal part, so ``4`` and ``4.0`` refer to the same node. Every other
    value, including NaN and infinite floats, falls back to ``str(value)``.
    """
    if isinstance(value, int):
        return str(int(value))
    # float.is_integer() is False for NaN and +/-inf, so int() is never asked
    # to convert a value it would reject.
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value)


def _node_attrs(label: str) -> dict[str, str]:
    """Return the per-node Graphviz attributes for *label*.

    Labels that parse as integers are filled by parity, with dedicated colors
    for the members of the trivial cycle 1 -> 2 -> 4 -> 1. Labels that do not
    parse as integers get no extra attributes (default styling).
    """
    try:
        n = int(label)
    except ValueError:
        return {}

    if n == 1:
        # Root of the tree: brighter yellow with a heavier outline.
        return {"style": "filled,bold", "fillcolor": "#fff2cc", "penwidth": "2"}
    if n in (2, 4):
        # Remaining cycle members: slightly stronger blue than other evens.
        return {"style": "filled", "fillcolor": "#d0e3ff"}
    if n % 2 == 0:
        # Even nodes: light blue.
        return {"style": "filled", "fillcolor": "#ddeeff"}
    # Odd nodes: light orange.
    return {"style": "filled", "fillcolor": "#ffe5cc"}


def render_collatz_tree_graphviz(
    df_edges: pd.DataFrame,
    filename: str = "collatz_tree",
    directory: Optional[PathLike] = None,
    *,
    font_size: int = 12,
    node_size: float = 0.05,
    arrow_size: float = 0.5,
    nodesep: float = 0.2,
    ranksep: float = 0.15,
    dpi: int = 100,
    image_format: str = "png",
) -> str:
    """
    Render a Collatz inverse tree from a DataFrame of directed edges using Graphviz,
    and save it as a single image file.

    Parameters
    ----------
    df_edges : pd.DataFrame
        DataFrame with at least two columns: "Source" and "Target".
        Each row defines a directed edge: Source -> Target.
    filename : str, default="collatz_tree"
        Base file name (without extension) for the generated image.
    directory : str or Path, optional
        Directory where the image will be written. If None, uses the current
        working directory. The directory is created if it does not exist.
    font_size : int, default=12
        Fixed font size used for node labels. This stays constant.
    node_size : float, default=0.05
        Minimum diameter of node circles in inches. Nodes will grow
        automatically if labels are larger.
    arrow_size : float, default=0.5
        Arrowhead size.
    nodesep : float, default=0.2
        Minimum space between nodes on the same rank/level.
    ranksep : float, default=0.15
        Minimum space between different ranks/levels in the tree.
    dpi : int, default=100
        Render resolution in dots-per-inch.
    image_format : str, default="png"
        Output image format supported by Graphviz (e.g. "png", "svg").

    Returns
    -------
    str
        Absolute filesystem path to the generated image file.

    Raises
    ------
    ValueError
        If ``df_edges`` lacks the "Source" or "Target" column.

    Notes
    -----
    - Layout direction is Bottom-to-Top ("BT"), so the root (1) appears near
      the bottom and branches extend upwards.
    - Font size is fixed; node circles automatically expand when labels
      require more space, because fixedsize="false".
    - Integer-valued endpoints are labelled without a decimal part; any other
      value (including NaN) is labelled with its ``str()`` form.
    - Errors raised by Graphviz while rendering are not caught here.
    """
    required_columns = {"Source", "Target"}
    # Sorted lists keep the error text identical from run to run (set repr
    # order for strings depends on hash randomisation).
    missing = sorted(required_columns.difference(df_edges.columns))
    if missing:
        raise ValueError(
            f"df_edges must contain columns {sorted(required_columns)}, "
            f"missing: {missing}"
        )

    # Output directory (created on demand).
    directory_path = Path.cwd() if directory is None else Path(directory).resolve()
    directory_path.mkdir(parents=True, exist_ok=True)

    dot = Digraph(
        comment="Collatz Inverse Tree",
        format=image_format,
    )

    # Global graph layout: Bottom-to-Top tree
    dot.attr(
        rankdir="BT",
        dpi=str(dpi),
        nodesep=str(nodesep),
        ranksep=str(ranksep),
    )

    # Node style:
    # - fontsize is fixed (for consistent readability),
    # - width/height is the *minimum* size,
    # - fixedsize="false" lets Graphviz enlarge nodes as needed to fit labels.
    dot.attr(
        "node",
        shape="circle",
        fontsize=str(font_size),
        width=str(node_size),
        height=str(node_size),
        fixedsize="false",
    )

    # Edge style
    dot.attr("edge", arrowsize=str(arrow_size))
    dot.attr(splines="true")

    # Declare every endpoint once. dict.fromkeys drops labels that collapse to
    # the same text (e.g. 1 and "1") while keeping the iteration order.
    raw_nodes = set(df_edges["Source"]).union(df_edges["Target"])
    for label in dict.fromkeys(_node_label(node) for node in raw_nodes):
        dot.node(label, **_node_attrs(label))

    # Add edges, using the same label normalisation as the node declarations.
    for source, target in df_edges[["Source", "Target"]].itertuples(index=False):
        src_label = _node_label(source)
        tgt_label = _node_label(target)

        # Special case: cycle-closing edge 1 -> 4
        if src_label == "1" and tgt_label == "4":
            # NOTE(review): "curved" does not appear among Graphviz's documented
            # edge styles; confirm whether it has any effect or should be dropped.
            dot.edge(
                src_label,
                tgt_label,
                constraint="false",   # do not affect layout / ranks
                color="gray40",
                penwidth="1.6",
                style="curved",
                arrowsize="0.8",
            )
        else:
            dot.edge(src_label, tgt_label)

    # Render to file. Graphviz's render() returns the path including extension.
    output_path = dot.render(
        filename=filename,
        directory=str(directory_path),
        cleanup=True,
    )

    return str(Path(output_path).resolve())