# File size: 6,544 Bytes
# dbc8c36
"""
Graph rendering utilities for Collatz trees using Graphviz.

Main entry point:
    render_collatz_tree_graphviz(df_edges, ...)

It takes a DataFrame of edges with columns ["Source", "Target"]
and renders an image (PNG by default) using Graphviz. The function
returns the absolute path to the generated image file, which is
convenient for Gradio's image components.
"""
from __future__ import annotations
from pathlib import Path
from typing import Optional, Union
import pandas as pd
from graphviz import Digraph
# Type alias for a filesystem location given either as a plain string or as a
# pathlib.Path; used to annotate the `directory` argument below.
PathLike = Union[str, Path]
def _format_node_label(value: object) -> str:
    """Return the Graphviz label for a raw node value.

    Integers and integer-valued floats are rendered without a decimal part
    (``5.0`` -> ``"5"``). Everything else, including NaN and infinities,
    falls back to ``str(value)`` instead of raising.
    """
    if isinstance(value, int):
        return str(int(value))
    # float.is_integer() is False for NaN/inf, so int() is never called on
    # a value it cannot convert.
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value)


def _node_style(label: str) -> dict[str, str]:
    """Return the per-node Graphviz attributes for *label*.

    Labels that parse as integers are filled according to parity, with
    dedicated highlights for 1, 2 and 4. Labels that do not parse as an
    integer get no extra attributes and keep the default node style.
    """
    try:
        n = int(label)
    except ValueError:
        return {}

    # Special highlight for the trivial cycle 1 -> 2 -> 4 -> 1.
    if n == 1:
        return {
            "style": "filled,bold",
            "fillcolor": "#fff2cc",  # brighter yellow
            "penwidth": "2",
        }
    if n in (2, 4):
        return {"style": "filled", "fillcolor": "#d0e3ff"}  # stronger blue

    # Base colors by parity: even = light blue, odd = light orange.
    fill = "#ddeeff" if n % 2 == 0 else "#ffe5cc"
    return {"style": "filled", "fillcolor": fill}


def render_collatz_tree_graphviz(
    df_edges: pd.DataFrame,
    filename: str = "collatz_tree",
    directory: Optional[PathLike] = None,
    *,
    font_size: int = 12,
    node_size: float = 0.05,
    arrow_size: float = 0.5,
    nodesep: float = 0.2,
    ranksep: float = 0.15,
    dpi: int = 100,
    image_format: str = "png",
) -> str:
    """
    Render a Collatz inverse tree from a DataFrame of directed edges using
    Graphviz, and save it as a single image file.

    Parameters
    ----------
    df_edges : pd.DataFrame
        DataFrame with at least two columns: "Source" and "Target".
        Each row defines a directed edge: Source -> Target.
    filename : str, default="collatz_tree"
        Base file name (without extension) for the generated image.
    directory : str or Path, optional
        Directory where the image will be written. If None, uses the current
        working directory. The directory is created if it does not exist.
    font_size : int, default=12
        Fixed font size used for node labels. This stays constant.
    node_size : float, default=0.05
        Minimum diameter of node circles in inches. Nodes will grow
        automatically if labels are larger.
    arrow_size : float, default=0.5
        Arrowhead size.
    nodesep : float, default=0.2
        Minimum space between nodes on the same rank/level.
    ranksep : float, default=0.15
        Minimum space between different ranks/levels in the tree.
    dpi : int, default=100
        Render resolution in dots-per-inch.
    image_format : str, default="png"
        Output image format supported by Graphviz (e.g. "png", "svg").

    Returns
    -------
    str
        Absolute filesystem path to the generated image file.

    Raises
    ------
    ValueError
        If ``df_edges`` lacks the "Source" or "Target" column.

    Notes
    -----
    - Layout direction is Bottom-to-Top ("BT"), so the root (1) appears near
      the bottom and branches extend upwards.
    - Font size is fixed; node circles automatically expand when labels
      require more space, because fixedsize="false".
    - Nodes are declared in order of first appearance in ``df_edges`` so the
      generated DOT source is the same on every run for the same input.
    """
    required_columns = {"Source", "Target"}
    if not required_columns.issubset(df_edges.columns):
        missing = required_columns.difference(df_edges.columns)
        raise ValueError(
            f"df_edges must contain columns {required_columns}, missing: {missing}"
        )

    # Output directory (created on demand).
    directory_path = Path("." if directory is None else directory).resolve()
    directory_path.mkdir(parents=True, exist_ok=True)

    dot = Digraph(
        comment="Collatz Inverse Tree",
        format=image_format,
    )

    # Normalize every endpoint exactly once so nodes and edges are guaranteed
    # to use the same label for the same underlying value.
    edges = [
        (_format_node_label(source), _format_node_label(target))
        for source, target in df_edges[["Source", "Target"]].itertuples(
            index=False, name=None
        )
    ]
    # dict.fromkeys de-duplicates while keeping first-appearance order.
    node_labels = dict.fromkeys(label for edge in edges for label in edge)

    # Global graph layout: Bottom-to-Top tree.
    dot.attr(
        rankdir="BT",
        dpi=str(dpi),
        nodesep=str(nodesep),
        ranksep=str(ranksep),
    )

    # Node style:
    # - fontsize is fixed (for consistent readability),
    # - width/height is the *minimum* size,
    # - fixedsize="false" lets Graphviz enlarge nodes as needed to fit labels.
    dot.attr(
        "node",
        shape="circle",
        fontsize=str(font_size),
        width=str(node_size),
        height=str(node_size),
        fixedsize="false",
    )

    # Edge style.
    dot.attr("edge", arrowsize=str(arrow_size))
    dot.attr(splines="true")

    # Add nodes with parity-based coloring and special styling for 1, 2, 4.
    for label in node_labels:
        dot.node(label, **_node_style(label))

    # Add edges.
    for src_label, tgt_label in edges:
        if src_label == "1" and tgt_label == "4":
            # Cycle-closing edge 1 -> 4: drawn distinctly and excluded from
            # rank assignment so it does not distort the tree layout.
            # NOTE(review): "curved" is documented by Graphviz as a value of
            # the graph-level `splines` attribute rather than an edge
            # `style`; kept unchanged pending confirmation of the intent.
            dot.edge(
                src_label,
                tgt_label,
                constraint="false",
                color="gray40",
                penwidth="1.6",
                style="curved",
                arrowsize="0.8",
            )
        else:
            dot.edge(src_label, tgt_label)

    # Render to file. Graphviz's render() returns the path including the
    # extension; cleanup=True removes the intermediate DOT source file.
    output_path = dot.render(
        filename=filename,
        directory=str(directory_path),
        cleanup=True,
    )
    return str(Path(output_path).resolve())