# Source: uploaded by Eyob-Sol in commit dbc8c36 ("Upload 24 files", verified).
"""
Graph rendering utilities for Collatz trees using Graphviz.
Main entry point:
render_collatz_tree_graphviz(df_edges, ...)
It takes a DataFrame of edges with columns ["Source", "Target"]
and renders a PNG image using Graphviz. The function returns the
absolute path to the generated image file, which is convenient
for Gradio's image components.
"""
from __future__ import annotations
from pathlib import Path
from typing import Optional, Union
import pandas as pd
from graphviz import Digraph
# Type alias for values accepted as a filesystem path: a plain string or a
# pathlib.Path. Used to annotate the `directory` argument below.
PathLike = Union[str, Path]
def _format_node_label(value: object) -> str:
    """Return the Graphviz node name for a raw edge endpoint value.

    Whole-number numeric values (``3`` or ``3.0``) become plain integer
    strings (``"3"``). Everything else falls back to ``str(value)``; this
    includes NaN and infinite floats, for which ``int(value)`` would raise.
    """
    if isinstance(value, int):
        return str(int(value))
    # float.is_integer() is False for NaN/inf, so int() is only ever applied
    # to finite whole numbers here.
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value)


def _node_style(label: str) -> dict[str, str]:
    """Return the per-node Graphviz attributes for a node label.

    Integer labels are filled according to parity, with dedicated colors for
    the members of the trivial cycle (1, 2 and 4). Labels that cannot be
    parsed as an integer get no extra attributes (default styling).
    """
    try:
        n = int(label)
    except ValueError:
        return {}

    # Special highlight for the trivial cycle 1 -> 2 -> 4 -> 1.
    if n == 1:
        return {
            "style": "filled,bold",
            "fillcolor": "#fff2cc",  # brighter yellow
            "penwidth": "2",
        }
    if n in (2, 4):
        return {"style": "filled", "fillcolor": "#d0e3ff"}  # stronger blue

    # Base colors by parity: even -> light blue, odd -> light orange.
    if n % 2 == 0:
        return {"style": "filled", "fillcolor": "#ddeeff"}
    return {"style": "filled", "fillcolor": "#ffe5cc"}


def render_collatz_tree_graphviz(
    df_edges: pd.DataFrame,
    filename: str = "collatz_tree",
    directory: Optional[PathLike] = None,
    *,
    font_size: int = 12,
    node_size: float = 0.05,
    arrow_size: float = 0.5,
    nodesep: float = 0.2,
    ranksep: float = 0.15,
    dpi: int = 100,
    image_format: str = "png",
) -> str:
    """
    Render a Collatz inverse tree from a DataFrame of directed edges using
    Graphviz, and save it as a single image file.

    Parameters
    ----------
    df_edges : pd.DataFrame
        DataFrame with at least two columns: "Source" and "Target".
        Each row defines a directed edge: Source -> Target.
    filename : str, default="collatz_tree"
        Base file name (without extension) for the generated image.
    directory : str or Path, optional
        Directory where the image will be written. It is created if it does
        not exist. If None, uses the current working directory.
    font_size : int, default=12
        Fixed font size used for node labels.
    node_size : float, default=0.05
        Minimum width/height of node circles. Nodes grow automatically
        when labels need more room.
    arrow_size : float, default=0.5
        Arrowhead size.
    nodesep : float, default=0.2
        Minimum space between nodes on the same rank/level.
    ranksep : float, default=0.15
        Minimum space between different ranks/levels in the tree.
    dpi : int, default=100
        Render resolution in dots-per-inch.
    image_format : str, default="png"
        Output image format supported by Graphviz (e.g. "png", "svg").

    Returns
    -------
    str
        Absolute filesystem path to the generated image file.

    Raises
    ------
    ValueError
        If ``df_edges`` lacks the "Source" or "Target" column.

    Notes
    -----
    - Layout direction is Bottom-to-Top ("BT").
    - Font size is fixed; node circles expand when labels require more
      space, because fixedsize="false".
    - Whole-number endpoints (``3`` or ``3.0``) are labelled as integers;
      any other value, including NaN, is labelled with ``str(value)``.
    """
    required_columns = {"Source", "Target"}
    if not required_columns.issubset(df_edges.columns):
        missing = required_columns.difference(df_edges.columns)
        raise ValueError(
            f"df_edges must contain columns {required_columns}, missing: {missing}"
        )

    # Output directory (created on demand).
    directory_path = Path("." if directory is None else directory).resolve()
    directory_path.mkdir(parents=True, exist_ok=True)

    dot = Digraph(
        comment="Collatz Inverse Tree",
        format=image_format,
    )

    # Normalize every endpoint to its label. Distinct raw values can map to
    # the same label (e.g. 3 and "3"), so de-duplicate while keeping order.
    raw_nodes = set(df_edges["Source"]).union(set(df_edges["Target"]))
    labels = list(dict.fromkeys(_format_node_label(node) for node in raw_nodes))

    # Global graph layout: Bottom-to-Top tree.
    dot.attr(
        rankdir="BT",
        dpi=str(dpi),
        nodesep=str(nodesep),
        ranksep=str(ranksep),
    )

    # Node style: fixed font size, width/height act as a minimum, and
    # fixedsize="false" lets Graphviz enlarge nodes to fit their labels.
    dot.attr(
        "node",
        shape="circle",
        fontsize=str(font_size),
        width=str(node_size),
        height=str(node_size),
        fixedsize="false",
    )

    # Edge style.
    dot.attr("edge", arrowsize=str(arrow_size))
    dot.attr(splines="true")

    # Declare nodes with parity-based coloring and cycle highlighting.
    for label in labels:
        dot.node(label, **_node_style(label))

    # Add edges, using the same label normalization as for the nodes.
    for source, target in df_edges[["Source", "Target"]].itertuples(index=False):
        src_label = _format_node_label(source)
        tgt_label = _format_node_label(target)

        if src_label == "1" and tgt_label == "4":
            # Cycle-closing edge 1 -> 4: drawn without influencing ranks.
            # NOTE(review): "curved" appears to be a value of the graph-level
            # `splines` attribute rather than an edge `style`; confirm it has
            # the intended effect before relying on it.
            dot.edge(
                src_label,
                tgt_label,
                constraint="false",  # do not affect layout / ranks
                color="gray40",
                penwidth="1.6",
                style="curved",
                arrowsize="0.8",
            )
        else:
            dot.edge(src_label, tgt_label)

    # render() returns the output path including the format extension;
    # cleanup=True removes the intermediate DOT source file.
    output_path = dot.render(
        filename=filename,
        directory=str(directory_path),
        cleanup=True,
    )
    return str(Path(output_path).resolve())