""" Graph rendering utilities for Collatz trees using Graphviz. Main entry point: render_collatz_tree_graphviz(df_edges, ...) It takes a DataFrame of edges with columns ["Source", "Target"] and renders a PNG image using Graphviz. The function returns the absolute path to the generated image file, which is convenient for Gradio's image components. """ from __future__ import annotations from pathlib import Path from typing import Optional, Union import pandas as pd from graphviz import Digraph PathLike = Union[str, Path] def render_collatz_tree_graphviz( df_edges: pd.DataFrame, filename: str = "collatz_tree", directory: Optional[PathLike] = None, *, font_size: int = 12, node_size: float = 0.05, arrow_size: float = 0.5, nodesep: float = 0.2, ranksep: float = 0.15, dpi: int = 100, image_format: str = "png", ) -> str: """ Render a Collatz inverse tree from a DataFrame of directed edges using Graphviz, and save it as a single image file. Parameters ---------- df_edges : pd.DataFrame DataFrame with at least two columns: "Source" and "Target". Each row defines a directed edge: Source -> Target. filename : str, default="collatz_tree" Base file name (without extension) for the generated image. directory : str or Path, optional Directory where the image will be written. If None, uses the current working directory. font_size : int, default=11 Fixed font size used for node labels. This stays constant. node_size : float, default=0.40 Minimum diameter of node circles in inches. Nodes will grow automatically if labels are larger. arrow_size : float, default=0.5 Arrowhead size. nodesep : float, default=0.2 Minimum space between nodes on the same rank/level. ranksep : float, default=0.15 Minimum space between different ranks/levels in the tree. dpi : int, default=200 Render resolution in dots-per-inch. image_format : str, default="png" Output image format supported by Graphviz (e.g. "png", "svg"). Returns ------- str Absolute filesystem path to the generated image file. Notes ----- - Layout direction is Bottom-to-Top ("BT"), so the root (1) appears near the bottom and branches extend upwards. - Font size is fixed; node circles automatically expand when labels require more space, because fixedsize="false". """ required_columns = {"Source", "Target"} if not required_columns.issubset(df_edges.columns): missing = required_columns.difference(df_edges.columns) raise ValueError( f"df_edges must contain columns {required_columns}, missing: {missing}" ) # Output directory if directory is None: directory_path = Path(".").resolve() else: directory_path = Path(directory).resolve() directory_path.mkdir(parents=True, exist_ok=True) dot = Digraph( comment="Collatz Inverse Tree", format=image_format, ) # Collect labels (just to ensure clean string formatting) raw_nodes = set(df_edges["Source"]).union(set(df_edges["Target"])) labels = [] for node in raw_nodes: if isinstance(node, (int, float)) and int(node) == node: label = str(int(node)) else: label = str(node) labels.append(label) # Global graph layout: Bottom-to-Top tree dot.attr( rankdir="BT", dpi=str(dpi), nodesep=str(nodesep), ranksep=str(ranksep), ) # Node style: # - fontsize is fixed (for consistent readability), # - width/height is the *minimum* size, # - fixedsize="false" lets Graphviz enlarge nodes as needed to fit labels. dot.attr( "node", shape="circle", fontsize=str(font_size), width=str(node_size), height=str(node_size), fixedsize="false", ) # Edge style dot.attr("edge", arrowsize=str(arrow_size)) dot.attr(splines="true") # Add nodes with parity-based coloring and special styling for 1, 2, 4 for label in labels: attrs = {} # Try to interpret the label as an integer to check parity try: n = int(label) is_int = True except Exception: n = None is_int = False if is_int: # Base colors by parity if n % 2 == 0: # Even nodes: light blue attrs.update(style="filled", fillcolor="#ddeeff") else: # Odd nodes: light orange attrs.update(style="filled", fillcolor="#ffe5cc") # Special highlight for the trivial cycle 1 → 2 → 4 → 1 if n == 1: attrs.update( style="filled,bold", fillcolor="#fff2cc", # brighter yellow penwidth="2", ) elif n in (2, 4): attrs.update( style="filled", fillcolor="#d0e3ff", # slightly stronger blue ) else: # Non-integer labels, if any, keep default styling pass dot.node(label, **attrs) # Add edges for source, target in df_edges[["Source", "Target"]].itertuples(index=False): # Normalize labels to nice integer strings when possible if isinstance(source, (int, float)) and int(source) == source: src_label = str(int(source)) else: src_label = str(source) if isinstance(target, (int, float)) and int(target) == target: tgt_label = str(int(target)) else: tgt_label = str(target) # Special case: cycle-closing edge 1 -> 4 if src_label == "1" and tgt_label == "4": dot.edge( src_label, tgt_label, constraint="false", # do not affect layout / ranks color="gray40", penwidth="1.6", style="curved", # request a curved spline arrowsize="0.8", ) else: dot.edge(src_label, tgt_label) # Render to file. Graphviz's render() returns the path including extension. output_path = dot.render( filename=filename, directory=str(directory_path), cleanup=True, ) return str(Path(output_path).resolve())