Spaces:
Running
Running
File size: 2,419 Bytes
799ac7c 31086ae 799ac7c 31086ae 799ac7c 31086ae 9e67c3b 799ac7c 31086ae |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
import random
from collections import deque
from typing import Any, Iterable, List
from graphgen.bases import BaseGraphStorage, BasePartitioner
from graphgen.bases.datatypes import Community
NODE_UNIT: str = "n"
EDGE_UNIT: str = "e"
class BFSPartitioner(BasePartitioner):
"""
BFS partitioner that partitions the graph into communities of a fixed size.
1. Randomly choose a unit.
2. Expand the community using BFS until the max unit size is reached.
(A unit is a node or an edge.)
"""
def partition(
self,
g: BaseGraphStorage,
max_units_per_community: int = 1,
**kwargs: Any,
) -> Iterable[Community]:
nodes = g.get_all_nodes()
edges = g.get_all_edges()
adj, _ = self._build_adjacency_list(nodes, edges)
used_n: set[str] = set()
used_e: set[frozenset[str]] = set()
units = [(NODE_UNIT, n[0]) for n in nodes] + [
(EDGE_UNIT, frozenset((u, v))) for u, v, _ in edges
]
random.shuffle(units)
for kind, seed in units:
if (kind == NODE_UNIT and seed in used_n) or (
kind == EDGE_UNIT and seed in used_e
):
continue
comm_n: List[str] = []
comm_e: List[tuple[str, str]] = []
queue: deque[tuple[str, Any]] = deque([(kind, seed)])
cnt = 0
while queue and cnt < max_units_per_community:
k, it = queue.popleft()
if k == NODE_UNIT:
if it in used_n:
continue
used_n.add(it)
comm_n.append(it)
cnt += 1
for nei in adj[it]:
e_key = frozenset((it, nei))
if e_key not in used_e:
queue.append((EDGE_UNIT, e_key))
else:
if it in used_e:
continue
used_e.add(it)
u, v = it
comm_e.append((u, v))
cnt += 1
# push nodes that are not visited
for n in it:
if n not in used_n:
queue.append((NODE_UNIT, n))
if comm_n or comm_e:
yield Community(id=seed, nodes=comm_n, edges=comm_e)
|