File size: 2,419 Bytes
799ac7c
 
31086ae
799ac7c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31086ae
799ac7c
 
 
 
31086ae
9e67c3b
 
799ac7c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31086ae
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import random
from collections import deque
from typing import Any, Iterable, List

from graphgen.bases import BaseGraphStorage, BasePartitioner
from graphgen.bases.datatypes import Community

NODE_UNIT: str = "n"
EDGE_UNIT: str = "e"


class BFSPartitioner(BasePartitioner):
    """
    BFS partitioner that partitions the graph into communities of a fixed size.
    1. Randomly choose a unit.
    2. Expand the community using BFS until the max unit size is reached.
    (A unit is a node or an edge.)
    """

    def partition(
        self,
        g: BaseGraphStorage,
        max_units_per_community: int = 1,
        **kwargs: Any,
    ) -> Iterable[Community]:
        nodes = g.get_all_nodes()
        edges = g.get_all_edges()

        adj, _ = self._build_adjacency_list(nodes, edges)

        used_n: set[str] = set()
        used_e: set[frozenset[str]] = set()

        units = [(NODE_UNIT, n[0]) for n in nodes] + [
            (EDGE_UNIT, frozenset((u, v))) for u, v, _ in edges
        ]
        random.shuffle(units)

        for kind, seed in units:
            if (kind == NODE_UNIT and seed in used_n) or (
                kind == EDGE_UNIT and seed in used_e
            ):
                continue

            comm_n: List[str] = []
            comm_e: List[tuple[str, str]] = []
            queue: deque[tuple[str, Any]] = deque([(kind, seed)])
            cnt = 0

            while queue and cnt < max_units_per_community:
                k, it = queue.popleft()
                if k == NODE_UNIT:
                    if it in used_n:
                        continue
                    used_n.add(it)
                    comm_n.append(it)
                    cnt += 1
                    for nei in adj[it]:
                        e_key = frozenset((it, nei))
                        if e_key not in used_e:
                            queue.append((EDGE_UNIT, e_key))
                else:
                    if it in used_e:
                        continue
                    used_e.add(it)

                    u, v = it
                    comm_e.append((u, v))
                    cnt += 1
                    # push nodes that are not visited
                    for n in it:
                        if n not in used_n:
                            queue.append((NODE_UNIT, n))

            if comm_n or comm_e:
                yield Community(id=seed, nodes=comm_n, edges=comm_e)