File size: 5,615 Bytes
ba9863e
 
 
6c40f96
ba9863e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6c40f96
ba9863e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6c40f96
ba9863e
 
 
 
 
6c40f96
ba9863e
6c40f96
ba9863e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6c40f96
ba9863e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6c40f96
ba9863e
 
 
 
 
6c40f96
ba9863e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6c40f96
ba9863e
 
 
 
6c40f96
 
ba9863e
 
 
 
6c40f96
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
import re
from typing import List, Tuple, Dict, Set


def _parse_list(content: str) -> List[str]:
    """
    Parse a string representing a list of literals into a Python list of strings.
    Args:
        content (str): A string in the form "[a,b,c]" or "a,b,c".
    Returns:
        List[str]: A list of literal identifiers as strings.
    Examples:
        >>> _parse_list("[a,b,c]")
        ['a', 'b', 'c']
        >>> _parse_list("x,y,z")
        ['x', 'y', 'z']
        >>> _parse_list("[]")
        []
    """
    content = content.strip()
    if content.startswith("[") and content.endswith("]"):
        content = content[1:-1].strip()
    if not content:
        return []
    return [x.strip() for x in content.split(",") if x.strip()]


def _parse_rule_line(line: str) -> Dict[str, Dict[str, Set[str]]]:
    """
    Parse a single rule line of the form [rX]: head <- body.
    Args:
        line (str): Rule string, e.g. "[r1]: p <- q,a".
    Returns:
        Dict[str, Dict[str, Set[str]]]: Mapping of rule ID to {head: set of body literals}.
    Examples:
        >>> _parse_rule_line("[r1]: p <- q,a")
        {'r1': {'p': {'q', 'a'}}}
        >>> _parse_rule_line("[r2]: q <- ")
        {'r2': {'q': set()}}
    """
    match = re.match(r"\[(r\d+)\]:\s*(\w+)\s*<-\s*(.*)", line)
    if match:
        r_id = match.group(1)
        head = match.group(2)
        body = match.group(3).strip()
        body_atoms = set(_parse_list(body)) if body else set()
        return {r_id: {head: body_atoms}}
    return {}


def _parse_pref_line(line: str) -> Dict[str, Set[str]]:
    """
    Parse a preference line of the form:
    - PREF: a,b > c,d > e
    - PREF: a,b > c and a > d

    Supports multiple chains joined by "and".

    Returns:
        Dict[str, Set[str]]: Dictionary mapping each element to all elements
                             that come after it in the preference chain(s).
                             Literals with no less-preferred values are omitted.
    """
    content = line.split(":", 1)[1].strip()

    # Split into separate chains by 'and'
    chains = [chain.strip() for chain in content.split("and") if chain.strip()]

    pref_dict: Dict[str, Set[str]] = {}

    for chain in chains:
        groups = [group.strip() for group in chain.split(">")]
        parsed_groups = [_parse_list(group) for group in groups]

        for i, current_group in enumerate(parsed_groups):
            less_preferred = set()
            for j in range(i + 1, len(parsed_groups)):
                less_preferred.update(parsed_groups[j])

            # Only add mapping if there are less preferred items
            if less_preferred:
                for literal in current_group:
                    if literal not in pref_dict:
                        pref_dict[literal] = set()
                    pref_dict[literal].update(less_preferred)

    return pref_dict


def parse_doc(path: str) -> Tuple[
    List[str],
    List[str],
    List[Tuple[str, str]],
    List[Dict[str, Dict[str, Set[str]]]],
    List[Dict[str, Set[str]]]
]:
    """
    Parse a structured document containing literals, assumptions, contraries,
    rules, and preferences.

    The file is read line by line as UTF-8 (a leading byte-order mark is
    tolerated). Each non-blank line is trimmed and dispatched on its prefix:

    - "L:"    language literals; a later "L:" line replaces an earlier one.
    - "A:"    assumptions; a later "A:" line replaces an earlier one.
    - "C("    one contrary of the form "C(x): y", appended as (x, y).
    - "[r"    one rule of the form "[rN]: head <- body".
    - "PREF:" one preference line, possibly holding several chains.

    Lines with any other prefix, and lines that carry a known prefix but do
    not parse, are skipped silently.

    Args:
        path (str): Path to the text file to parse.
    Returns:
        Tuple containing:
        - language (List[str]): List of all literals in the language.
        - assumptions (List[str]): List of assumed literals.
        - contraries (List[Tuple[str, str]]): List of contrary pairs (literal, contrary).
        - rules (List[Dict[str, Dict[str, Set[str]]]]): List of rules,
          each rule is {rule_id: {head: set of body literals}}.
        - preferences (List[Dict[str, Set[str]]]): List of preference dictionaries,
          one per non-empty "PREF:" line, each mapping literals to sets of
          less preferred literals.
    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    Examples:
        Given a file "doc.txt" with the content::

            L: [a,b,c]
            A: [a,b]
            C(a): b
            [r1]: p <- q,a
            PREF: a,b > c

        ``parse_doc("doc.txt")`` returns::

            (['a', 'b', 'c'], ['a', 'b'], [('a', 'b')],
             [{'r1': {'p': {'q', 'a'}}}],
             [{'a': {'c'}, 'b': {'c'}}])
    """
    language: List[str] = []
    assumptions: List[str] = []
    contraries: List[Tuple[str, str]] = []
    rules: List[Dict[str, Dict[str, Set[str]]]] = []
    preferences: List[Dict[str, Set[str]]] = []

    # "utf-8-sig" reads plain UTF-8 and also drops a leading BOM. Without it
    # the decoding depends on the platform locale, and a BOM would cling to
    # the first line so its prefix check fails and the line is lost.
    with open(path, "r", encoding="utf-8-sig") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue

            if line.startswith("L:"):
                language = _parse_list(line.split(":", 1)[1])
            elif line.startswith("A:"):
                assumptions = _parse_list(line.split(":", 1)[1])
            elif line.startswith("C("):
                match = re.match(r"C\((\w+)\):\s*(\w+)", line)
                if match:
                    contraries.append((match.group(1), match.group(2)))
            elif line.startswith("[r"):
                rule = _parse_rule_line(line)
                # An empty dict means the line did not look like a rule.
                if rule:
                    rules.append(rule)
            elif line.startswith("PREF:"):
                pref_dict = _parse_pref_line(line)
                # Skip preference lines that yield no ordering at all.
                if pref_dict:
                    preferences.append(pref_dict)

    return language, assumptions, contraries, rules, preferences


if __name__ == "__main__":
    # Manual run: parse the hard-coded sample file (path is relative to the
    # current working directory) and print every parsed section.
    sample_path = "./backend/data/simple_plus2AND.txt"
    language, assumptions, contraries, rules, preferences = parse_doc(sample_path)

    sections = (
        ("Language:", language),
        ("Assumptions:", assumptions),
        ("Contraries:", contraries),
        ("Rules:", rules),
        ("Preferences:", preferences),
    )
    for label, parsed in sections:
        print(label, parsed)