Spaces:
Sleeping
Sleeping
File size: 5,615 Bytes
ba9863e 6c40f96 ba9863e 6c40f96 ba9863e 6c40f96 ba9863e 6c40f96 ba9863e 6c40f96 ba9863e 6c40f96 ba9863e 6c40f96 ba9863e 6c40f96 ba9863e 6c40f96 ba9863e 6c40f96 ba9863e 6c40f96 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 |
import re
from typing import List, Tuple, Dict, Set
def _parse_list(content: str) -> List[str]:
    """
    Split a comma-separated run of literals into a list of trimmed names.

    The input may optionally be wrapped in one pair of square brackets;
    surrounding whitespace and empty entries (e.g. from "a,,b") are dropped.

    Args:
        content (str): Text such as "[a,b,c]" or "a,b,c".

    Returns:
        List[str]: The literal names in their original order.

    Examples:
        >>> _parse_list("[a,b,c]")
        ['a', 'b', 'c']
        >>> _parse_list(" x , y ,z ")
        ['x', 'y', 'z']
        >>> _parse_list("[]")
        []
    """
    text = content.strip()
    is_bracketed = text.startswith("[") and text.endswith("]")
    if is_bracketed:
        # Drop exactly one enclosing pair of brackets.
        text = text[1:-1].strip()

    literals: List[str] = []
    for piece in text.split(","):
        name = piece.strip()
        if name:
            literals.append(name)
    return literals
def _parse_rule_line(line: str) -> Dict[str, Dict[str, Set[str]]]:
    """
    Turn one textual rule "[rN]: head <- body" into a nested mapping.

    Args:
        line (str): The rule text, e.g. "[r1]: p <- q,a". It must start with
            the bracketed rule id; the body after "<-" may be empty.

    Returns:
        Dict[str, Dict[str, Set[str]]]: {rule_id: {head: body_literals}} for
        a well-formed rule, or an empty dict when the line does not match.

    Examples:
        >>> _parse_rule_line("[r1]: p <- q,a") == {'r1': {'p': {'q', 'a'}}}
        True
        >>> _parse_rule_line("[r2]: q <- ")
        {'r2': {'q': set()}}
        >>> _parse_rule_line("not a rule")
        {}
    """
    parsed = re.match(r"\[(r\d+)\]:\s*(\w+)\s*<-\s*(.*)", line)
    if parsed is None:
        return {}

    rule_id, head, raw_body = parsed.groups()
    raw_body = raw_body.strip()
    if raw_body:
        body_literals = set(_parse_list(raw_body))
    else:
        # A rule with nothing after "<-" is a fact: it has an empty body.
        body_literals = set()
    return {rule_id: {head: body_literals}}
def _parse_pref_line(line: str) -> Dict[str, Set[str]]:
    """
    Parse a preference line into a "more preferred -> less preferred" mapping.

    Accepted forms (the text before the first ":" is ignored):
        - PREF: a,b > c,d > e
        - PREF: a,b > c and a > d

    Each chain lists groups of literals from most to least preferred,
    separated by ">". Several chains may be joined with the standalone word
    "and"; their results are merged into one dictionary.

    Args:
        line (str): The preference line, normally starting with "PREF:".

    Returns:
        Dict[str, Set[str]]: Maps each literal to every literal that appears
        in a later group of the same chain(s). Literals with nothing less
        preferred are omitted. A line without ":" or without any chain
        yields an empty dict.

    Examples:
        >>> _parse_pref_line("PREF: a,b > c") == {'a': {'c'}, 'b': {'c'}}
        True
        >>> _parse_pref_line("PREF: a > b and a > c") == {'a': {'b', 'c'}}
        True
        >>> _parse_pref_line("PREF: band > sandy") == {'band': {'sandy'}}
        True
    """
    # partition() never raises: a line with no ":" simply has empty content.
    _, _, content = line.partition(":")

    # Split only on the standalone word "and". A plain str.split("and")
    # would also cut through literal names such as "band" or "sandy".
    chains = [
        chain.strip()
        for chain in re.split(r"\band\b", content)
        if chain.strip()
    ]

    pref_dict: Dict[str, Set[str]] = {}
    for chain in chains:
        parsed_groups = [_parse_list(group) for group in chain.split(">")]
        for i, current_group in enumerate(parsed_groups):
            # Everything to the right of this group is less preferred.
            less_preferred: Set[str] = set().union(*parsed_groups[i + 1:])
            if not less_preferred:
                continue
            for literal in current_group:
                pref_dict.setdefault(literal, set()).update(less_preferred)
    return pref_dict
def parse_doc(path: str) -> Tuple[
    List[str],
    List[str],
    List[Tuple[str, str]],
    List[Dict[str, Dict[str, Set[str]]]],
    List[Dict[str, Set[str]]]
]:
    """
    Parse a structured document containing literals, assumptions, contraries,
    rules, and preferences.

    The file is read line by line; blank lines and lines that match none of
    the prefixes below are ignored:

        L: [a,b,c]          language (a later "L:" line replaces an earlier one)
        A: [a,b]            assumptions (a later "A:" line replaces an earlier one)
        C(a): b             contrary pair, appended as ("a", "b")
        [r1]: p <- q,a      rule, appended as {"r1": {"p": {"q", "a"}}}
        PREF: a,b > c > d   preferences, appended as one dict per line

    Malformed contrary, rule, or preference lines are skipped silently.

    Args:
        path (str): Path to the text file to parse. The file is decoded as
            UTF-8; a leading byte-order mark, if present, is discarded.

    Returns:
        Tuple containing:
        - language (List[str]): List of all literals in the language.
        - assumptions (List[str]): List of assumed literals.
        - contraries (List[Tuple[str, str]]): Pairs (literal, contrary).
        - rules (List[Dict[str, Dict[str, Set[str]]]]): One
          {rule_id: {head: set of body literals}} entry per rule line.
        - preferences (List[Dict[str, Set[str]]]): One dictionary per
          non-empty PREF line, mapping literals to less preferred literals.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        UnicodeDecodeError: If the file is not valid UTF-8.

    Example:
        Given a file "doc.txt" with the content::

            L: [a,b,c]
            A: [a,b]
            C(a): b
            [r1]: p <- q,a
            PREF: a,b > c

        parse_doc("doc.txt") returns::

            (['a', 'b', 'c'], ['a', 'b'], [('a', 'b')],
             [{'r1': {'p': {'q', 'a'}}}],
             [{'a': {'c'}, 'b': {'c'}}])
    """
    language: List[str] = []
    assumptions: List[str] = []
    contraries: List[Tuple[str, str]] = []
    rules: List[Dict[str, Dict[str, Set[str]]]] = []
    preferences: List[Dict[str, Set[str]]] = []

    # Compiled once instead of on every "C(" line.
    contrary_pattern = re.compile(r"C\((\w+)\):\s*(\w+)")

    # "utf-8-sig" decodes plain UTF-8 and also drops a leading BOM; with the
    # platform default encoding a BOM would stay glued to the first line and
    # make its prefix check fail, silently losing that line.
    with open(path, "r", encoding="utf-8-sig") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                continue
            if line.startswith("L:"):
                language = _parse_list(line.split(":", 1)[1])
            elif line.startswith("A:"):
                assumptions = _parse_list(line.split(":", 1)[1])
            elif line.startswith("C("):
                match = contrary_pattern.match(line)
                if match:
                    contraries.append((match.group(1), match.group(2)))
            elif line.startswith("[r"):
                rule = _parse_rule_line(line)
                if rule:
                    rules.append(rule)
            elif line.startswith("PREF:"):
                pref_dict = _parse_pref_line(line)
                if pref_dict:
                    preferences.append(pref_dict)
    return language, assumptions, contraries, rules, preferences
if __name__ == "__main__":
    # Manual smoke run: parse the bundled sample document and dump each part.
    parsed = parse_doc("./backend/data/simple_plus2AND.txt")
    language, assumptions, contraries, rules, preferences = parsed

    sections = (
        ("Language:", language),
        ("Assumptions:", assumptions),
        ("Contraries:", contraries),
        ("Rules:", rules),
        ("Preferences:", preferences),
    )
    for label, value in sections:
        print(label, value)