File size: 1,001 Bytes
f6f73f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
"""
Helpers and constants for generated legal QA datasets.

This package contains JSON files with automatically generated
question/answer-style prompts for legal documents stored in the DB.
Each JSON file should follow the schema documented in
`QA_ITEM_SCHEMA` below.
"""

from __future__ import annotations

from typing import TypedDict, Literal, List


# Closed set of labels accepted by the `difficulty` field of `QAItem`.
DifficultyLevel = Literal[
    "basic",
    "medium",
    "advanced",
]


class QAItem(TypedDict):
    """Typed mapping describing one generated QA-style training example.

    The definition relies only on the standard ``typing`` module rather
    than on any particular ML framework, so several training or
    evaluation scripts can share it.  The class keeps the ``TypedDict``
    default of ``total=True``, so all seven keys are required.
    """

    # Prompt text and its classification.
    question: str
    difficulty: DifficultyLevel  # restricted to the `DifficultyLevel` literals
    intent: str  # plain string; the set of valid intents is not defined here

    # Codes of the document and section the item refers to.
    # NOTE(review): presumably the legal documents stored in the DB -- confirm.
    document_code: str
    section_code: str

    # Titles of that document and section.
    document_title: str
    section_title: str


# Ordered names of the fields each QA item in the JSON files should carry.
# The entries mirror, in the same order, the keys declared on `QAItem`.
QA_ITEM_SCHEMA: List[str] = [
    # prompt text and its classification
    "question", "difficulty", "intent",
    # document / section codes
    "document_code", "section_code",
    # document / section titles
    "document_title", "section_title",
]