davidtran999 committed on
Commit
f6f73f2
·
verified ·
1 Parent(s): 3e33c3f

Upload backend/chatbot/training/generated_qa/__init__.py with huggingface_hub

Browse files
backend/chatbot/training/generated_qa/__init__.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Helpers and constants for generated legal QA datasets.
3
+
4
+ This package contains JSON files with automatically generated
5
+ question/answer-style prompts for legal documents stored in the DB.
6
+ Each JSON file should follow the schema documented in
7
+ `QA_ITEM_SCHEMA` below.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from typing import TypedDict, Literal, List
13
+
14
+
15
# Closed set of values permitted for the `difficulty` field of a QA item.
DifficultyLevel = Literal["basic", "medium", "advanced"]
16
+
17
+
18
class QAItem(TypedDict):
    """Schema for a single generated QA-style training example.

    This is intentionally lightweight and independent from any
    specific ML framework so it can be reused by multiple
    training or evaluation scripts.

    The TypedDict is total (the default), so type checkers treat
    every key below as required.
    """

    question: str
    # Restricted to the literals enumerated by `DifficultyLevel`.
    difficulty: DifficultyLevel
    intent: str
    document_code: str
    section_code: str
    document_title: str
    section_title: str
34
+
35
+
36
# Ordered key names that each generated QA JSON item should provide.
# Listed in the same order as the fields declared on `QAItem`.
QA_ITEM_SCHEMA: List[str] = [
    "question",
    "difficulty",
    "intent",
    "document_code",
    "section_code",
    "document_title",
    "section_title",
]
45
+
46
+