davidtran999 committed on
Commit
1463e3d
·
verified ·
1 Parent(s): 5aa8ea6

Upload backend/hue_portal/core/models.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. backend/hue_portal/core/models.py +480 -0
backend/hue_portal/core/models.py ADDED
@@ -0,0 +1,480 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from django.db import models
2
+ from django.contrib.postgres.search import SearchVectorField
3
+ from django.contrib.postgres.indexes import GinIndex
4
+ from django.utils import timezone
5
+ from django.conf import settings
6
+ import uuid
7
+
8
+
9
class UserProfile(models.Model):
    """Additional account data linked one-to-one to the configured auth user.

    The profile is deleted together with its user (``on_delete=CASCADE``) and
    is reachable from the user object as ``user.profile``.
    """

    class Roles(models.TextChoices):
        # Each member is (stored value, human-readable label).
        ADMIN = "admin", "Admin"
        USER = "user", "User"

    user = models.OneToOneField(
        settings.AUTH_USER_MODEL,
        on_delete=models.CASCADE,
        related_name="profile",
    )
    # New profiles default to the plain "user" role.
    role = models.CharField(max_length=20, choices=Roles.choices, default=Roles.USER)
    title = models.CharField(max_length=120, blank=True)
    phone = models.CharField(max_length=30, blank=True)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    def __str__(self) -> str:
        """Return ``"<username> (<role label>)"``."""
        username = self.user.username
        role_label = self.get_role_display()
        return f"{username} ({role_label})"
27
+
28
+
29
def legal_document_upload_path(instance, filename):
    """Build the storage path for a legal document's uploaded file.

    Args:
        instance: Object exposing a ``code`` attribute (used as the
            per-document folder name). A falsy code falls back to a random
            UUID hex string.
        filename: Name of the uploaded file; appended unchanged.

    Returns:
        ``"legal_uploads/<folder>/<filename>"`` where ``<folder>`` is the
        code with path separators neutralised.
    """
    base = "legal_uploads"
    folder = instance.code or uuid.uuid4().hex
    # Neutralise both POSIX and Windows separators so the code can never
    # introduce extra directory levels.
    folder = folder.replace("/", "_").replace("\\", "_")
    # A folder of "." or ".." would turn into a relative-path segment.
    if folder in {".", ".."}:
        folder = folder.replace(".", "_")
    return f"{base}/{folder}/{filename}"
33
+
34
+
35
def legal_document_image_upload_path(instance, filename):
    """Build the storage path for an image extracted from a legal document.

    Args:
        instance: Object that may expose a ``document`` with a ``code``.
        filename: Original image file name; kept as the path suffix.

    Returns:
        ``"legal_images/<folder>/<YYYYmmddHHMMSS>_<filename>"``. ``<folder>``
        is the document code with ``/`` replaced by ``_``, or a random UUID
        hex string when no document or code is available.
    """
    base = "legal_images"
    # Reading an unset non-nullable forward relation raises
    # RelatedObjectDoesNotExist in Django; it subclasses AttributeError, so
    # getattr() with a default reaches the fallback instead of crashing.
    document = getattr(instance, "document", None)
    # An empty code also falls back, avoiding a "legal_images//..." path.
    folder = getattr(document, "code", None) or uuid.uuid4().hex
    folder = folder.replace("/", "_")
    timestamp = timezone.now().strftime("%Y%m%d%H%M%S")
    return f"{base}/{folder}/{timestamp}_{filename}"
40
+
41
class Procedure(models.Model):
    """An administrative procedure with its requirements and search columns."""

    title = models.CharField(max_length=500)
    # Example values: ANTT (security & order), Cư trú (residence),
    # PCCC (fire prevention), GT (traffic).
    domain = models.CharField(max_length=100, db_index=True)
    # Example values: Tỉnh (province), Huyện (district), Xã (commune).
    level = models.CharField(max_length=50, blank=True)
    conditions = models.TextField(blank=True)
    dossier = models.TextField(blank=True)
    fee = models.CharField(max_length=200, blank=True)
    duration = models.CharField(max_length=200, blank=True)
    authority = models.CharField(max_length=300, blank=True)
    source_url = models.URLField(max_length=1000, blank=True)
    updated_at = models.DateTimeField(auto_now=True)
    # Full-text search column backing the GIN index below; nothing in this
    # class populates it.
    tsv_body = SearchVectorField(null=True, editable=False)
    # Raw bytes; the serialization format is not defined in this file.
    embedding = models.BinaryField(null=True, blank=True, editable=False)

    class Meta:
        indexes = [
            GinIndex(fields=["tsv_body"], name="procedure_tsv_idx"),
        ]

    def __str__(self) -> str:
        """Return the procedure title (readable admin/shell representation)."""
        return self.title

    def search_vector(self) -> str:
        """Return the searchable text of this procedure.

        Despite the name, the result is a plain string: the non-empty values
        of title, domain, level, conditions and dossier joined by one space.
        """
        values = (self.title, self.domain, self.level, self.conditions, self.dossier)
        return " ".join(str(value) for value in values if value)
64
+
65
class Fine(models.Model):
    """A fine/penalty entry identified by a unique code, with search columns."""

    code = models.CharField(max_length=50, unique=True)
    name = models.CharField(max_length=500)
    article = models.CharField(max_length=100, blank=True)
    decree = models.CharField(max_length=100, blank=True)
    # Whole-number amounts (decimal_places=0); the currency is not stated here.
    min_fine = models.DecimalField(max_digits=12, decimal_places=0, null=True, blank=True)
    max_fine = models.DecimalField(max_digits=12, decimal_places=0, null=True, blank=True)
    license_points = models.CharField(max_length=50, blank=True)
    remedial = models.TextField(blank=True)
    source_url = models.URLField(max_length=1000, blank=True)
    # Full-text search column backing the GIN index below; nothing in this
    # class populates it.
    tsv_body = SearchVectorField(null=True, editable=False)
    # Raw bytes; the serialization format is not defined in this file.
    embedding = models.BinaryField(null=True, blank=True, editable=False)

    class Meta:
        indexes = [
            GinIndex(fields=["tsv_body"], name="fine_tsv_idx"),
        ]

    def __str__(self) -> str:
        """Return ``"<code> - <name>"`` (readable admin/shell representation)."""
        return f"{self.code} - {self.name}"

    def search_vector(self) -> str:
        """Return the searchable text of this fine.

        Despite the name, the result is a plain string: the non-empty values
        of name, code, article, decree and remedial joined by one space.
        """
        values = (self.name, self.code, self.article, self.decree, self.remedial)
        return " ".join(str(value) for value in values if value)
87
+
88
class Office(models.Model):
    """A service office/unit with contact details, coordinates and search columns."""

    unit_name = models.CharField(max_length=300)
    address = models.CharField(max_length=500, blank=True)
    district = models.CharField(max_length=100, blank=True, db_index=True)
    working_hours = models.CharField(max_length=200, blank=True)
    phone = models.CharField(max_length=100, blank=True)
    email = models.EmailField(blank=True)
    latitude = models.FloatField(null=True, blank=True)
    longitude = models.FloatField(null=True, blank=True)
    service_scope = models.CharField(max_length=300, blank=True)
    # Full-text search column backing the GIN index below; nothing in this
    # class populates it.
    tsv_body = SearchVectorField(null=True, editable=False)
    # Raw bytes; the serialization format is not defined in this file.
    embedding = models.BinaryField(null=True, blank=True, editable=False)

    class Meta:
        indexes = [
            GinIndex(fields=["tsv_body"], name="office_tsv_idx"),
        ]

    def __str__(self) -> str:
        """Return the unit name (readable admin/shell representation)."""
        return self.unit_name

    def search_vector(self) -> str:
        """Return the searchable text of this office.

        Despite the name, the result is a plain string: the non-empty values
        of unit_name, address, district and service_scope joined by one space.
        """
        values = (self.unit_name, self.address, self.district, self.service_scope)
        return " ".join(str(value) for value in values if value)
110
+
111
class Advisory(models.Model):
    """A published advisory (title + summary) with search columns."""

    title = models.CharField(max_length=500)
    summary = models.TextField()
    source_url = models.URLField(max_length=1000, blank=True)
    published_at = models.DateField(null=True, blank=True)
    # Full-text search column backing the GIN index below; nothing in this
    # class populates it.
    tsv_body = SearchVectorField(null=True, editable=False)
    # Raw bytes; the serialization format is not defined in this file.
    embedding = models.BinaryField(null=True, blank=True, editable=False)

    class Meta:
        indexes = [
            GinIndex(fields=["tsv_body"], name="advisory_tsv_idx"),
        ]

    def __str__(self) -> str:
        """Return the advisory title (readable admin/shell representation)."""
        return self.title

    def search_vector(self) -> str:
        """Return the searchable text of this advisory.

        Despite the name, the result is a plain string: the non-empty values
        of title and summary joined by one space.
        """
        values = (self.title, self.summary)
        return " ".join(str(value) for value in values if value)
128
+
129
+
130
class LegalDocument(models.Model):
    """Metadata + raw text for authoritative legal documents."""

    # (stored value, label) pairs accepted by ``doc_type``.
    DOCUMENT_TYPES = [
        ("decision", "Decision"),
        ("circular", "Circular"),
        ("guideline", "Guideline"),
        ("plan", "Plan"),
        ("other", "Other"),
    ]

    # Unique document identifier; also used by legal_document_upload_path as
    # the upload folder name.
    code = models.CharField(max_length=100, unique=True)
    title = models.CharField(max_length=500)
    doc_type = models.CharField(max_length=30, choices=DOCUMENT_TYPES, default="other")
    summary = models.TextField(blank=True)
    issued_by = models.CharField(max_length=200, blank=True)
    issued_at = models.DateField(null=True, blank=True)
    source_file = models.CharField(max_length=500, blank=True)
    uploaded_file = models.FileField(upload_to=legal_document_upload_path, null=True, blank=True)
    original_filename = models.CharField(max_length=255, blank=True)
    mime_type = models.CharField(max_length=120, blank=True)
    file_size = models.BigIntegerField(null=True, blank=True)
    # Checksum algorithms are not specified in this file.
    file_checksum = models.CharField(max_length=128, blank=True)
    content_checksum = models.CharField(max_length=128, blank=True)
    source_url = models.URLField(max_length=1000, blank=True)
    page_count = models.IntegerField(null=True, blank=True)
    raw_text = models.TextField()
    raw_text_ocr = models.TextField(blank=True)
    metadata = models.JSONField(default=dict, blank=True)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)
    # Full-text search column backing the GIN index below; nothing in this
    # class populates it.
    tsv_body = SearchVectorField(null=True, editable=False)

    class Meta:
        indexes = [
            GinIndex(fields=["tsv_body"], name="legal_document_tsv_idx"),
            models.Index(fields=["doc_type"]),
            models.Index(fields=["issued_at"]),
        ]
        ordering = ["title"]

    def __str__(self) -> str:
        """Return ``"<code> - <title>"`` (readable admin/shell representation)."""
        return f"{self.code} - {self.title}"

    def search_vector(self) -> str:
        """Return concatenated searchable text.

        The result is a plain string: the non-empty values of title, code,
        summary, issued_by and raw_text joined by one space.
        """
        values = (
            self.title,
            self.code,
            self.summary,
            self.issued_by,
            self.raw_text,
        )
        return " ".join(str(value) for value in values if value)
181
+
182
+
183
class LegalSection(models.Model):
    """Structured snippet (chapter/section/article) for each legal document."""

    # (stored value, label) pairs accepted by ``level``.
    LEVEL_CHOICES = [
        ("chapter", "Chapter"),
        ("section", "Section"),
        ("article", "Article"),
        ("clause", "Clause"),
        ("note", "Note"),
        ("other", "Other"),
    ]

    # Sections are deleted with their document; reverse accessor is
    # ``document.sections``.
    document = models.ForeignKey(
        LegalDocument,
        on_delete=models.CASCADE,
        related_name="sections",
    )
    section_code = models.CharField(max_length=120)
    section_title = models.CharField(max_length=500, blank=True)
    level = models.CharField(max_length=30, choices=LEVEL_CHOICES, default="other")
    order = models.PositiveIntegerField(default=0, db_index=True)
    page_start = models.IntegerField(null=True, blank=True)
    page_end = models.IntegerField(null=True, blank=True)
    content = models.TextField()
    excerpt = models.TextField(blank=True)
    metadata = models.JSONField(default=dict, blank=True)
    is_ocr = models.BooleanField(default=False)
    # Full-text search column backing the GIN index below; nothing in this
    # class populates it.
    tsv_body = SearchVectorField(null=True, editable=False)
    # Raw bytes; the serialization format is not defined in this file.
    embedding = models.BinaryField(null=True, blank=True, editable=False)

    class Meta:
        indexes = [
            GinIndex(fields=["tsv_body"], name="legal_section_tsv_idx"),
            models.Index(fields=["document", "order"]),
            models.Index(fields=["level"]),
        ]
        ordering = ["document", "order"]
        unique_together = ("document", "section_code", "order")

    def __str__(self) -> str:
        """Return ``"<section_code> (document #<id>)"``.

        Uses ``document_id`` rather than ``document`` so that rendering the
        string never triggers an extra query for the related document.
        """
        return f"{self.section_code} (document #{self.document_id})"

    def search_vector(self) -> str:
        """Return the searchable text of this section.

        The result is a plain string: the non-empty values of section_title,
        section_code, content and excerpt joined by one space.
        """
        values = (
            self.section_title,
            self.section_code,
            self.content,
            self.excerpt,
        )
        return " ".join(str(value) for value in values if value)
230
+
231
+
232
class Synonym(models.Model):
    """Maps a unique keyword to one alias string."""

    keyword = models.CharField(max_length=120, unique=True)
    alias = models.CharField(max_length=120)

    def __str__(self) -> str:
        """Return ``"<keyword> -> <alias>"`` (readable admin/shell representation)."""
        return f"{self.keyword} -> {self.alias}"
235
+
236
+
237
class LegalDocumentImage(models.Model):
    """Metadata for images extracted from uploaded legal documents."""

    # Images are removed with their document; reverse accessor is
    # ``document.images``.
    document = models.ForeignKey(
        LegalDocument,
        on_delete=models.CASCADE,
        related_name="images",
    )
    image = models.ImageField(upload_to=legal_document_image_upload_path)
    page_number = models.IntegerField(null=True, blank=True)
    description = models.CharField(max_length=255, blank=True)
    width = models.IntegerField(null=True, blank=True)
    height = models.IntegerField(null=True, blank=True)
    # Checksum algorithm is not specified in this file.
    checksum = models.CharField(max_length=128, blank=True)
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        indexes = [
            models.Index(fields=["document", "page_number"]),
            models.Index(fields=["checksum"]),
        ]

    def __str__(self) -> str:
        """Return ``"Image <id> of <document code>"``."""
        image_id = self.id
        document_code = self.document.code
        return f"Image {image_id} of {document_code}"
261
+
262
+
263
class IngestionJob(models.Model):
    """Background ingestion task information."""

    # Status values stored in ``status``.
    STATUS_PENDING = "pending"
    STATUS_RUNNING = "running"
    STATUS_COMPLETED = "completed"
    STATUS_FAILED = "failed"

    STATUS_CHOICES = [
        (STATUS_PENDING, "Pending"),
        (STATUS_RUNNING, "Running"),
        (STATUS_COMPLETED, "Completed"),
        (STATUS_FAILED, "Failed"),
    ]

    # Random UUID primary key instead of an auto-increment integer.
    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    # NOTE(review): presumably mirrors LegalDocument.code - confirm with callers.
    code = models.CharField(max_length=128)
    filename = models.CharField(max_length=255)
    # The job survives deletion of its document: the link is set to NULL.
    document = models.ForeignKey(
        LegalDocument,
        related_name="ingestion_jobs",
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
    )
    metadata = models.JSONField(default=dict, blank=True)
    stats = models.JSONField(default=dict, blank=True)
    status = models.CharField(max_length=20, choices=STATUS_CHOICES, default=STATUS_PENDING)
    error_message = models.TextField(blank=True)
    storage_path = models.CharField(max_length=512, blank=True)
    # Unit and range are not enforced here (no validators).
    progress = models.PositiveIntegerField(default=0)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)
    started_at = models.DateTimeField(null=True, blank=True)
    finished_at = models.DateTimeField(null=True, blank=True)

    class Meta:
        # Newest jobs first.
        ordering = ("-created_at",)

    def __str__(self) -> str:  # pragma: no cover - trivial
        """Return ``"IngestionJob(<code>, <status>)"``."""
        job_code = self.code
        job_status = self.status
        return f"IngestionJob({job_code}, {job_status})"
304
+
305
class AuditLog(models.Model):
    """One logged request: client info, path/query, status and ML timing data."""

    created_at = models.DateTimeField(auto_now_add=True)
    ip = models.GenericIPAddressField(null=True, blank=True)
    user_agent = models.CharField(max_length=300, blank=True)
    path = models.CharField(max_length=300)
    query = models.CharField(max_length=500, blank=True)
    # NOTE(review): default of 200 suggests an HTTP status code - confirm.
    status = models.IntegerField(default=200)
    intent = models.CharField(max_length=50, blank=True)
    confidence = models.FloatField(null=True, blank=True)
    latency_ms = models.FloatField(null=True, blank=True)

    def __str__(self) -> str:
        """Return ``"<path> [<status>]"`` (readable admin/shell representation)."""
        return f"{self.path} [{self.status}]"
315
+
316
+
317
class MLMetrics(models.Model):
    """Aggregated ML metrics, at most one row per calendar date."""

    date = models.DateField(unique=True)
    total_requests = models.IntegerField(default=0)
    intent_accuracy = models.FloatField(null=True, blank=True)
    average_latency_ms = models.FloatField(null=True, blank=True)
    error_rate = models.FloatField(null=True, blank=True)
    intent_breakdown = models.JSONField(default=dict, blank=True)
    generated_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        # Most recent date first.
        ordering = ["-date"]
        verbose_name = "ML Metrics"
        verbose_name_plural = "ML Metrics"

    def __str__(self) -> str:
        """Return ``"ML Metrics <date>"`` (readable admin/shell representation)."""
        return f"ML Metrics {self.date}"
330
+
331
+
332
class ConversationSession(models.Model):
    """Model to store conversation sessions for context management."""

    # Public session identifier, generated as a random UUID.
    session_id = models.UUIDField(default=uuid.uuid4, unique=True, editable=False)
    # Free-form string identifier, not a foreign key to the auth user.
    user_id = models.CharField(max_length=100, null=True, blank=True, db_index=True)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)
    metadata = models.JSONField(default=dict, blank=True)

    class Meta:
        # Most recently updated sessions first.
        ordering = ["-updated_at"]
        verbose_name = "Conversation Session"
        verbose_name_plural = "Conversation Sessions"
        # NOTE(review): session_id is already unique=True, so the explicit
        # index on it may be redundant - removing it needs a migration.
        indexes = [
            models.Index(fields=["session_id"]),
            models.Index(fields=["user_id", "-updated_at"]),
        ]

    def __str__(self) -> str:
        """Return ``"Session <session_id>"``."""
        identifier = self.session_id
        return f"Session {identifier}"
351
+
352
+
353
class ConversationMessage(models.Model):
    """Model to store individual messages in a conversation session."""

    # (stored value, label) pairs accepted by ``role``.
    ROLE_CHOICES = [
        ("user", "User"),
        ("bot", "Bot"),
    ]

    # Messages are deleted with their session; reverse accessor is
    # ``session.messages``.
    session = models.ForeignKey(
        ConversationSession,
        on_delete=models.CASCADE,
        related_name="messages",
    )
    role = models.CharField(max_length=10, choices=ROLE_CHOICES)
    content = models.TextField()
    intent = models.CharField(max_length=50, blank=True, null=True)
    entities = models.JSONField(default=dict, blank=True)
    timestamp = models.DateTimeField(auto_now_add=True)
    metadata = models.JSONField(default=dict, blank=True)

    class Meta:
        # Oldest message first, i.e. chronological order.
        ordering = ["timestamp"]
        verbose_name = "Conversation Message"
        verbose_name_plural = "Conversation Messages"
        indexes = [
            models.Index(fields=["session", "timestamp"]),
            models.Index(fields=["session", "role", "timestamp"]),
        ]

    def __str__(self) -> str:
        """Return ``"<role>: <first 50 chars of content>..."``.

        The trailing ellipsis is appended regardless of content length.
        """
        preview = self.content[:50]
        return f"{self.role}: {preview}..."
383
+
384
+
385
class GoldenQuery(models.Model):
    """Golden dataset - verified queries and responses for Fast Path."""

    query = models.TextField(unique=True, db_index=True)
    # Normalized for matching.
    query_normalized = models.TextField(db_index=True)
    # Vector embedding for semantic search.
    query_embedding = models.JSONField(null=True, blank=True)

    intent = models.CharField(max_length=50, db_index=True)
    # Verified response text.
    response_message = models.TextField()
    # Full response dict (results, citations, etc.).
    response_data = models.JSONField()

    # Metadata
    # Example values: "legal_expert", "gpt4" or "claude".
    verified_by = models.CharField(max_length=100)
    verified_at = models.DateTimeField(auto_now_add=True)
    last_updated = models.DateTimeField(auto_now=True)
    # Track how often used.
    usage_count = models.IntegerField(default=0)
    # 1.0 = perfect.
    accuracy_score = models.FloatField(default=1.0)

    # Versioning
    version = models.IntegerField(default=1)
    is_active = models.BooleanField(default=True, db_index=True)

    class Meta:
        verbose_name = "Golden Query"
        verbose_name_plural = "Golden Queries"
        indexes = [
            models.Index(fields=["query_normalized", "intent"]),
            models.Index(fields=["is_active", "intent"]),
            models.Index(fields=["usage_count"]),
        ]
        # Most used first, then most recently verified.
        ordering = ["-usage_count", "-verified_at"]

    def __str__(self) -> str:
        """Return ``"GoldenQuery: <first 50 chars of query>... (<intent>)"``.

        The ellipsis is appended regardless of query length.
        """
        preview = self.query[:50]
        return f"GoldenQuery: {preview}... ({self.intent})"
418
+
419
+
420
class QueryRoutingLog(models.Model):
    """Log routing decisions for monitoring Dual-Path RAG."""

    query = models.TextField()
    # "fast_path" or "slow_path".
    route = models.CharField(max_length=20, db_index=True)
    router_confidence = models.FloatField()
    # "keyword" or "llm" or "similarity" or "default".
    router_method = models.CharField(max_length=20, db_index=True)
    # Plain integer, not a ForeignKey, so no referential integrity is enforced.
    matched_golden_query_id = models.IntegerField(null=True, blank=True)
    similarity_score = models.FloatField(null=True, blank=True)
    response_time_ms = models.IntegerField()
    intent = models.CharField(max_length=50, blank=True, db_index=True)
    created_at = models.DateTimeField(auto_now_add=True, db_index=True)

    class Meta:
        verbose_name = "Query Routing Log"
        verbose_name_plural = "Query Routing Logs"
        indexes = [
            models.Index(fields=["route", "created_at"]),
            models.Index(fields=["router_method", "created_at"]),
            models.Index(fields=["intent", "created_at"]),
        ]
        # Newest entries first.
        ordering = ["-created_at"]

    def __str__(self) -> str:
        """Return ``"RoutingLog: <route> (<router_method>) - <response_time_ms>ms"``."""
        chosen_route = self.route
        method = self.router_method
        elapsed = self.response_time_ms
        return f"RoutingLog: {chosen_route} ({method}) - {elapsed}ms"
444
+
445
+
446
class SystemAlert(models.Model):
    """System alerts for admin dashboard (security, import failures, system errors)."""

    # (stored value, label) pairs accepted by ``alert_type``.
    ALERT_TYPES = [
        ("security", "Security"),
        ("import", "Import"),
        ("system", "System"),
    ]

    # (stored value, label) pairs accepted by ``severity``.
    SEVERITY_CHOICES = [
        ("info", "Info"),
        ("warning", "Warning"),
        ("error", "Error"),
    ]

    alert_type = models.CharField(max_length=20, choices=ALERT_TYPES, db_index=True)
    title = models.CharField(max_length=200)
    message = models.TextField()
    severity = models.CharField(max_length=10, choices=SEVERITY_CHOICES, default="warning")
    created_at = models.DateTimeField(auto_now_add=True, db_index=True)
    # NULL until a resolution time is recorded.
    resolved_at = models.DateTimeField(null=True, blank=True)
    metadata = models.JSONField(default=dict, blank=True)

    class Meta:
        # Newest alerts first.
        ordering = ["-created_at"]
        indexes = [
            models.Index(fields=["alert_type", "-created_at"]),
            models.Index(fields=["resolved_at"]),
        ]
        verbose_name = "System Alert"
        verbose_name_plural = "System Alerts"

    def __str__(self) -> str:
        """Return ``"<alert type label>: <title> (<severity label>)"``."""
        type_label = self.get_alert_type_display()
        severity_label = self.get_severity_display()
        return f"{type_label}: {self.title} ({severity_label})"
480
+