davidtran999 committed on
Commit
54c8c7b
·
verified ·
1 Parent(s): 9e94154

Upload backend/hue_portal/core/views.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. backend/hue_portal/core/views.py +333 -0
backend/hue_portal/core/views.py ADDED
@@ -0,0 +1,333 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from django.conf import settings
3
+ from django.db.models.functions import Lower
4
+ from django.db.models import Q
5
+ from django.http import FileResponse, Http404
6
+ from django.shortcuts import get_object_or_404
7
+ from pathlib import Path
8
+ from rest_framework.decorators import api_view, parser_classes
9
+ from rest_framework.parsers import MultiPartParser, FormParser
10
+ from rest_framework.response import Response
11
+ from .models import (
12
+ Procedure,
13
+ Fine,
14
+ Office,
15
+ Advisory,
16
+ LegalSection,
17
+ LegalDocument,
18
+ Synonym,
19
+ IngestionJob,
20
+ UserProfile,
21
+ )
22
+ from .serializers import (
23
+ ProcedureSerializer,
24
+ FineSerializer,
25
+ OfficeSerializer,
26
+ AdvisorySerializer,
27
+ LegalSectionSerializer,
28
+ LegalDocumentSerializer,
29
+ IngestionJobSerializer,
30
+ )
31
+ from .services import enqueue_ingestion_job
32
+ from .search_ml import search_with_ml
33
+ # Chatbot moved to hue_portal.chatbot app
34
+ # Keeping import for backward compatibility
35
+ try:
36
+ from hue_portal.chatbot.chatbot import get_chatbot
37
+ except ImportError:
38
+ from .chatbot import get_chatbot
39
+
40
def normalize_query(q: str) -> str:
    """Return *q* with surrounding whitespace removed; falsy input yields ``""``."""
    if not q:
        return ""
    return q.strip()
42
+
43
@api_view(["GET"])
def search(request):
    """Unified search endpoint covering every searchable model.

    Query parameters:
        q: required search text (whitespace-trimmed); a missing/empty value
            produces a 400 response.
        type: optional; when given, only the source with that name
            ("procedure", "fine", "office", "advisory" or "legal") is searched.

    Each source is queried through ``search_with_ml`` with ``top_k=10`` and
    ``min_score=0.1``. Hits are merged, sorted by descending relevance and the
    first 50 are returned together with the query and the hit count.
    """
    query = normalize_query(request.GET.get("q", ""))
    wanted_type = request.GET.get("type")  # Optional: filter by type

    if not query:
        return Response({"error": "q parameter is required"}, status=400)

    # One row per searchable source:
    #   (type label, queryset factory, text fields, serializer callable).
    # Querysets are built through factories so a source that is filtered out
    # is never touched.
    sources = (
        (
            "procedure",
            lambda: Procedure.objects.all(),
            ["title", "domain", "conditions", "dossier"],
            lambda item: ProcedureSerializer(item).data,
        ),
        (
            "fine",
            lambda: Fine.objects.all(),
            ["name", "code", "article", "decree", "remedial"],
            lambda item: FineSerializer(item).data,
        ),
        (
            "office",
            lambda: Office.objects.all(),
            ["unit_name", "address", "district", "service_scope"],
            lambda item: OfficeSerializer(item).data,
        ),
        (
            "advisory",
            lambda: Advisory.objects.all(),
            ["title", "summary"],
            lambda item: AdvisorySerializer(item).data,
        ),
        (
            "legal",
            lambda: LegalSection.objects.select_related("document").all(),
            ["section_title", "section_code", "content"],
            # Only the legal-section serializer receives the request context.
            lambda item: LegalSectionSerializer(item, context={"request": request}).data,
        ),
    )

    hits = []
    for kind, make_queryset, text_fields, to_data in sources:
        if wanted_type and wanted_type != kind:
            continue
        matches = search_with_ml(make_queryset(), query, text_fields, top_k=10, min_score=0.1)
        for match in matches:
            hits.append({
                "type": kind,
                "data": to_data(match),
                # Objects without an ML score get a neutral 0.5.
                "relevance": getattr(match, "_ml_score", 0.5),
            })

    # Highest relevance first; the sort is stable, so ties keep source order.
    ranked = sorted(hits, key=lambda hit: hit["relevance"], reverse=True)

    return Response({
        "query": query,
        "count": len(ranked),
        "results": ranked[:50],  # Limit total results
    })
121
+
122
@api_view(["GET"])
def procedures_list(request):
    """List procedures, optionally narrowed by ``domain``, ``level`` and ``q``.

    ``domain`` and ``level`` are applied as case-insensitive exact filters.
    A non-empty ``q`` hands the filtered queryset to ``search_with_ml``.
    At most 100 serialized procedures are returned.
    """
    params = request.GET
    keyword = normalize_query(params.get("q", ""))
    procedures = Procedure.objects.all()

    exact_filters = (
        ("domain__iexact", params.get("domain")),
        ("level__iexact", params.get("level")),
    )
    for lookup, value in exact_filters:
        if value:
            procedures = procedures.filter(**{lookup: value})

    if keyword:
        # Use ML-based search for better results
        procedures = search_with_ml(
            procedures,
            keyword,
            ["title", "domain", "conditions", "dossier"],
            top_k=100,
            min_score=0.1,
        )

    serializer = ProcedureSerializer(procedures[:100], many=True)
    return Response(serializer.data)
135
+
136
@api_view(["GET"])
def procedures_detail(request, pk: int):
    """Return the serialized procedure with primary key ``pk``, or a body-less 404."""
    try:
        procedure = Procedure.objects.get(pk=pk)
    except Procedure.DoesNotExist:
        return Response(status=404)
    else:
        payload = ProcedureSerializer(procedure).data
        return Response(payload)
143
+
144
@api_view(["GET"])
def fines_list(request):
    """List fines, optionally narrowed by ``code`` and ``q``.

    ``code`` is a case-insensitive exact filter. A non-empty ``q`` hands the
    filtered queryset to ``search_with_ml``. At most 100 serialized fines are
    returned.
    """
    params = request.GET
    keyword = normalize_query(params.get("q", ""))
    fine_code = params.get("code")

    fines = Fine.objects.all()
    if fine_code:
        fines = fines.filter(code__iexact=fine_code)

    if keyword:
        # Use ML-based search for better results
        fines = search_with_ml(
            fines,
            keyword,
            ["name", "code", "article", "decree", "remedial"],
            top_k=100,
            min_score=0.1,
        )

    serializer = FineSerializer(fines[:100], many=True)
    return Response(serializer.data)
155
+
156
@api_view(["GET"])
def fines_detail(request, pk: int):
    """Return the serialized fine with primary key ``pk``, or a body-less 404."""
    try:
        fine = Fine.objects.get(pk=pk)
    except Fine.DoesNotExist:
        return Response(status=404)
    else:
        payload = FineSerializer(fine).data
        return Response(payload)
163
+
164
@api_view(["GET"])
def offices_list(request):
    """List offices, optionally narrowed by ``district`` and ``q``.

    ``district`` is a case-insensitive exact filter. A non-empty ``q`` hands
    the filtered queryset to ``search_with_ml``. At most 100 serialized
    offices are returned.
    """
    params = request.GET
    keyword = normalize_query(params.get("q", ""))
    district_name = params.get("district")

    offices = Office.objects.all()
    if district_name:
        offices = offices.filter(district__iexact=district_name)

    if keyword:
        # Use ML-based search for better results
        offices = search_with_ml(
            offices,
            keyword,
            ["unit_name", "address", "district", "service_scope"],
            top_k=100,
            min_score=0.1,
        )

    serializer = OfficeSerializer(offices[:100], many=True)
    return Response(serializer.data)
175
+
176
@api_view(["GET"])
def offices_detail(request, pk: int):
    """Return the serialized office with primary key ``pk``, or a body-less 404."""
    try:
        office = Office.objects.get(pk=pk)
    except Office.DoesNotExist:
        return Response(status=404)
    else:
        payload = OfficeSerializer(office).data
        return Response(payload)
183
+
184
@api_view(["GET"])
def advisories_list(request):
    """List advisories ordered by ``-published_at``, optionally searched by ``q``.

    A non-empty ``q`` hands the ordered queryset to ``search_with_ml``.
    At most 100 serialized advisories are returned.
    """
    keyword = normalize_query(request.GET.get("q", ""))
    advisories = Advisory.objects.all().order_by("-published_at")

    if keyword:
        # Use ML-based search for better results
        advisories = search_with_ml(
            advisories,
            keyword,
            ["title", "summary"],
            top_k=100,
            min_score=0.1,
        )

    serializer = AdvisorySerializer(advisories[:100], many=True)
    return Response(serializer.data)
193
+
194
@api_view(["GET"])
def advisories_detail(request, pk: int):
    """Return the serialized advisory with primary key ``pk``, or a body-less 404."""
    try:
        advisory = Advisory.objects.get(pk=pk)
    except Advisory.DoesNotExist:
        return Response(status=404)
    else:
        payload = AdvisorySerializer(advisory).data
        return Response(payload)
201
+
202
@api_view(["GET"])
def legal_sections_list(request):
    """List legal sections, optionally narrowed by document, section code and ``q``.

    ``document_code`` is matched case-insensitively against the related
    document's ``code``; ``section_code`` is a case-insensitive substring
    match. A non-empty ``q`` hands the filtered queryset to
    ``search_with_ml``. At most 100 sections are serialized, with the request
    passed as serializer context.
    """
    params = request.GET
    keyword = normalize_query(params.get("q", ""))
    sections = LegalSection.objects.select_related("document").all()

    lookups = (
        ("document__code__iexact", params.get("document_code")),
        ("section_code__icontains", params.get("section_code")),
    )
    for lookup, value in lookups:
        if value:
            sections = sections.filter(**{lookup: value})

    if keyword:
        sections = search_with_ml(
            sections,
            keyword,
            ["section_title", "section_code", "content"],
            top_k=100,
            min_score=0.1,
        )

    serializer = LegalSectionSerializer(
        sections[:100],
        many=True,
        context={"request": request},
    )
    return Response(serializer.data)
216
+
217
@api_view(["GET"])
def legal_sections_detail(request, pk: int):
    """Return the serialized legal section with primary key ``pk``, or a body-less 404.

    The related document is fetched in the same query and the request is
    passed to the serializer as context.
    """
    sections = LegalSection.objects.select_related("document")
    try:
        section = sections.get(pk=pk)
    except LegalSection.DoesNotExist:
        return Response(status=404)
    else:
        serializer = LegalSectionSerializer(section, context={"request": request})
        return Response(serializer.data)
224
+
225
@api_view(["GET"])
def legal_document_download(request, pk: int):
    """Stream the stored source file of a legal document as an attachment.

    Args:
        request: the incoming DRF request (unused beyond routing).
        pk: primary key of the ``LegalDocument``.

    Returns:
        A ``FileResponse`` serving the file under its own base name.

    Raises:
        Http404: when the document does not exist, has no ``source_file``,
            or the path is not a readable regular file on this server.
    """
    try:
        doc = LegalDocument.objects.get(pk=pk)
    except LegalDocument.DoesNotExist:
        raise Http404("Document not found")
    if not doc.source_file:
        raise Http404("Document missing source file")

    file_path = Path(doc.source_file)
    # is_file() rather than exists(): a directory "exists" but cannot be
    # opened for reading, which previously surfaced as a 500.
    if not file_path.is_file():
        raise Http404("Source file not found on server")
    try:
        handle = open(file_path, "rb")
    except FileNotFoundError:
        # The file disappeared between the check above and open().
        raise Http404("Source file not found on server")

    # The handle is handed over to FileResponse, which owns it from here on.
    return FileResponse(handle, as_attachment=True, filename=file_path.name)
238
+
239
+
240
def _has_upload_access(request):
    """Decide whether *request* may upload legal documents.

    Access is granted when either:
      * the request's user is authenticated and its ``profile.role`` equals
        ``UserProfile.Roles.ADMIN``; or
      * the ``X-Upload-Token`` header equals the non-empty
        ``settings.LEGAL_UPLOAD_TOKEN``.

    Returns:
        ``True`` when access is granted, ``False`` otherwise.
    """
    import hmac  # function-scope import keeps this block self-contained

    user = getattr(request, "user", None)
    if user and user.is_authenticated:
        profile = getattr(user, "profile", None)
        if profile and profile.role == UserProfile.Roles.ADMIN:
            return True

    expected = getattr(settings, "LEGAL_UPLOAD_TOKEN", "")
    header_token = request.headers.get("X-Upload-Token")
    if not expected or not header_token:
        return False
    if not isinstance(expected, str) or not isinstance(header_token, str):
        # A non-string value could never equal the header text.
        return False

    # Constant-time comparison so response timing does not reveal how much of
    # the shared secret a guessed token got right. Comparing encoded bytes
    # avoids compare_digest's ASCII-only restriction on str arguments.
    return hmac.compare_digest(
        header_token.encode("utf-8"),
        expected.encode("utf-8"),
    )
249
+
250
+
251
@api_view(["POST"])
@parser_classes([MultiPartParser, FormParser])
def legal_document_upload(request):
    """Accept a legal document upload and enqueue it for ingestion.

    Expected form fields:
        file: the uploaded document (required).
        code: document code (required, whitespace-trimmed).
        title, doc_type, summary, issued_by, issued_at, source_url,
        mime_type: optional descriptive fields; ``title`` falls back to
            ``code``, ``doc_type`` to "other", and ``mime_type`` to the
            upload's ``content_type``.
        metadata: optional extra metadata, parsed as JSON when sent as a string.

    Returns:
        202 with the serialized ingestion job on success;
        403 when the caller lacks upload access;
        400 for a missing file/code, invalid metadata JSON, or a
            ``ValueError`` raised by ``enqueue_ingestion_job``;
        500 for any other failure while enqueuing.
    """
    import logging  # function-scope import keeps this block self-contained

    if not _has_upload_access(request):
        return Response({"error": "unauthorized"}, status=403)

    upload = request.FILES.get("file")
    if not upload:
        return Response({"error": "file is required"}, status=400)

    code = (request.data.get("code") or "").strip()
    if not code:
        return Response({"error": "code is required"}, status=400)

    metadata = {
        "code": code,
        "title": request.data.get("title") or code,
        "doc_type": request.data.get("doc_type", "other"),
        "summary": request.data.get("summary", ""),
        "issued_by": request.data.get("issued_by", ""),
        "issued_at": request.data.get("issued_at"),
        "source_url": request.data.get("source_url", ""),
        "mime_type": request.data.get("mime_type") or getattr(upload, "content_type", ""),
        "metadata": {},
    }

    extra_meta = request.data.get("metadata")
    if extra_meta:
        if isinstance(extra_meta, str):
            try:
                metadata["metadata"] = json.loads(extra_meta)
            except (ValueError, RecursionError):
                # JSONDecodeError is a ValueError; RecursionError covers
                # pathologically nested input.
                return Response({"error": "metadata must be valid JSON"}, status=400)
        else:
            metadata["metadata"] = extra_meta

    try:
        job = enqueue_ingestion_job(
            file_obj=upload,
            filename=upload.name,
            metadata=metadata,
        )
    except ValueError as exc:
        return Response({"error": str(exc)}, status=400)
    except Exception as exc:
        # Record the traceback server-side; previously the failure left no trace.
        logging.getLogger(__name__).exception(
            "Failed to enqueue ingestion job for code %r", code
        )
        # NOTE(review): the raw exception text is still returned for backward
        # compatibility; consider a generic message to avoid leaking internals.
        return Response({"error": str(exc)}, status=500)

    serialized = IngestionJobSerializer(job, context={"request": request}).data
    return Response(serialized, status=202)
296
+
297
+
298
@api_view(["GET"])
def legal_ingestion_job_detail(request, job_id):
    """Return the serialized ingestion job whose ``id`` is ``job_id`` (404 if absent)."""
    ingestion_job = get_object_or_404(IngestionJob, id=job_id)
    serializer = IngestionJobSerializer(ingestion_job, context={"request": request})
    return Response(serializer.data)
302
+
303
+
304
@api_view(["GET"])
def legal_ingestion_job_list(request):
    """List the 20 most recently created ingestion jobs.

    When the ``code`` query parameter is given, only jobs with exactly that
    code are considered.
    """
    wanted_code = request.GET.get("code")
    jobs = IngestionJob.objects.all()
    if wanted_code:
        jobs = jobs.filter(code=wanted_code)
    latest_jobs = jobs.order_by("-created_at")[:20]
    payload = IngestionJobSerializer(
        latest_jobs,
        many=True,
        context={"request": request},
    ).data
    return Response(payload)
313
+
314
@api_view(["POST"])
def chat(request):
    """Chatbot endpoint for natural language queries.

    Expects a request body with a string ``message`` field.

    Returns:
        The chatbot's response on success;
        400 when ``message`` is missing, blank, or not a string;
        500 with an error payload when the chatbot raises.
    """
    import logging  # function-scope import keeps this block self-contained

    payload = request.data
    # The body may be a non-mapping (e.g. a JSON list) and ``message`` may be
    # null or a non-string; calling .strip() on those used to raise an
    # unhandled AttributeError instead of a clean 400.
    raw_message = payload.get("message", "") if hasattr(payload, "get") else ""
    message = raw_message.strip() if isinstance(raw_message, str) else ""
    if not message:
        return Response({"error": "message is required"}, status=400)

    try:
        chatbot = get_chatbot()
        response = chatbot.generate_response(message)
        return Response(response)
    except Exception as e:
        # Keep the traceback server-side; the client only gets the payload below.
        logging.getLogger(__name__).exception("Chatbot failed to generate a response")
        # NOTE(review): "error" still carries the raw exception text for
        # backward compatibility; consider dropping it to avoid leaking internals.
        return Response({
            "message": "Xin lỗi, có lỗi xảy ra. Vui lòng thử lại.",
            "intent": "error",
            "error": str(e),
            "results": [],
            "count": 0
        }, status=500)
333
+