|
|
import json |
|
|
from django.conf import settings |
|
|
from django.db.models.functions import Lower |
|
|
from django.db.models import Q |
|
|
from django.http import FileResponse, Http404 |
|
|
from django.shortcuts import get_object_or_404 |
|
|
from pathlib import Path |
|
|
from rest_framework.decorators import api_view, parser_classes |
|
|
from rest_framework.parsers import MultiPartParser, FormParser |
|
|
from rest_framework.response import Response |
|
|
from .models import Procedure, Fine, Office, Advisory, LegalSection, LegalDocument, Synonym, IngestionJob |
|
|
from .serializers import ( |
|
|
ProcedureSerializer, |
|
|
FineSerializer, |
|
|
OfficeSerializer, |
|
|
AdvisorySerializer, |
|
|
LegalSectionSerializer, |
|
|
LegalDocumentSerializer, |
|
|
IngestionJobSerializer, |
|
|
) |
|
|
from .services import enqueue_ingestion_job |
|
|
from .search_ml import search_with_ml |
|
|
|
|
|
|
|
|
try: |
|
|
from hue_portal.chatbot.chatbot import get_chatbot |
|
|
except ImportError: |
|
|
from .chatbot import get_chatbot |
|
|
|
|
|
def normalize_query(q: str) -> str:
    """Return *q* without surrounding whitespace; falsy input becomes ""."""
    if not q:
        return ""
    return q.strip()
|
|
|
|
|
@api_view(["GET"])
def search(request):
    """Unified search endpoint - searches across all models.

    Query parameters:
        q: Search text (required). A missing or blank value yields HTTP 400.
        type: Optional category filter: "procedure", "fine", "office",
            "advisory" or "legal". When absent or empty, every category
            is searched.

    The response carries the normalized query, the number of hits collected,
    and at most 50 hits ordered by descending relevance.
    """
    query = normalize_query(request.GET.get("q", ""))
    wanted = request.GET.get("type")

    if not query:
        return Response({"error": "q parameter is required"}, status=400)

    # One row per category:
    #   (label, queryset factory, text fields, serializer class, serializer kwargs)
    # Row order matters: the final sort is stable, so hits with equal
    # relevance keep this category order.
    sources = (
        (
            "procedure",
            lambda: Procedure.objects.all(),
            ["title", "domain", "conditions", "dossier"],
            ProcedureSerializer,
            {},
        ),
        (
            "fine",
            lambda: Fine.objects.all(),
            ["name", "code", "article", "decree", "remedial"],
            FineSerializer,
            {},
        ),
        (
            "office",
            lambda: Office.objects.all(),
            ["unit_name", "address", "district", "service_scope"],
            OfficeSerializer,
            {},
        ),
        (
            "advisory",
            lambda: Advisory.objects.all(),
            ["title", "summary"],
            AdvisorySerializer,
            {},
        ),
        (
            "legal",
            lambda: LegalSection.objects.select_related("document").all(),
            ["section_title", "section_code", "content"],
            LegalSectionSerializer,
            # Only the legal-section serializer receives the request context.
            {"context": {"request": request}},
        ),
    )

    hits = []
    for label, make_queryset, fields, serializer_cls, serializer_kwargs in sources:
        # A non-empty "type" restricts the search to that single category.
        if wanted and wanted != label:
            continue
        matches = search_with_ml(make_queryset(), query, fields, top_k=10, min_score=0.1)
        for match in matches:
            hits.append({
                "type": label,
                "data": serializer_cls(match, **serializer_kwargs).data,
                # Objects without an attached score fall back to 0.5.
                "relevance": getattr(match, "_ml_score", 0.5),
            })

    hits.sort(key=lambda hit: hit["relevance"], reverse=True)

    payload = {
        "query": query,
        "count": len(hits),
        "results": hits[:50],
    }
    return Response(payload)
|
|
|
|
|
@api_view(["GET"])
def procedures_list(request):
    """List procedures, optionally filtered by ``domain`` / ``level`` and ``q``.

    ``domain`` and ``level`` are case-insensitive exact matches. When ``q`` is
    non-blank, the filtered queryset is handed to ``search_with_ml``
    (top_k=100, min_score=0.1) and its result is used instead. At most 100
    items are serialized.
    """
    params = request.GET
    search_text = normalize_query(params.get("q", ""))
    procedures = Procedure.objects.all()

    # Apply each exact-match filter only when its parameter is non-empty.
    for param_name in ("domain", "level"):
        value = params.get(param_name)
        if value:
            procedures = procedures.filter(**{f"{param_name}__iexact": value})

    if search_text:
        procedures = search_with_ml(
            procedures,
            search_text,
            ["title", "domain", "conditions", "dossier"],
            top_k=100,
            min_score=0.1,
        )

    serializer = ProcedureSerializer(procedures[:100], many=True)
    return Response(serializer.data)
|
|
|
|
|
@api_view(["GET"])
def procedures_detail(request, pk: int):
    """Return the serialized procedure with primary key *pk*, or a body-less 404."""
    try:
        procedure = Procedure.objects.get(pk=pk)
    except Procedure.DoesNotExist:
        return Response(status=404)
    else:
        serializer = ProcedureSerializer(procedure)
        return Response(serializer.data)
|
|
|
|
|
@api_view(["GET"])
def fines_list(request):
    """List fines, optionally filtered by ``code`` and searched by ``q``.

    ``code`` is a case-insensitive exact match. When ``q`` is non-blank, the
    queryset is handed to ``search_with_ml`` (top_k=100, min_score=0.1) and
    its result is used instead. At most 100 items are serialized.
    """
    params = request.GET
    search_text = normalize_query(params.get("q", ""))
    fine_code = params.get("code")

    fines = Fine.objects.all()
    if fine_code:
        fines = fines.filter(code__iexact=fine_code)

    if search_text:
        fines = search_with_ml(
            fines,
            search_text,
            ["name", "code", "article", "decree", "remedial"],
            top_k=100,
            min_score=0.1,
        )

    serializer = FineSerializer(fines[:100], many=True)
    return Response(serializer.data)
|
|
|
|
|
@api_view(["GET"])
def fines_detail(request, pk: int):
    """Return the serialized fine with primary key *pk*, or a body-less 404."""
    try:
        fine = Fine.objects.get(pk=pk)
    except Fine.DoesNotExist:
        return Response(status=404)
    else:
        serializer = FineSerializer(fine)
        return Response(serializer.data)
|
|
|
|
|
@api_view(["GET"])
def offices_list(request):
    """List offices, optionally filtered by ``district`` and searched by ``q``.

    ``district`` is a case-insensitive exact match. When ``q`` is non-blank,
    the queryset is handed to ``search_with_ml`` (top_k=100, min_score=0.1)
    and its result is used instead. At most 100 items are serialized.
    """
    params = request.GET
    search_text = normalize_query(params.get("q", ""))
    district_name = params.get("district")

    offices = Office.objects.all()
    if district_name:
        offices = offices.filter(district__iexact=district_name)

    if search_text:
        offices = search_with_ml(
            offices,
            search_text,
            ["unit_name", "address", "district", "service_scope"],
            top_k=100,
            min_score=0.1,
        )

    serializer = OfficeSerializer(offices[:100], many=True)
    return Response(serializer.data)
|
|
|
|
|
@api_view(["GET"])
def offices_detail(request, pk: int):
    """Return the serialized office with primary key *pk*, or a body-less 404."""
    try:
        office = Office.objects.get(pk=pk)
    except Office.DoesNotExist:
        return Response(status=404)
    else:
        serializer = OfficeSerializer(office)
        return Response(serializer.data)
|
|
|
|
|
@api_view(["GET"])
def advisories_list(request):
    """List advisories ordered by ``-published_at``, optionally searched by ``q``.

    When ``q`` is non-blank, the ordered queryset is handed to
    ``search_with_ml`` (top_k=100, min_score=0.1) and its result is used
    instead. At most 100 items are serialized.
    """
    search_text = normalize_query(request.GET.get("q", ""))
    advisories = Advisory.objects.all().order_by("-published_at")

    if search_text:
        advisories = search_with_ml(
            advisories,
            search_text,
            ["title", "summary"],
            top_k=100,
            min_score=0.1,
        )

    serializer = AdvisorySerializer(advisories[:100], many=True)
    return Response(serializer.data)
|
|
|
|
|
@api_view(["GET"])
def advisories_detail(request, pk: int):
    """Return the serialized advisory with primary key *pk*, or a body-less 404."""
    try:
        advisory = Advisory.objects.get(pk=pk)
    except Advisory.DoesNotExist:
        return Response(status=404)
    else:
        serializer = AdvisorySerializer(advisory)
        return Response(serializer.data)
|
|
|
|
|
@api_view(["GET"])
def legal_sections_list(request):
    """List legal sections with optional document/section filters and ``q`` search.

    ``document_code`` is a case-insensitive exact match on the related
    document's code; ``section_code`` is a case-insensitive substring match.
    When ``q`` is non-blank, the queryset is handed to ``search_with_ml``
    (top_k=100, min_score=0.1) and its result is used instead. At most 100
    items are serialized, with the request passed as serializer context.
    """
    params = request.GET
    search_text = normalize_query(params.get("q", ""))
    sections = LegalSection.objects.select_related("document").all()

    # (query parameter, ORM lookup) pairs; empty parameters are ignored.
    optional_filters = (
        ("document_code", "document__code__iexact"),
        ("section_code", "section_code__icontains"),
    )
    for param_name, lookup in optional_filters:
        value = params.get(param_name)
        if value:
            sections = sections.filter(**{lookup: value})

    if search_text:
        sections = search_with_ml(
            sections,
            search_text,
            ["section_title", "section_code", "content"],
            top_k=100,
            min_score=0.1,
        )

    serializer = LegalSectionSerializer(
        sections[:100],
        many=True,
        context={"request": request},
    )
    return Response(serializer.data)
|
|
|
|
|
@api_view(["GET"])
def legal_sections_detail(request, pk: int):
    """Return the serialized legal section with primary key *pk*, or a body-less 404.

    The related document is fetched in the same query via ``select_related``.
    """
    sections = LegalSection.objects.select_related("document")
    try:
        section = sections.get(pk=pk)
    except LegalSection.DoesNotExist:
        return Response(status=404)
    else:
        serializer = LegalSectionSerializer(section, context={"request": request})
        return Response(serializer.data)
|
|
|
|
|
@api_view(["GET"])
def legal_document_download(request, pk: int):
    """Send the stored source file of a legal document as an attachment.

    Args:
        request: The incoming GET request.
        pk: Primary key of the ``LegalDocument`` to download.

    Returns:
        A ``FileResponse`` for the file at ``doc.source_file``, offered as a
        download named after the file's base name.

    Raises:
        Http404: If no document has this primary key, the document has no
            ``source_file`` value, or that path is not a regular file that
            can be opened.
    """
    try:
        doc = LegalDocument.objects.get(pk=pk)
    except LegalDocument.DoesNotExist:
        raise Http404("Document not found") from None

    if not doc.source_file:
        raise Http404("Document missing source file")

    file_path = Path(doc.source_file)
    # is_file() rather than exists(): a directory path "exists" but cannot be
    # opened for reading, which would otherwise surface as a server error.
    if not file_path.is_file():
        raise Http404("Source file not found on server")

    try:
        handle = file_path.open("rb")
    except FileNotFoundError:
        # The file can vanish between the check above and the open call.
        raise Http404("Source file not found on server") from None

    # The handle is deliberately left open: FileResponse takes ownership of it.
    return FileResponse(handle, as_attachment=True, filename=file_path.name)
|
|
|
|
|
|
|
|
def _has_upload_access(request): |
|
|
if getattr(request, "user", None) and request.user.is_authenticated: |
|
|
return True |
|
|
expected = getattr(settings, "LEGAL_UPLOAD_TOKEN", "") |
|
|
header_token = request.headers.get("X-Upload-Token") |
|
|
return bool(expected and header_token and header_token == expected) |
|
|
|
|
|
|
|
|
@api_view(["POST"])
@parser_classes([MultiPartParser, FormParser])
def legal_document_upload(request):
    """Accept a legal document upload and enqueue it for ingestion.

    Form fields:
        file: The uploaded document (required).
        code: Document code (required, surrounding whitespace stripped).
        title, doc_type, summary, issued_by, issued_at, source_url,
        mime_type: Optional descriptive fields. ``title`` falls back to the
            code, ``doc_type`` to "other", and ``mime_type`` to the upload's
            content type.
        metadata: Optional extra metadata; a string value is parsed as JSON.

    Responses:
        202: Serialized ingestion job.
        400: Missing file or code, invalid metadata JSON, or a ``ValueError``
            raised by ``enqueue_ingestion_job``.
        403: The request fails ``_has_upload_access``.
        500: Any other failure while enqueuing the job.
    """
    # Function-scope import keeps this block self-contained.
    import logging

    logger = logging.getLogger(__name__)

    if not _has_upload_access(request):
        return Response({"error": "unauthorized"}, status=403)

    upload = request.FILES.get("file")
    if not upload:
        return Response({"error": "file is required"}, status=400)

    code = (request.data.get("code") or "").strip()
    if not code:
        return Response({"error": "code is required"}, status=400)

    metadata = {
        "code": code,
        "title": request.data.get("title") or code,
        "doc_type": request.data.get("doc_type", "other"),
        "summary": request.data.get("summary", ""),
        "issued_by": request.data.get("issued_by", ""),
        "issued_at": request.data.get("issued_at"),
        "source_url": request.data.get("source_url", ""),
        "mime_type": request.data.get("mime_type") or getattr(upload, "content_type", ""),
        "metadata": {},
    }

    extra_meta = request.data.get("metadata")
    if extra_meta:
        if isinstance(extra_meta, str):
            try:
                metadata["metadata"] = json.loads(extra_meta)
            except (ValueError, RecursionError):
                # json.JSONDecodeError subclasses ValueError; RecursionError
                # covers pathologically nested input.
                return Response({"error": "metadata must be valid JSON"}, status=400)
        else:
            metadata["metadata"] = extra_meta

    try:
        job = enqueue_ingestion_job(
            file_obj=upload,
            filename=upload.name,
            metadata=metadata,
        )
    except ValueError as exc:
        return Response({"error": str(exc)}, status=400)
    except Exception as exc:
        # Record the traceback: this failure used to be converted into a
        # response without leaving any trace on the server.
        logger.exception("Failed to enqueue ingestion job for document code %r", code)
        # NOTE(review): the response still echoes the raw exception text to
        # the client, as before; consider a generic message if that text can
        # contain internal details.
        return Response({"error": str(exc)}, status=500)

    serialized = IngestionJobSerializer(job, context={"request": request}).data
    return Response(serialized, status=202)
|
|
|
|
|
|
|
|
@api_view(["GET"])
def legal_ingestion_job_detail(request, job_id):
    """Return the serialized ingestion job whose id is *job_id* (404 if absent)."""
    ingestion_job = get_object_or_404(IngestionJob, id=job_id)
    serializer = IngestionJobSerializer(ingestion_job, context={"request": request})
    return Response(serializer.data)
|
|
|
|
|
|
|
|
@api_view(["GET"])
def legal_ingestion_job_list(request):
    """Return the 20 most recently created ingestion jobs.

    A non-empty ``code`` query parameter restricts the list to jobs with
    exactly that code.
    """
    jobs = IngestionJob.objects.all()

    code_filter = request.GET.get("code")
    if code_filter:
        jobs = jobs.filter(code=code_filter)

    latest_jobs = jobs.order_by("-created_at")[:20]
    payload = IngestionJobSerializer(
        latest_jobs,
        many=True,
        context={"request": request},
    ).data
    return Response(payload)
|
|
|
|
|
@api_view(["POST"])
def chat(request):
    """Chatbot endpoint for natural language queries.

    Expects a request body with a string ``message`` field.

    Responses:
        200: Whatever ``get_chatbot().generate_response(message)`` returns.
        400: ``message`` is missing, blank, or not a string.
        500: The chatbot raised; the body carries an apology message, the
            "error" intent, the exception text, and an empty result list.
    """
    # Function-scope import keeps this block self-contained.
    import logging

    payload = request.data
    # A JSON body that is not an object (e.g. a list) has no .get().
    raw_message = payload.get("message", "") if hasattr(payload, "get") else ""
    if raw_message is None:
        # JSON null is treated the same as an absent field.
        raw_message = ""
    if not isinstance(raw_message, str):
        # Numbers, lists and objects used to crash on .strip() below.
        return Response({"error": "message must be a string"}, status=400)

    message = raw_message.strip()
    if not message:
        return Response({"error": "message is required"}, status=400)

    try:
        chatbot = get_chatbot()
        response = chatbot.generate_response(message)
        return Response(response)
    except Exception as e:
        # Keep a server-side traceback; the failure used to be visible only
        # in the response body.
        logging.getLogger(__name__).exception("Chatbot failed to generate a response")
        # NOTE(review): the "error" field still exposes the raw exception
        # text to the client, as before; consider dropping it.
        return Response({
            "message": "Xin lỗi, có lỗi xảy ra. Vui lòng thử lại.",
            "intent": "error",
            "error": str(e),
            "results": [],
            "count": 0
        }, status=500)
|
|
|
|
|
|