Spaces:

davidtran999
/

hue-portal-backend-v2

Sleeping

File size: 10,282 Bytes

519b145

"""
Dual-Path RAG Router - Routes queries to Fast Path (golden dataset) or Slow Path (full RAG).
"""
from __future__ import annotations

import re
import unicodedata
from dataclasses import dataclass
from typing import Dict, Optional, List, Tuple
import numpy as np
from django.db.models import Q

from hue_portal.core.models import GoldenQuery
from hue_portal.core.embeddings import get_embedding_model


@dataclass
class RouteDecision:
    """Outcome of a routing attempt between the fast path and the slow path.

    Instances are produced by ``KeywordRouter.route`` and
    ``DualPathRouter.route``. Fields that only make sense for a golden-dataset
    hit (``matched_golden_query_id``, ``similarity_score``) stay ``None`` when
    the query is sent to the slow path.
    """
    # Target pipeline: "fast_path" (golden dataset hit) or "slow_path" (full RAG).
    path: str  # "fast_path" or "slow_path"
    # Which routing stage produced the decision. The routers in this module
    # emit "keyword", "similarity" and "default"; "llm" is reserved for the
    # LLM fallback stage.
    method: str  # "keyword" or "llm" or "similarity" or "default"
    # Score attached to the decision: 1.0 for an exact keyword match, the
    # overlap/similarity score for fuzzy and semantic matches, and the
    # caller-supplied intent confidence for slow-path decisions.
    confidence: float
    # Primary key of the matched golden query; None when nothing matched.
    matched_golden_query_id: Optional[int] = None
    # Similarity value behind a fuzzy or semantic match; None otherwise.
    similarity_score: Optional[float] = None
    # Intent passed in by the caller, echoed back unchanged.
    intent: Optional[str] = None
    # Short machine-readable reason, e.g. "exact_match", "fuzzy_match",
    # "semantic_similarity", "no_keyword_match", "no_fast_path_match".
    rationale: str = ""


class KeywordRouter:
    """Fast keyword-based router to match queries against golden dataset.

    Matching happens in two stages:

    1. Exact match of the normalized query against
       ``GoldenQuery.query_normalized`` (active rows only).
    2. Fuzzy match against active golden queries of the same intent: one
       normalized string must contain the other, and the Jaccard overlap of
       their word sets must reach ``_FUZZY_THRESHOLD``.
    """

    # Upper bound on memoized normalizations. Keys are raw user queries, so an
    # unbounded dict would grow for the lifetime of the process.
    _MAX_CACHE_ENTRIES = 1024
    # Minimum Jaccard word overlap required for a fuzzy match (70%).
    _FUZZY_THRESHOLD = 0.7
    # Maximum number of same-intent golden queries compared in the fuzzy pass.
    _FUZZY_CANDIDATE_LIMIT = 50
    # Maximum number of duplicate exact matches inspected when disambiguating.
    _EXACT_CANDIDATE_LIMIT = 20

    def __init__(self):
        self._normalize_cache = {}

    def _normalize_query(self, query: str) -> str:
        """Normalize query for matching (lowercase, remove accents, extra spaces).

        Accents are removed by NFD-decomposing the text and dropping combining
        marks (Unicode category ``Mn``). Characters that have no canonical
        decomposition (for example "đ") are left unchanged.

        Args:
            query: Raw query text.

        Returns:
            The normalized string; results are memoized per raw input.
        """
        cached = self._normalize_cache.get(query)
        if cached is not None:
            return cached

        normalized = query.lower().strip()
        # Remove accents for accent-insensitive matching
        normalized = unicodedata.normalize("NFD", normalized)
        normalized = "".join(ch for ch in normalized if unicodedata.category(ch) != "Mn")
        # Collapse runs of whitespace into single spaces
        normalized = re.sub(r'\s+', ' ', normalized).strip()

        # Keep the memo bounded: drop everything once the cap is reached.
        if len(self._normalize_cache) >= self._MAX_CACHE_ENTRIES:
            self._normalize_cache.clear()
        self._normalize_cache[query] = normalized
        return normalized

    def _fuzzy_match(self, query_normalized: str, intent: str) -> Optional[Tuple[int, float]]:
        """Return ``(golden_query_id, similarity)`` for the first fuzzy hit, else None.

        A candidate qualifies when one normalized string contains the other and
        the Jaccard similarity of the two word sets is at least
        ``_FUZZY_THRESHOLD``.
        """
        query_words = set(query_normalized.split())
        if not query_words:
            # An empty query can never reach the overlap threshold.
            return None

        # Limit the candidate set to avoid too many comparisons
        golden_queries = GoldenQuery.objects.filter(
            intent=intent,
            is_active=True,
        )[:self._FUZZY_CANDIDATE_LIMIT]

        for gq in golden_queries:
            gq_normalized = self._normalize_query(gq.query)

            # Cheap pre-filter: one string must be a substring of the other
            if not (query_normalized in gq_normalized
                    or gq_normalized in query_normalized):
                continue

            gq_words = set(gq_normalized.split())
            if not gq_words:
                continue

            # Simple Jaccard similarity over word sets
            similarity = len(query_words & gq_words) / len(query_words | gq_words)
            if similarity >= self._FUZZY_THRESHOLD:
                return gq.id, similarity

        return None

    def route(self, query: str, intent: str, confidence: float) -> RouteDecision:
        """
        Try to match query against golden dataset using keyword matching.

        Args:
            query: User query string.
            intent: Detected intent; used to pick among duplicate exact matches
                and to restrict fuzzy candidates.
            confidence: Intent classification confidence, echoed back on a
                slow-path decision.

        Returns:
            RouteDecision with path="fast_path" if match found, else path="slow_path"
        """
        query_normalized = self._normalize_query(query)

        # Try exact match first (fastest). Duplicated active rows are tolerated:
        # prefer the row whose intent matches, otherwise take the lowest pk, so
        # that duplicates no longer cause the exact match to be discarded.
        exact_candidates = list(
            GoldenQuery.objects.filter(
                query_normalized=query_normalized,
                is_active=True,
            ).order_by("pk")[:self._EXACT_CANDIDATE_LIMIT]
        )
        if exact_candidates:
            golden_query = next(
                (gq for gq in exact_candidates if gq.intent == intent),
                exact_candidates[0],
            )
            return RouteDecision(
                path="fast_path",
                method="keyword",
                confidence=1.0,
                matched_golden_query_id=golden_query.id,
                intent=intent,
                rationale="exact_match"
            )

        # Fuzzy matching is best-effort: a failure must not break routing, but
        # it is logged instead of being silently discarded.
        try:
            fuzzy_hit = self._fuzzy_match(query_normalized, intent)
        except Exception:
            import logging
            logging.getLogger(__name__).warning(
                "Keyword fuzzy match failed; falling back to slow path",
                exc_info=True,
            )
            fuzzy_hit = None

        if fuzzy_hit is not None:
            golden_query_id, similarity = fuzzy_hit
            return RouteDecision(
                path="fast_path",
                method="keyword",
                confidence=similarity,
                matched_golden_query_id=golden_query_id,
                similarity_score=similarity,
                intent=intent,
                rationale="fuzzy_match"
            )

        # No match found
        return RouteDecision(
            path="slow_path",
            method="keyword",
            confidence=confidence,
            intent=intent,
            rationale="no_keyword_match"
        )


class DualPathRouter:
    """Main router that decides Fast Path vs Slow Path using hybrid approach.

    Stages are tried in order and the first fast-path hit wins:
    keyword routing, semantic similarity against stored golden-query
    embeddings, then (only for low-confidence intents) the LLM fallback.
    """

    def __init__(self, similarity_threshold: float = 0.85):
        """
        Initialize Dual-Path Router.

        Args:
            similarity_threshold: Minimum similarity score for semantic matching (default: 0.85)
        """
        self.keyword_router = KeywordRouter()
        self.llm_router = None  # Lazy load if needed
        self.similarity_threshold = similarity_threshold
        self._embedding_model = None

    def route(self, query: str, intent: str, confidence: float) -> RouteDecision:
        """
        Route query to Fast Path or Slow Path.

        Args:
            query: User query string.
            intent: Detected intent.
            confidence: Intent classification confidence.

        Returns:
            RouteDecision with path, method, and matched golden query ID if applicable.
        """
        # Step 1: Keyword-based routing (cheapest stage)
        keyword_decision = self.keyword_router.route(query, intent, confidence)
        if keyword_decision.path == "fast_path":
            return keyword_decision

        # Step 2: Semantic similarity search in golden dataset
        similarity_match = self._find_similar_golden_query(query, intent)
        if similarity_match and similarity_match['score'] >= self.similarity_threshold:
            return RouteDecision(
                path="fast_path",
                method="similarity",
                confidence=similarity_match['score'],
                matched_golden_query_id=similarity_match['id'],
                similarity_score=similarity_match['score'],
                intent=intent,
                rationale="semantic_similarity"
            )

        # Step 3: LLM router fallback (for edge cases)
        # Only use if confidence is low (uncertain intent)
        if confidence < 0.7:
            llm_decision = self._llm_route(query, intent)
            if llm_decision and llm_decision.path == "fast_path":
                return llm_decision

        # Default: Slow Path (full RAG pipeline)
        return RouteDecision(
            path="slow_path",
            method="default",
            confidence=confidence,
            intent=intent,
            rationale="no_fast_path_match"
        )

    @staticmethod
    def _best_cosine_match(query_embedding, candidates) -> Optional[Tuple[int, float]]:
        """Return ``(candidate_id, score)`` of the best positive cosine match.

        Args:
            query_embedding: Array-like query vector.
            candidates: Iterable of ``(candidate_id, embedding)`` pairs, where
                ``embedding`` is array-like or None.

        Returns:
            The pair with the highest cosine similarity strictly greater than
            0 (first one wins on ties), or None when no candidate qualifies.
            Candidates that are missing, empty, non-numeric, zero-norm or of a
            different dimension than the query are skipped rather than
            aborting the whole search.
        """
        query_vec = np.asarray(query_embedding, dtype=float).ravel()
        query_norm = np.linalg.norm(query_vec)
        # A zero or non-finite query vector has no meaningful direction.
        if query_vec.size == 0 or not np.isfinite(query_norm) or query_norm == 0.0:
            return None
        query_vec = query_vec / query_norm

        best_id = None
        best_score = 0.0
        for candidate_id, raw_embedding in candidates:
            # Explicit None check: truth-testing an ndarray would raise.
            if raw_embedding is None:
                continue
            try:
                candidate_vec = np.asarray(raw_embedding, dtype=float).ravel()
            except (TypeError, ValueError):
                continue
            # Also rejects empty embeddings, whose shape is (0,).
            if candidate_vec.shape != query_vec.shape:
                continue
            candidate_norm = np.linalg.norm(candidate_vec)
            if not np.isfinite(candidate_norm) or candidate_norm == 0.0:
                continue

            score = float(np.dot(query_vec, candidate_vec) / candidate_norm)
            if score > best_score:
                best_score = score
                best_id = candidate_id

        if best_id is None:
            return None
        return best_id, best_score

    def _find_similar_golden_query(self, query: str, intent: str) -> Optional[Dict]:
        """
        Find similar query in golden dataset using semantic search.

        Args:
            query: User query.
            intent: Detected intent.

        Returns:
            Dict with 'id' and 'score' if a match at or above
            ``self.similarity_threshold`` is found, None otherwise. Any
            exception is logged and reported as "no match".
        """
        try:
            # Get active golden queries with same intent
            golden_queries = list(
                GoldenQuery.objects.filter(
                    intent=intent,
                    is_active=True,
                    query_embedding__isnull=False
                )[:100]  # Limit for performance
            )

            if not golden_queries:
                return None

            # Get embedding model
            embedding_model = self._get_embedding_model()
            if not embedding_model:
                return None

            # Generate query embedding; normalization happens in the helper
            query_embedding = embedding_model.encode(query, convert_to_numpy=True)

            best = self._best_cosine_match(
                query_embedding,
                ((gq.id, gq.query_embedding) for gq in golden_queries),
            )
            if best is None:
                return None

            best_id, best_score = best
            if best_score >= self.similarity_threshold:
                return {
                    'id': best_id,
                    'score': best_score
                }

            return None

        except Exception as e:
            # Log error but don't fail: semantic search is an optional stage
            import logging
            logger = logging.getLogger(__name__)
            logger.warning("Error in semantic similarity search: %s", e)
            return None

    def _get_embedding_model(self):
        """Lazy load embedding model."""
        if self._embedding_model is None:
            self._embedding_model = get_embedding_model()
        return self._embedding_model

    def _llm_route(self, query: str, intent: str) -> Optional[RouteDecision]:
        """
        Use LLM to decide routing (optional, for edge cases).

        This is a fallback for low-confidence queries where keyword and similarity
        didn't find a match, but LLM might recognize it as a common query.

        Args:
            query: User query.
            intent: Detected intent.

        Returns:
            RouteDecision if LLM finds a match, None otherwise. Currently
            always None because LLM routing is not implemented.
        """
        # For now, return None (LLM routing can be implemented later if needed)
        # This would require a small LLM (7B) to classify if query matches golden dataset
        return None