# Origin: full code push from the hue-portal-backend folder by davidtran999 (commit 519b145).
"""
Dual-Path RAG Router - Routes queries to Fast Path (golden dataset) or Slow Path (full RAG).
"""
from __future__ import annotations
import re
import unicodedata
from dataclasses import dataclass
from typing import Dict, Optional, List, Tuple
import numpy as np
from django.db.models import Q
from hue_portal.core.models import GoldenQuery
from hue_portal.core.embeddings import get_embedding_model
@dataclass
class RouteDecision:
    """Outcome of one routing pass through the Dual-Path Router.

    Only ``path``, ``method`` and ``confidence`` are required; the remaining
    fields default to "not set" and are filled in by whichever routing step
    produced the decision.
    """

    # Destination pipeline: "fast_path" or "slow_path".
    path: str
    # Strategy that produced the decision:
    # "keyword", "llm", "similarity" or "default".
    method: str
    # Score attached to the decision by the routing step that created it.
    confidence: float
    # Primary key of the matched golden query; None when nothing matched.
    matched_golden_query_id: Optional[int] = None
    # Similarity between the query and the matched golden query, if computed.
    similarity_score: Optional[float] = None
    # Intent label the query was routed under.
    intent: Optional[str] = None
    # Short machine-readable reason, e.g. "exact_match" or "no_keyword_match".
    rationale: str = ""
class KeywordRouter:
    """Fast keyword-based router that matches queries against the golden dataset.

    Matching is done on a normalized form of the query: lowercased, combining
    accent marks removed and whitespace collapsed. Two strategies are tried in
    order: an exact match on ``GoldenQuery.query_normalized``, then a
    containment-plus-word-overlap ("fuzzy") match among golden queries that
    share the detected intent.
    """

    # User queries are an unbounded key space, so the memo dict is capped to
    # keep a long-lived router from growing without limit.
    _NORMALIZE_CACHE_MAX = 2048

    # Minimum Jaccard word overlap for a fuzzy match to be accepted.
    _FUZZY_MIN_OVERLAP = 0.7

    # Maximum number of golden queries compared per fuzzy lookup.
    _FUZZY_CANDIDATE_LIMIT = 50

    def __init__(self):
        self._normalize_cache = {}

    def _normalize_query(self, query: str) -> str:
        """Return ``query`` lowercased, accent-stripped and whitespace-collapsed.

        Accent removal works by NFD-decomposing the text and dropping combining
        marks (Unicode category ``Mn``); letters that have no decomposition,
        such as "đ", are therefore left unchanged. Empty or ``None`` input
        yields an empty string.
        """
        if not query:
            return ""

        cached = self._normalize_cache.get(query)
        if cached is not None:
            return cached

        decomposed = unicodedata.normalize("NFD", query.lower().strip())
        without_marks = "".join(
            ch for ch in decomposed if unicodedata.category(ch) != "Mn"
        )
        normalized = re.sub(r"\s+", " ", without_marks).strip()

        if len(self._normalize_cache) >= self._NORMALIZE_CACHE_MAX:
            # Dicts preserve insertion order, so the first key is the oldest.
            del self._normalize_cache[next(iter(self._normalize_cache))]
        self._normalize_cache[query] = normalized
        return normalized

    def _fuzzy_match(
        self, query_normalized: str, intent: str
    ) -> Optional[Tuple[int, float]]:
        """Return ``(golden_query_id, similarity)`` of the best fuzzy match.

        A golden query is a candidate only when one normalized string contains
        the other (e.g. "vuot den do" inside "muc phat vuot den do"); pure
        word-order variants do not pass this check. Candidates are scored with
        Jaccard similarity over their word sets, and the highest score at or
        above ``_FUZZY_MIN_OVERLAP`` wins. Returns ``None`` when nothing
        qualifies.

        The lookup is best-effort: any error is logged and whatever match was
        found before the error (possibly none) is returned.
        """
        query_words = set(query_normalized.split())
        if not query_words:
            return None

        best: Optional[Tuple[int, float]] = None
        try:
            candidates = GoldenQuery.objects.filter(
                intent=intent,
                is_active=True,
            )[: self._FUZZY_CANDIDATE_LIMIT]

            for gq in candidates:
                gq_normalized = self._normalize_query(gq.query)
                contained = (
                    query_normalized in gq_normalized
                    or gq_normalized in query_normalized
                )
                if not contained:
                    continue

                gq_words = set(gq_normalized.split())
                if not gq_words:
                    continue

                similarity = len(query_words & gq_words) / len(query_words | gq_words)
                if similarity < self._FUZZY_MIN_OVERLAP:
                    continue
                if best is None or similarity > best[1]:
                    best = (gq.id, similarity)
        except Exception as exc:
            # Fuzzy matching must never break routing, but the failure should
            # be visible rather than silently swallowed.
            import logging

            logging.getLogger(__name__).warning(
                "Keyword fuzzy match failed: %s", exc
            )
        return best

    def route(self, query: str, intent: str, confidence: float) -> "RouteDecision":
        """Try to match ``query`` against the golden dataset by keywords.

        Args:
            query: User query string.
            intent: Detected intent; restricts the fuzzy candidates.
            confidence: Intent classification confidence, passed through
                unchanged when no match is found.

        Returns:
            RouteDecision with path="fast_path" if a match is found, else
            path="slow_path" with rationale "no_keyword_match".
        """
        query_normalized = self._normalize_query(query)

        if query_normalized:
            # Exact match first (fastest). filter().first() still yields a hit
            # when several active rows share the same normalized text, where
            # .get() would raise MultipleObjectsReturned.
            exact = GoldenQuery.objects.filter(
                query_normalized=query_normalized,
                is_active=True,
            ).first()
            if exact is not None:
                return RouteDecision(
                    path="fast_path",
                    method="keyword",
                    confidence=1.0,
                    matched_golden_query_id=exact.id,
                    intent=intent,
                    rationale="exact_match",
                )

            fuzzy = self._fuzzy_match(query_normalized, intent)
            if fuzzy is not None:
                golden_id, similarity = fuzzy
                return RouteDecision(
                    path="fast_path",
                    method="keyword",
                    confidence=similarity,
                    matched_golden_query_id=golden_id,
                    similarity_score=similarity,
                    intent=intent,
                    rationale="fuzzy_match",
                )

        # Empty query or no match found.
        return RouteDecision(
            path="slow_path",
            method="keyword",
            confidence=confidence,
            intent=intent,
            rationale="no_keyword_match",
        )
class DualPathRouter:
    """Main router that decides Fast Path vs Slow Path using a hybrid approach.

    Routing steps, in order: keyword matching, semantic similarity against
    stored golden-query embeddings, an optional LLM fallback for
    low-confidence intents, and finally the Slow Path default.
    """

    def __init__(
        self,
        similarity_threshold: float = 0.85,
        llm_confidence_threshold: float = 0.7,
    ):
        """
        Initialize Dual-Path Router.

        Args:
            similarity_threshold: Minimum cosine similarity for semantic
                matching (default: 0.85).
            llm_confidence_threshold: The LLM fallback is consulted only when
                the intent confidence is below this value (default: 0.7).
        """
        self.keyword_router = KeywordRouter()
        self.llm_router = None  # Lazy load if needed
        self.similarity_threshold = similarity_threshold
        self.llm_confidence_threshold = llm_confidence_threshold
        self._embedding_model = None

    def route(self, query: str, intent: str, confidence: float) -> "RouteDecision":
        """
        Route query to Fast Path or Slow Path.

        Args:
            query: User query string.
            intent: Detected intent.
            confidence: Intent classification confidence.

        Returns:
            RouteDecision with path, method, and matched golden query ID if
            applicable.
        """
        # Step 1: keyword-based routing (cheapest).
        keyword_decision = self.keyword_router.route(query, intent, confidence)
        if keyword_decision.path == "fast_path":
            return keyword_decision

        # Step 2: semantic similarity search in the golden dataset.
        similarity_match = self._find_similar_golden_query(query, intent)
        if similarity_match and similarity_match['score'] >= self.similarity_threshold:
            score = similarity_match['score']
            return RouteDecision(
                path="fast_path",
                method="similarity",
                confidence=score,
                matched_golden_query_id=similarity_match['id'],
                similarity_score=score,
                intent=intent,
                rationale="semantic_similarity",
            )

        # Step 3: LLM fallback, only when the intent itself is uncertain.
        if confidence < self.llm_confidence_threshold:
            llm_decision = self._llm_route(query, intent)
            if llm_decision and llm_decision.path == "fast_path":
                return llm_decision

        # Default: Slow Path (full RAG pipeline).
        return RouteDecision(
            path="slow_path",
            method="default",
            confidence=confidence,
            intent=intent,
            rationale="no_fast_path_match",
        )

    @staticmethod
    def _unit_vector(values) -> Optional[np.ndarray]:
        """Return ``values`` as a unit-length 1-D float array, or None if unusable.

        Unusable means: ``None``, not convertible to numbers, empty, or having
        a zero or non-finite norm (which would otherwise yield NaN scores).
        """
        if values is None:
            return None
        try:
            vector = np.asarray(values, dtype=float).ravel()
        except (TypeError, ValueError):
            return None
        if vector.size == 0:
            return None
        norm = float(np.linalg.norm(vector))
        if not np.isfinite(norm) or norm == 0.0:
            return None
        return vector / norm

    @classmethod
    def _best_embedding_match(
        cls,
        query_embedding,
        candidates: List[Tuple[int, object]],
    ) -> Tuple[Optional[int], float]:
        """Return ``(id, cosine_similarity)`` of the candidate closest to the query.

        Args:
            query_embedding: Embedding of the user query.
            candidates: ``(golden_query_id, embedding)`` pairs.

        Returns:
            The id and score of the best candidate with a strictly positive
            similarity, or ``(None, 0.0)`` when there is none. Candidates whose
            embedding is unusable or has a different dimension than the query
            are skipped individually instead of aborting the whole search.
        """
        query_vector = cls._unit_vector(query_embedding)
        if query_vector is None:
            return None, 0.0

        best_id: Optional[int] = None
        best_score = 0.0
        for candidate_id, raw_embedding in candidates:
            candidate_vector = cls._unit_vector(raw_embedding)
            if candidate_vector is None:
                continue
            if candidate_vector.shape != query_vector.shape:
                continue
            score = float(np.dot(query_vector, candidate_vector))
            if score > best_score:
                best_id, best_score = candidate_id, score
        return best_id, best_score

    def _find_similar_golden_query(self, query: str, intent: str) -> Optional[Dict]:
        """
        Find similar query in golden dataset using semantic search.

        Args:
            query: User query.
            intent: Detected intent.

        Returns:
            Dict with 'id' and 'score' if a match at or above
            ``similarity_threshold`` is found, None otherwise. Any error is
            logged and reported as None so routing can continue.
        """
        try:
            # Active golden queries with the same intent that have an
            # embedding stored; capped at 100 rows for performance.
            golden_queries = list(
                GoldenQuery.objects.filter(
                    intent=intent,
                    is_active=True,
                    query_embedding__isnull=False,
                )[:100]
            )
            if not golden_queries:
                return None

            embedding_model = self._get_embedding_model()
            if embedding_model is None:
                return None

            query_embedding = embedding_model.encode(query, convert_to_numpy=True)
            best_id, best_score = self._best_embedding_match(
                query_embedding,
                [(gq.id, gq.query_embedding) for gq in golden_queries],
            )

            # Compare against None explicitly so an id of 0 is not rejected.
            if best_id is not None and best_score >= self.similarity_threshold:
                return {
                    'id': best_id,
                    'score': best_score,
                }
            return None
        except Exception as e:
            # Log error but don't fail
            import logging

            logger = logging.getLogger(__name__)
            logger.warning("Error in semantic similarity search: %s", e)
            return None

    def _get_embedding_model(self):
        """Return the embedding model, loading it on first use."""
        if self._embedding_model is None:
            self._embedding_model = get_embedding_model()
        return self._embedding_model

    def _llm_route(self, query: str, intent: str) -> Optional["RouteDecision"]:
        """
        Use LLM to decide routing (optional, for edge cases).

        This is a fallback for low-confidence queries where keyword and
        similarity didn't find a match, but an LLM might recognize the query
        as a common one.

        Args:
            query: User query.
            intent: Detected intent.

        Returns:
            RouteDecision if LLM finds a match, None otherwise. Currently
            always None: LLM routing is not implemented yet.
        """
        # Placeholder: would require a small LLM to classify whether the query
        # matches the golden dataset.
        return None