Spaces:
Sleeping
Sleeping
| """ | |
| Dual-Path RAG Router - Routes queries to Fast Path (golden dataset) or Slow Path (full RAG). | |
| """ | |
| from __future__ import annotations | |
| import re | |
| import unicodedata | |
| from dataclasses import dataclass | |
| from typing import Dict, Optional, List, Tuple | |
| import numpy as np | |
| from django.db.models import Q | |
| from hue_portal.core.models import GoldenQuery | |
| from hue_portal.core.embeddings import get_embedding_model | |
@dataclass
class RouteDecision:
    """Outcome of a routing attempt made by the Dual-Path Router.

    Instances are built with keyword arguments by the routers in this module,
    so the class must be a dataclass to get a generated ``__init__``.

    Attributes:
        path: Which pipeline should answer the query: ``"fast_path"`` or
            ``"slow_path"``.
        method: Which routing stage produced the decision: ``"keyword"``,
            ``"llm"``, ``"similarity"`` or ``"default"``.
        confidence: Confidence attached to the decision. For matches this is
            the match score; otherwise it is the caller-supplied intent
            confidence.
        matched_golden_query_id: Primary key of the matched golden query, or
            ``None`` when nothing matched.
        similarity_score: Similarity score of the match, when one was
            computed; ``None`` otherwise.
        intent: Intent label passed in by the caller, if any.
        rationale: Short machine-readable reason for the decision
            (e.g. ``"exact_match"``, ``"no_fast_path_match"``).
    """

    path: str  # "fast_path" or "slow_path"
    method: str  # "keyword" or "llm" or "similarity" or "default"
    confidence: float
    matched_golden_query_id: Optional[int] = None
    similarity_score: Optional[float] = None
    intent: Optional[str] = None
    rationale: str = ""
class KeywordRouter:
    """Fast keyword-based router that matches queries against the golden dataset.

    Matching happens in two stages: an exact lookup on the normalized query
    text, then a substring + word-overlap (Jaccard) comparison against active
    golden queries that share the caller's intent.
    """

    # Upper bound on memoized normalizations. Keys are raw user queries, so an
    # uncapped dict would grow for the lifetime of the process.
    _CACHE_MAX_ENTRIES = 1024
    # Maximum number of golden queries compared in the fuzzy stage.
    _MAX_FUZZY_CANDIDATES = 50
    # Minimum Jaccard word overlap for a fuzzy match to be accepted.
    _FUZZY_MIN_SIMILARITY = 0.7

    def __init__(self):
        # Maps raw query text -> normalized text.
        self._normalize_cache = {}

    def _normalize_query(self, query: str) -> str:
        """Normalize a query for matching.

        The text is lowercased, combining marks are removed after NFD
        decomposition (accent-insensitive matching), and whitespace runs are
        collapsed to single spaces.

        NOTE(review): letters without a canonical decomposition (for example
        Vietnamese "đ") are not folded to ASCII by this approach. This is left
        unchanged because it must stay consistent with however the stored
        ``query_normalized`` values were produced — confirm before changing.

        Args:
            query: Raw query text.

        Returns:
            The normalized query string.
        """
        cache = self._normalize_cache
        cached = cache.get(query)
        if cached is not None:
            return cached

        normalized = query.lower().strip()
        # Remove accents for accent-insensitive matching
        normalized = unicodedata.normalize("NFD", normalized)
        normalized = "".join(
            ch for ch in normalized if unicodedata.category(ch) != "Mn"
        )
        # Remove extra spaces
        normalized = re.sub(r'\s+', ' ', normalized).strip()

        # Keep the memo bounded: dropping everything is crude but cheap, and
        # recomputing a normalization is inexpensive.
        if len(cache) >= self._CACHE_MAX_ENTRIES:
            cache.clear()
        cache[query] = normalized
        return normalized

    def route(self, query: str, intent: str, confidence: float) -> RouteDecision:
        """Try to match the query against the golden dataset by keywords.

        Args:
            query: User query string.
            intent: Detected intent; restricts the fuzzy-match candidates and
                is echoed back on the decision.
            confidence: Intent classification confidence; reported unchanged
                when no match is found.

        Returns:
            RouteDecision with path="fast_path" if a match is found, else
            path="slow_path".
        """
        query_normalized = self._normalize_query(query)

        # Try exact match first (fastest). ``first()`` is used instead of
        # ``get()`` so that duplicated golden rows still count as a match
        # rather than being discarded via MultipleObjectsReturned.
        golden_query = GoldenQuery.objects.filter(
            query_normalized=query_normalized,
            is_active=True,
        ).first()
        if golden_query is not None:
            return RouteDecision(
                path="fast_path",
                method="keyword",
                confidence=1.0,
                matched_golden_query_id=golden_query.id,
                intent=intent,
                rationale="exact_match",
            )

        # Try fuzzy match: check if query contains golden query or vice versa.
        # This handles variations like "mức phạt vượt đèn đỏ" vs
        # "vượt đèn đỏ phạt bao nhiêu".
        query_words = set(query_normalized.split())
        if query_words:  # an empty query can never reach the overlap threshold
            try:
                # Golden queries with the same intent, capped to limit the
                # number of comparisons.
                golden_queries = GoldenQuery.objects.filter(
                    intent=intent,
                    is_active=True,
                )[:self._MAX_FUZZY_CANDIDATES]
                for gq in golden_queries:
                    gq_normalized = self._normalize_query(gq.query)
                    # Require one text to be a substring of the other ...
                    if not (query_normalized in gq_normalized
                            or gq_normalized in query_normalized):
                        continue
                    gq_words = set(gq_normalized.split())
                    if not gq_words:
                        continue
                    # ... and enough shared words (simple Jaccard similarity).
                    similarity = (
                        len(query_words & gq_words) / len(query_words | gq_words)
                    )
                    if similarity >= self._FUZZY_MIN_SIMILARITY:
                        return RouteDecision(
                            path="fast_path",
                            method="keyword",
                            confidence=similarity,
                            matched_golden_query_id=gq.id,
                            similarity_score=similarity,
                            intent=intent,
                            rationale="fuzzy_match",
                        )
            except Exception as exc:
                # Fuzzy matching is best-effort: record the failure and fall
                # through to the slow path instead of failing the request.
                import logging
                logging.getLogger(__name__).warning(
                    "Error in keyword fuzzy matching: %s", exc
                )

        # No match found
        return RouteDecision(
            path="slow_path",
            method="keyword",
            confidence=confidence,
            intent=intent,
            rationale="no_keyword_match",
        )
class DualPathRouter:
    """Main router that decides Fast Path vs Slow Path using a hybrid approach.

    Stages are tried in order: keyword matching, semantic similarity against
    stored golden-query embeddings, then (for low-confidence intents only) an
    optional LLM router. If none yields a fast-path match the query goes to
    the slow path.
    """

    # Intent confidence below which the LLM fallback is consulted.
    _LLM_FALLBACK_CONFIDENCE = 0.7
    # Maximum number of golden queries scored in the semantic stage.
    _MAX_SEMANTIC_CANDIDATES = 100

    def __init__(self, similarity_threshold: float = 0.85):
        """Initialize the Dual-Path Router.

        Args:
            similarity_threshold: Minimum cosine similarity for a semantic
                match to be accepted (default: 0.85).
        """
        self.keyword_router = KeywordRouter()
        self.llm_router = None  # Lazy load if needed
        self.similarity_threshold = similarity_threshold
        self._embedding_model = None

    def route(self, query: str, intent: str, confidence: float) -> RouteDecision:
        """Route a query to the Fast Path or the Slow Path.

        Args:
            query: User query string.
            intent: Detected intent.
            confidence: Intent classification confidence.

        Returns:
            RouteDecision with path, method, and the matched golden query ID
            if applicable.
        """
        # Step 1: keyword-based routing (cheapest stage).
        keyword_decision = self.keyword_router.route(query, intent, confidence)
        if keyword_decision.path == "fast_path":
            return keyword_decision

        # Step 2: semantic similarity search in the golden dataset.
        similarity_match = self._find_similar_golden_query(query, intent)
        if similarity_match and similarity_match['score'] >= self.similarity_threshold:
            return RouteDecision(
                path="fast_path",
                method="similarity",
                confidence=similarity_match['score'],
                matched_golden_query_id=similarity_match['id'],
                similarity_score=similarity_match['score'],
                intent=intent,
                rationale="semantic_similarity",
            )

        # Step 3: LLM router fallback, only when the intent is uncertain.
        if confidence < self._LLM_FALLBACK_CONFIDENCE:
            llm_decision = self._llm_route(query, intent)
            if llm_decision and llm_decision.path == "fast_path":
                return llm_decision

        # Default: Slow Path (full RAG pipeline)
        return RouteDecision(
            path="slow_path",
            method="default",
            confidence=confidence,
            intent=intent,
            rationale="no_fast_path_match",
        )

    def _find_similar_golden_query(self, query: str, intent: str) -> Optional[Dict]:
        """Find the most similar golden query using embedding similarity.

        Any error (database, model loading, encoding) is logged as a warning
        and reported as "no match" so that routing can continue.

        Args:
            query: User query.
            intent: Detected intent; only golden queries with this intent are
                considered.

        Returns:
            Dict with 'id' and 'score' if a match at or above
            ``similarity_threshold`` is found, None otherwise.
        """
        try:
            # Active golden queries with the same intent that have an embedding.
            golden_queries = list(
                GoldenQuery.objects.filter(
                    intent=intent,
                    is_active=True,
                    query_embedding__isnull=False,
                )[:self._MAX_SEMANTIC_CANDIDATES]  # Limit for performance
            )
            if not golden_queries:
                return None

            embedding_model = self._get_embedding_model()
            if not embedding_model:
                return None

            # NOTE(review): assumes a sentence-transformers style ``encode``
            # that returns one vector for one string — confirm against
            # get_embedding_model().
            query_embedding = embedding_model.encode(query, convert_to_numpy=True)

            best = self._best_candidate(
                query_embedding,
                [(gq.id, gq.query_embedding) for gq in golden_queries],
            )
            if best is None:
                return None
            best_id, best_score = best
            if best_score >= self.similarity_threshold:
                return {
                    'id': best_id,
                    'score': best_score,
                }
            return None
        except Exception as e:
            # Log error but don't fail
            import logging
            logging.getLogger(__name__).warning(
                "Error in semantic similarity search: %s", e
            )
            return None

    @staticmethod
    def _best_candidate(query_embedding, candidates: List[Tuple[int, object]]) -> Optional[Tuple[int, float]]:
        """Return the (id, cosine similarity) of the best-scoring candidate.

        Unusable vectors are skipped one by one instead of aborting the whole
        search: missing or empty embeddings, vectors whose length differs
        from the query's, non-numeric data, and zero or non-finite norms
        (which would otherwise produce NaN scores).

        Args:
            query_embedding: Query vector (array-like of numbers).
            candidates: ``(id, embedding)`` pairs to score.

        Returns:
            ``(id, score)`` for the candidate with the highest strictly
            positive cosine similarity, or None if there is none.
        """
        query_vec = np.asarray(query_embedding, dtype=float).ravel()
        query_norm = np.linalg.norm(query_vec)
        if query_vec.size == 0 or not np.isfinite(query_norm) or query_norm == 0:
            return None
        query_vec = query_vec / query_norm  # Normalize

        best_id = None
        best_score = 0.0
        for candidate_id, raw_embedding in candidates:
            if raw_embedding is None:
                continue
            try:
                vec = np.asarray(raw_embedding, dtype=float).ravel()
            except (TypeError, ValueError):
                continue  # stored value is not a numeric vector
            if vec.shape != query_vec.shape:
                continue  # empty or produced with a different dimension
            norm = np.linalg.norm(vec)
            if not np.isfinite(norm) or norm == 0:
                continue
            # Both vectors are unit length, so the dot product is the cosine.
            score = float(np.dot(query_vec, vec / norm))
            if score > best_score:
                best_score = score
                best_id = candidate_id

        if best_id is None:
            return None
        return best_id, best_score

    def _get_embedding_model(self):
        """Return the embedding model, loading it on first use."""
        if self._embedding_model is None:
            self._embedding_model = get_embedding_model()
        return self._embedding_model

    def _llm_route(self, query: str, intent: str) -> Optional[RouteDecision]:
        """Use an LLM to decide routing (optional, for edge cases).

        Intended as a fallback for low-confidence queries where keyword and
        similarity matching found nothing. Not implemented yet: it always
        returns None.

        Args:
            query: User query.
            intent: Detected intent.

        Returns:
            RouteDecision if the LLM finds a match, None otherwise
            (currently always None).
        """
        # For now, return None (LLM routing can be implemented later if needed)
        return None