"""
Intent Classifier for Hybrid RAG + FSM Chatbot

Detects user intent to route between scenario flows and RAG queries.
"""

import json
import os
import re
from typing import Dict, Optional, List

# Keywords that count as a valid answer to a scenario question.
# NOTE: these are matched as substrings on purpose so that inflected forms
# ("tickets", "artists") still count as answers.
_CHOICE_KEYWORDS = (
    # Event recommendation choices
    'giá', 'price', 'vé', 'ticket',
    'lineup', 'line-up', 'nghệ sĩ', 'artist',
    'địa điểm', 'location', 'chỗ',
    'thời gian', 'time', 'lịch',
    # General answers
    'có', 'yes', 'ok', 'được', 'không', 'no',
    'chill', 'sôi động', 'hài', 'workshop',
    # Ratings or choices
    '1', '2', '3', '4', '5',
)

# Words that mark a message as a question.
_QUESTION_WORDS = (
    "gì", "sao", "thế nào", "bao nhiêu", "mấy giờ",
    "ai", "how", "what", "why",
)

# Question words must match as whole words: a plain substring test fires on
# "ai" inside "hai" or "how" inside "show". Lookarounds are used instead of
# \b so that a word ending in a combining mark is still delimited correctly.
_QUESTION_WORD_RE = re.compile(
    r"(?<!\w)(?:" + "|".join(re.escape(word) for word in _QUESTION_WORDS) + r")(?!\w)"
)


class IntentClassifier:
    """
    Classify user intent using keyword matching.

    Routes to either:
    - Scenario flows (scripted conversations)
    - RAG queries (knowledge retrieval)
    """

    def __init__(self, scenarios_dir: str = "scenarios"):
        """
        Initialize with auto-loading triggers from scenario JSON files.

        Args:
            scenarios_dir: Directory containing scenario JSON files
        """
        # Auto-load scenario patterns from JSON files
        self.scenario_patterns = self._load_scenario_patterns(scenarios_dir)

        # General question patterns (RAG)
        self.general_patterns = [
            # Location
            "ở đâu", "địa điểm", "location", "where", "chỗ nào", "tổ chức tại",
            # Time
            "mấy giờ", "khi nào", "when", "time", "bao giờ", "thời gian", "ngày nào",
            # Info
            "thông tin", "info", "information", "chi tiết", "details", "về",
            # Parking
            "đậu xe", "parking", "gửi xe",
            # Contact
            "liên hệ", "contact", "số điện thoại",
            # Events/content
            "sự kiện", "event", "đâu", "show nào", "line-up", "lineup", "performer"
        ]

    def _load_scenario_patterns(self, scenarios_dir: str) -> Dict[str, List[str]]:
        """
        Auto-load triggers from all scenario JSON files in a directory.

        Files are read in sorted filename order so that, when several
        scenarios share a trigger, the winner is deterministic. Unreadable or
        malformed files are reported and skipped; they never abort loading.

        Args:
            scenarios_dir: Directory containing ``*.json`` scenario files

        Returns:
            {"scenario_id": ["trigger1", "trigger2", ...]}
        """
        patterns: Dict[str, List[str]] = {}

        # isdir (not exists): a regular file at this path would make
        # os.listdir raise instead of degrading gracefully.
        if not os.path.isdir(scenarios_dir):
            print(f"⚠ Scenarios directory not found: {scenarios_dir}")
            return patterns

        for filename in sorted(os.listdir(scenarios_dir)):
            if not filename.endswith('.json'):
                continue

            filepath = os.path.join(scenarios_dir, filename)
            try:
                with open(filepath, 'r', encoding='utf-8') as f:
                    scenario = json.load(f)
            except (OSError, ValueError) as e:
                # ValueError covers both invalid JSON and invalid UTF-8.
                print(f"⚠ Error loading {filename}: {e}")
                continue

            if not isinstance(scenario, dict):
                print(f"⚠ Error loading {filename}: expected a JSON object")
                continue

            scenario_id = scenario.get('scenario_id')
            if isinstance(scenario_id, (list, dict)):
                # Unhashable values cannot be used as a scenario key.
                print(f"⚠ Error loading {filename}: invalid scenario_id")
                continue

            triggers = scenario.get('triggers', [])
            if isinstance(triggers, str):
                # A bare string would otherwise be iterated char by char.
                triggers = [triggers]
            elif not isinstance(triggers, list):
                triggers = []
            # Drop non-string and blank triggers; a blank trigger is a
            # substring of every message and would match everything.
            triggers = [t for t in triggers if isinstance(t, str) and t.strip()]

            if scenario_id and triggers:
                patterns[scenario_id] = triggers
                print(f"✓ Loaded triggers for: {scenario_id} ({len(triggers)} patterns)")

        return patterns

    def classify(
        self,
        message: str,
        conversation_state: Optional[Dict] = None
    ) -> str:
        """
        Classify user intent, with mid-scenario off-topic detection.

        While a scenario is active, the message is treated as off-topic only
        if it looks like a question (contains "?" or a whole-word question
        word) AND contains none of the valid answer keywords. Otherwise the
        scenario continues. With no active scenario, the first scenario whose
        trigger occurs in the message wins.

        Args:
            message: Raw user message
            conversation_state: Optional state dict; its 'active_scenario'
                entry (if truthy) marks a scenario as in progress

        Returns:
            - "scenario:{scenario_id}" - Trigger new scenario
            - "scenario:continue" - Continue active scenario
            - "rag:general" - General RAG query (no active scenario)
            - "rag:with_resume" - RAG query mid-scenario (then resume)
        """
        text = message or ""
        message_lower = text.lower().strip()

        # Check if user is in active scenario
        active_scenario = conversation_state.get('active_scenario') if conversation_state else None

        if active_scenario:
            is_valid_answer = any(keyword in message_lower for keyword in _CHOICE_KEYWORDS)
            has_question_mark = "?" in text
            has_question_word = _QUESTION_WORD_RE.search(message_lower) is not None

            is_off_topic = (has_question_mark or has_question_word) and not is_valid_answer
            if is_off_topic:
                print(f"🔀 Off-topic question detected: '{message}' → rag:with_resume")
                return "rag:with_resume"

            # Normal scenario continuation
            return "scenario:continue"

        # Not in scenario - check for scenario triggers
        for scenario_id, patterns in self.scenario_patterns.items():
            for pattern in patterns:
                if not isinstance(pattern, str):
                    continue
                needle = pattern.lower()
                # Skip blank triggers: "" is contained in every message.
                if needle.strip() and needle in message_lower:
                    return f"scenario:{scenario_id}"

        # No scenario match - general RAG query
        return "rag:general"

    def _matches_any_pattern(self, message: str, patterns: List[str]) -> bool:
        """
        Check if message matches any pattern in list.

        A pattern matches when it is a (case-sensitive) substring of the
        message, or when it occurs as a whole word ignoring case.
        """
        return any(
            pattern in message
            or re.search(rf'\b{re.escape(pattern)}\b', message, re.IGNORECASE) is not None
            for pattern in patterns
        )

    def get_scenario_type(self, intent: str) -> Optional[str]:
        """
        Extract scenario type from intent string.

        Args:
            intent: "scenario:price_inquiry" or "scenario:continue"

        Returns:
            "price_inquiry", or None for "scenario:continue" and for any
            intent that does not start with "scenario:"
        """
        prefix = "scenario:"
        if not intent.startswith(prefix):
            return None

        scenario_type = intent[len(prefix):]
        if scenario_type == "continue":
            return None
        return scenario_type

    def add_scenario_pattern(self, scenario_id: str, patterns: List[str]):
        """
        Dynamically add new scenario patterns.

        The given list is copied, so later additions never mutate the
        caller's list.
        """
        if scenario_id in self.scenario_patterns:
            self.scenario_patterns[scenario_id].extend(patterns)
        else:
            self.scenario_patterns[scenario_id] = list(patterns)

    def add_general_pattern(self, patterns: List[str]):
        """
        Dynamically add new general question patterns.
        """
        self.general_patterns.extend(patterns)