Spaces:
Sleeping
Sleeping
| # embedding_service.py | |
| import logging | |
| import numpy as np | |
| from typing import List, Dict, Any | |
| from sentence_transformers import SentenceTransformer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| from models import ExtractedInfo | |
| from division_hierarchy import get_department_name | |
| # Configure root logging at INFO at import time (basicConfig does nothing if the root logger already has handlers), then create this module's logger | |
| logging.basicConfig(level=logging.INFO) | |
| logger = logging.getLogger(__name__) | |
| class EmbeddingService: | |
| """ | |
| Service for fast division matching using sentence embeddings. | |
| How it works: | |
| 1. At startup: Encode all divisions into vectors (one-time cost) | |
| 2. For each query: Encode query and find most similar division (fast!) | |
| Speed: ~50-100ms (vs 4 seconds with LLM) | |
| Size: ~150MB (vs 4.8GB with LLM) | |
| """ | |
| def __init__(self, model_name: str = "all-MiniLM-L6-v2"): | |
| """ | |
| Initialize the embedding service. | |
| Args: | |
| model_name: Name of the sentence-transformers model to use. | |
| 'all-MiniLM-L6-v2' is fast, small (22MB), and accurate. | |
| """ | |
| logger.info(f"Loading embedding model: {model_name}") | |
| # Load the pre-trained model | |
| # This downloads the model on first run (~22MB) | |
| self.model = SentenceTransformer(model_name) | |
| logger.info("Model loaded successfully") | |
| # Define all 67 divisions with COMPREHENSIVE keywords | |
| # Format: (division_name, keywords_for_matching) | |
| # Updated with exhaustive keywords for better accuracy | |
| self.divisions_data = [ | |
| # FINANCE | |
| ("General Accounting Division", "accounting general ledger bookkeeping financial records journal entries posting reconciliation account balancing closing books financial statements invoices receipts vouchers ledgers trial balance balance sheet accounting errors wrong entries posting mistakes account discrepancies accounting software ERP SAP financial systems submit receipts accounting question ledger issue fiscal year closing"), | |
| ("Accounts Payable Division", "payable payments vendors bills invoices vendor payments bill processing payment approval invoice matching payment scheduling purchase orders vendor invoices payment vouchers remittance advice late payment vendor complaint unpaid invoice payment delay wrong payment payment systems vendor portal invoice processing pay vendor supplier payment creditors outstanding payments payment terms net 30 payment run"), | |
| ("Accounts Receivable Division", "receivable collections revenue income customer payments invoicing customers collecting payments revenue recognition aging reports dunning sales invoices receipts credit memos statements of account collection letters unpaid invoice overdue payment customer not paying bad debt payment dispute billing system collection software customer portal debtors outstanding invoices aging report payment terms cash collection DSO"), | |
| ("Financial Reporting & Control Division", "reporting control financial statements disclosure compliance monthly closing financial reporting variance analysis budget vs actual consolidation P&L income statement balance sheet cash flow statement management reports board reports reporting error wrong numbers financial discrepancy statement correction reporting software BI tools consolidation systems Excel models financial report monthly statements quarterly results budget variance GAAP IFRS financial controls internal controls SOX compliance reconciliation"), | |
| ("Audit & Financial Analysis", "audit analysis review financial review internal audit financial audit account analysis variance investigation trend analysis ratio analysis audit reports findings recommendations analytical reports management letters audit finding control weakness financial irregularity discrepancy investigation audit software data analytics tools sampling tools need financial analysis audit request investigate transaction variance explanation external audit internal audit SOX testing control testing substantive testing"), | |
| # INFORMATION TECHNOLOGY | |
| ("IT Governance & Quality Division", "IT governance quality standards policies IT strategy IT planning governance framework quality assurance standards compliance IT policies procedures standards governance framework compliance reports policy violation non-compliance governance issue quality problem governance platforms policy management tools compliance software IT policy question IT standards governance requirement quality issue ITIL COBIT ISO 27001 IT framework service management change management IT department information technology IT division policy review IT controls IT quality"), | |
| ("Applications Development & Integrations", "development apps integration software programming coding software development app building system integration API development custom development requirements design docs technical specs user stories sprint plans app not working software bug integration failure API issue development request IDE Git DevOps tools integration platforms development frameworks need new app software development integrate systems build application API connection agile scrum CI/CD microservices REST API web services mobile app web app IT department information technology IT division app dev application development software engineering technical development IT development IT team"), | |
| ("IT Infrastructure & Operations Div.", "infrastructure servers operations IT systems hardware server management network operations system monitoring infrastructure maintenance capacity planning system docs network diagrams runbooks incident reports change requests server down network problem system slow hardware failure connectivity issue mouse broken keyboard not working monitor problem printer issue laptop problem desktop issue equipment malfunction computer broken screen not working monitoring tools server management network tools ticketing system server not working network down system issue hardware problem my mouse is broken keyboard not responding monitor not working printer offline laptop won't start computer broken data center cloud infrastructure virtualization VMware storage SAN NAS backup disaster recovery computers laptops desktops monitors keyboards mice printers scanners docking stations cables adapters IT department information technology IT operations system controls system performance performance issues slow system data management database management IT infrastructure IT team system monitoring monitoring systems technical infrastructure"), | |
| ("Applications Maintenance & Support Div.", "maintenance support help desk application support software support incident management problem resolution ticket handling user support application fixes tickets incident reports knowledge base articles support documentation app not working software error login problem access issue system crash user can't login ticketing system remote support tools monitoring tools help desk software app not working software problem need help can't login error message application support service desk L1 support L2 support incident problem service request bug fix IT department information technology IT support IT help technical support IT assistance application support IT team need help with IT IT issues technical issues IT helpdesk support desk"), | |
| ("IT Security Implementation & Operations", "security implementation IT security cyber protection information security security monitoring threat detection vulnerability management security implementation access control security policies incident reports vulnerability assessments security logs security breach hacked account virus malware suspicious activity phishing email password reset firewall antivirus SIEM IDS IPS security tools encryption my account is hacked security problem virus on computer suspicious email password issue can't access system cybersecurity InfoSec data protection encryption authentication authorization MFA SSO identity management IT department information technology IT security cyber security cybersecurity security team security division access issues access control access management security incident security breach IT security team"), | |
| ("IT Manager's Office", "IT manager IT leadership IT management technology management IT strategy IT planning resource management vendor management IT budget IT plans strategy documents budget reports vendor contracts escalation IT complaint strategic IT question vendor issue speak to IT manager IT strategy IT escalation major IT issue IT director CIO IT leadership technology roadmap digital transformation"), | |
| ("Enterprise Architecture Team", "architecture enterprise architecture IT design system architecture architecture design system planning technology standards solution architecture enterprise planning architecture diagrams blueprints technical standards architecture principles architecture question design review technical standards system design system architecture enterprise design technical architecture solution design EA TOGAF architecture framework reference architecture technology stack design patterns"), | |
| ("Reporting & Data Analytics Unit", "analytics data reporting insights business intelligence data analysis report generation dashboard creation data visualization predictive analytics reports dashboards analytics KPI reports data insights visualizations report error wrong data dashboard not working data quality issue BI tools Tableau Power BI SQL data warehouse analytics platforms need a report data analysis dashboard KPI report business intelligence data insights big data data mining machine learning data science ETL data warehouse OLAP IT department information technology data analytics data team data management data systems"), | |
| # HUMAN RESOURCES | |
| ("Rewards & Hr Operations Division", "rewards HR operations salary compensation payroll benefits payroll processing salary administration benefits management compensation planning bonus calculation payslips salary letters benefits statements payroll reports tax documents salary delay payroll error wrong salary salary not received benefits issue bonus question pay stub problem my salary is delayed salary not received payroll error wrong amount paid benefits question bonus calculation pay raise HRIS payroll system benefits platform time and attendance total rewards variable pay incentives stock options pension health insurance leave balance"), | |
| ("HR Business Partner", "HR business partner HRBP employee relations HR support employee support HR consultation performance management employee issues HR advice employee files HR policies performance docs employee relations cases employee complaint HR question manager support employee issue workplace problem HRIS case management HR portal HR help employee issue manager question HR advice workplace problem employee engagement organizational development change management workforce planning HR department human resources HR division HR team employee performance performance issues performance review performance management HR help HR support need HR assistance"), | |
| ("Talent Development Division", "talent development training learning development employee growth training programs skills development career development learning paths competency building training materials course catalogs development plans training records certificates training request course enrollment development opportunity skill gap LMS e-learning platforms training management systems training course learning opportunity development program skills training professional development L&D upskilling reskilling leadership development technical training soft skills certifications"), | |
| ("Od & Talent Acquisition Division", "acquisition recruitment hiring talent acquisition jobs candidates recruiting hiring candidate sourcing interviewing job posting onboarding job descriptions CVs resumes offer letters recruitment reports hiring request job opening recruitment question candidate issue onboarding problem ATS recruitment platforms LinkedIn job boards need to hire job opening recruitment new position hiring process interview candidate talent acquisition sourcing headhunting employer branding candidate experience assessment"), | |
| ("Org.Culture & Initiatives Division", "culture initiatives employee engagement organizational culture workplace culture engagement programs culture building employee initiatives recognition programs wellness programs engagement surveys culture reports initiative proposals recognition materials engagement issue culture problem initiative request employee morale employee engagement culture initiative recognition program employee wellness team building employee experience values mission vision culture transformation employee satisfaction DEI"), | |
| # LEGAL | |
| ("Legal Agreements and Consultancy Division", "legal agreements consultancy legal advice contracts consultation legal review contract drafting legal consultation agreement negotiation legal advice contracts agreements legal opinions legal memos NDAs MOUs legal question contract review needed agreement issue legal advice request need legal advice contract review legal question agreement help legal consultation legal counsel commercial law corporate law legal advisory contract law"), | |
| ("Contracts & Mortgages and Guarantees Division", "contracts mortgages guarantees security collateral mortgage management guarantee processing contract administration collateral management security documentation mortgage documents guarantee letters security agreements pledge documents collateral records mortgage question guarantee issue security problem contract question mortgage document guarantee letter security agreement collateral question pledge loan security real estate property liens hypothecation financial guarantees"), | |
| ("Cases Division", "cases litigation legal cases lawsuits disputes court litigation management case handling dispute resolution court proceedings legal claims lawsuits court filings legal claims case files judgments settlements legal case lawsuit dispute court matter legal claim legal case lawsuit dispute court matter litigation legal claim arbitration litigation arbitration mediation legal proceedings court case claims management judgments"), | |
| # COMMUNICATION | |
| ("Public Relations & Media Division", "PR media public relations press communications announcements media relations press releases public announcements media monitoring crisis communication press releases media statements communication materials press kits media inquiry press question public statement needed PR crisis media request press release public announcement PR question media inquiry corporate communications external communications media coverage press conference spokesperson"), | |
| ("Internal Communication Unit", "internal communication employee communication announcements internal comms employee messaging internal announcements town halls newsletters intranet management internal memos newsletters announcements employee updates town hall materials communication request announcement needed employee messaging internal news employee announcement internal communication newsletter staff message company news employee communications change communications internal messaging intranet employee engagement"), | |
| # BUSINESS DEVELOPMENT | |
| ("Marketing Division", "marketing branding promotion advertising campaigns brand management marketing campaigns market promotion advertising digital marketing content marketing marketing plans campaign materials brand guidelines promotional materials marketing reports marketing request branding question campaign support promotional material marketing support branding promotion advertising marketing campaign social media digital marketing SEO SEM social media marketing content marketing email marketing events"), | |
| ("Partnerships Dev&Advisory Services Div", "partnerships advisory business partnerships strategic partnerships alliances partnership development alliance management collaboration partnership agreements joint ventures partnership agreements MOU collaboration agreements partnership proposals partnership opportunity collaboration request alliance question partnership opportunity business collaboration strategic alliance joint venture partnership strategic partnerships business alliances co-marketing channel partnerships ecosystem"), | |
| # STRATEGIC PLANNING | |
| ("Strategy Division", "strategy strategic planning business strategy corporate strategy strategic planning strategy development business planning strategic initiatives roadmap planning strategic plans business plans strategy documents roadmaps strategic initiatives strategy question strategic planning business direction strategic planning business strategy strategic initiative corporate strategy long-term planning corporate strategy business model strategic objectives strategic goals vision mission"), | |
| ("Corporate Performance Division", "performance corporate performance KPIs metrics objectives performance management KPI tracking metrics monitoring objective setting performance review KPI reports scorecards performance dashboards objective tracking performance reviews performance tracking KPI question metrics issue objective setting KPI tracking performance metrics corporate performance objectives scorecards BSC balanced scorecard OKRs performance indicators strategic metrics targets goals"), | |
| ("Knowledge Management Unit", "knowledge management information management documentation knowledge sharing knowledge capture documentation knowledge sharing content management knowledge repository knowledge base documentation procedures best practices lessons learned documentation request knowledge access information search knowledge base documentation procedure how to best practices process documentation knowledge base wiki document management content management information architecture"), | |
| # PROJECTS | |
| ("Project Management Division", "project management projects PMO project planning project execution project planning project execution project monitoring project control project delivery project plans schedules WBS project charters status reports project documentation project issue project delay project question project planning project management project planning PMO project delivery project status project delivery PMO PMP project lifecycle project portfolio agile waterfall project governance"), | |
| ("Projects Consultancy Division", "projects consultancy project advisory project consultation project services project consulting advisory services project expertise project guidance project assessment consulting reports project assessments advisory recommendations project studies project consultation needed project advice expert guidance project consultation project advisory project expert project guidance project assessment consulting advisory project expertise technical assistance project evaluation"), | |
| ("Construction Consultancy Division", "construction building construction consultancy construction projects engineering construction planning building supervision construction management site supervision construction quality construction plans building specs drawings blueprints construction reports inspection reports construction issue building problem site question construction quality construction project building construction construction supervision site management engineering civil engineering structural engineering construction management building codes specifications BOQ"), | |
| # MARKET RESEARCH | |
| ("Market Research Div.", "market research research market analysis market intelligence market research data collection market surveys research analysis market intelligence research reports market studies survey results research findings market analysis research request market data needed research question market research market data research study market analysis market intelligence market trends market research surveys focus groups market data industry analysis competitive intelligence"), | |
| ("Market Studies Division", "market studies industry analysis market assessment market evaluation market studies industry research market assessment feasibility studies market evaluation market study reports industry reports assessment reports feasibility studies market study request industry analysis needed feasibility question market study industry analysis market assessment feasibility study market evaluation industry research market sizing market segmentation market opportunity market attractiveness"), | |
| ("Business Intelligence Div.", "business intelligence market intelligence competitive intelligence market insights intelligence gathering competitor analysis market monitoring intelligence reporting intelligence reports competitor profiles market briefings intelligence updates intelligence request competitor information market intelligence business intelligence competitor analysis market intelligence competitive intelligence market insights CI competitive analysis market monitoring industry intelligence strategic intelligence"), | |
| # FACILITIES | |
| ("Facilities Management Division", "facilities building management office management facilities services facility maintenance building operations space management facility services office management maintenance schedules facility reports work orders service requests facility problem building issue office space maintenance request AC not working lights not working office equipment furniture request space issue cleaning request AC not working office too cold lights broken need furniture office space facility maintenance building problem room too hot chair broken desk request CMMS facility management software work order systems HVAC air conditioning heating ventilation lighting furniture office supplies workspace meeting rooms office equipment janitorial cleaning waste management"), | |
| ("Documents and Administrative Communications Center", "documents administrative communications center document management records document processing records management filing archiving document distribution mail handling official documents correspondence letters memos records archives document request filing question records access document retrieval need a document official letter document filing records request archive access correspondence records management archiving filing system document control correspondence management official documents document filing document archiving paper documents document center administrative documents"), | |
| ("Security and Safety Division", "security safety protection guards access control security operations access control visitor management security monitoring emergency response security reports incident reports visitor logs access logs security procedures security incident lost badge access card problem visitor escort security concern suspicious activity emergency safety hazard lost my badge access card not working need visitor pass security escort security incident emergency safety concern suspicious person physical security badge ID card access card visitor management CCTV guards patrols emergency procedures evacuation physical security building security premises security facility security incident report security event safety incident safety event"), | |
| ("Industrial Safety & Loss Prevention Div", "industrial safety loss prevention occupational safety workplace safety HSE safety management risk assessment safety inspections incident investigation loss prevention safety reports incident reports risk assessments safety procedures investigation reports safety incident accident injury safety hazard unsafe condition near miss safety incident workplace accident safety hazard injury report unsafe condition PPE safety training HSE occupational health safety compliance OSHA safety regulations PPE accident prevention safety training"), | |
| # PURCHASING | |
| ("Procurements Contracts and Vendors Division", "procurement contracts vendors suppliers purchasing sourcing procurement vendor management contract management supplier management sourcing tendering purchase orders contracts RFP RFQ vendor agreements tender documents vendor issue contract question procurement request supplier problem procurement vendor contract supplier agreement tender RFP purchase order sourcing strategic sourcing supplier relationship contract negotiation vendor evaluation procurement process"), | |
| ("Purchasing Division", "purchasing buying procurement purchase orders requisitions purchasing buying order processing purchase requisitions goods receipt purchasing approval purchase orders requisitions quotes purchase requests delivery notes purchase request buying question order status delivery problem purchase approval need to buy purchase request order status buying approval purchase something get a quote purchase requisition PO purchase approval ordering buying process goods receipt"), | |
| # GOVERNANCE | |
| ("Enterprise Governance Division", "enterprise governance governance policies corporate governance board governance governance framework policy development board governance corporate governance governance compliance policies governance framework board papers governance reports charters governance question policy issue compliance question governance requirement governance corporate policies board governance governance framework governance compliance corporate governance board of directors governance structure governance best practices King IV"), | |
| ("Compliance Regulatory Division", "regulatory compliance regulations regulatory compliance legal compliance regulatory compliance regulation monitoring compliance assessment regulatory reporting compliance review compliance reports regulatory filings compliance certificates regulatory updates regulatory requirement compliance question regulation change compliance issue regulatory compliance regulations compliance requirement regulatory filing compliance report regulatory requirements industry regulations compliance standards regulatory framework regulatory authorities"), | |
| ("Compliance Operations Division", "compliance operations compliance monitoring compliance management compliance controls compliance monitoring compliance testing compliance controls compliance verification compliance tracking compliance reports monitoring reports compliance checklists test results compliance logs compliance breach control failure compliance issue monitoring finding compliance monitoring compliance testing compliance controls compliance verification compliance tracking compliance program compliance testing monitoring controls compliance assurance compliance activities"), | |
| ("Operational Risk Management Division", "operational risk risk management operational risk management risk controls risk identification risk assessment risk mitigation control testing risk monitoring risk registers risk assessments control matrices risk reports mitigation plans operational risk risk event control failure risk issue operational failure operational risk risk management risk assessment control issue risk event operational failure risk framework COSO ERM risk appetite risk tolerance key risk indicators KRIs system controls controls management risk control internal controls control framework assessment needed risk assessment risk division risk department risk team control systems controls review"), | |
| ("Financial Risk Management Division", "financial risk risk management credit risk market risk liquidity risk financial risk assessment risk modeling stress testing scenario analysis risk measurement risk reports stress test results risk models VaR reports risk metrics financial risk market risk credit risk event liquidity issue financial risk market risk credit risk liquidity risk risk modeling stress testing Basel capital adequacy VaR credit risk market risk liquidity risk ALM risk metrics"), | |
| ("Cybersecurity Governance Unit", "cybersecurity governance security governance information security governance security policies security governance security policy development security framework security standards security compliance security policies security framework security standards security guidelines security charters security policy question security governance security framework security compliance cybersecurity governance security policies security framework information security governance security standards ISO 27001 NIST security framework security governance information security management ISMS"), | |
| ("Cybersecurity Defense Unit", "cybersecurity defense security operations cyber defense threat detection security monitoring threat monitoring incident response security operations threat hunting vulnerability management security incidents threat reports incident response plans security alerts IOCs cyber attack security breach hacked malware virus phishing ransomware data breach security incident cyber attack hacked virus phishing email ransomware security incident data breach malware suspicious email SOC security operations center threat intelligence incident response cyber threats APT zero-day exploit vulnerability"), | |
| # CREDIT | |
| ("Credit Risk Division", "credit risk credit assessment credit evaluation credit analysis default risk credit assessment credit evaluation credit scoring default probability credit rating credit review credit reports credit assessments credit scores rating reports credit analysis credit risk concern default risk credit quality credit deterioration credit risk credit assessment credit evaluation default risk credit quality credit rating PD LGD EAD credit scoring credit rating credit underwriting NPL non-performing loans"), | |
| ("Credit Relationships Division", "credit relationships customer relations client management customer service account management customer relationship management client servicing account management customer support relationship building customer profiles relationship reports service records customer communications customer complaint client issue relationship problem customer service customer service client support customer complaint account manager relationship manager customer issue CRM customer relationship client servicing relationship manager account management customer satisfaction"), | |
| ("Credit Control Team - A", "credit control collections monitoring credit monitoring team A credit monitoring collection activities payment follow-up credit limits exposure monitoring collection reports payment schedules credit memos monitoring reports dunning letters overdue payment collection issue credit limit payment delay delinquency credit control collections overdue payment payment monitoring credit limit collection collections receivables management credit monitoring payment tracking delinquency write-off"), | |
| ("Credit Control Team - B", "credit control collections monitoring credit monitoring team B credit monitoring collection activities payment follow-up credit limits exposure monitoring collection reports payment schedules credit memos monitoring reports dunning letters overdue payment collection issue credit limit payment delay delinquency credit control collections overdue payment payment monitoring credit limit collection collections receivables management credit monitoring payment tracking delinquency write-off"), | |
| ("Collection Team", "collection collections recovery debt collection payment recovery debt collection payment recovery collection activities recovery process collection calls collection letters payment plans recovery reports collection logs non-payment collection case recovery issue difficult customer debt collection payment recovery collection recover payment non-paying customer collections debt recovery recovery payment collection delinquent accounts bad debt"), | |
| ("Loan Follow-Up Team", "loan follow-up loan monitoring loan servicing loan administration loan monitoring loan servicing payment follow-up loan administration loan review loan files payment schedules loan reports servicing records loan reviews loan payment loan question payment issue loan servicing loan problem loan payment loan help loan issue loan servicing loan monitoring loan question payment schedule loan servicing loan administration loan payments loan portfolio loan monitoring loan review disbursement"), | |
| ("C&Lm Info Team", "credit info information credit information credit data information management credit data information services data management credit information credit reports information reports data files credit information information request credit data information access credit information credit data information request credit reports data access credit bureau credit information credit data information services"), | |
| # PORTFOLIO | |
| ("Portfolio Manager'S Office", "portfolio manager portfolio management office portfolio leadership portfolio management portfolio strategy portfolio oversight portfolio governance portfolio reports portfolio strategy portfolio reviews management reports portfolio question portfolio strategy portfolio oversight portfolio manager portfolio strategy portfolio management portfolio oversight portfolio governance portfolio leadership portfolio strategy asset management"), | |
| ("Portfolio Division", "portfolio portfolio management asset management investment portfolio portfolio management asset management portfolio monitoring portfolio optimization portfolio reporting portfolio reports asset statements portfolio analysis performance reports portfolio question asset issue portfolio performance investment question portfolio asset management portfolio performance investment portfolio portfolio monitoring my portfolio portfolio management asset allocation investment management portfolio optimization asset management portfolio performance"), | |
| ("Special Assets Division", "special assets distressed assets problem assets asset recovery asset recovery workout restructuring distressed asset management asset resolution workout plans restructuring plans asset reports recovery reports problem asset distressed loan asset recovery troubled asset special assets problem asset distressed asset asset recovery workout restructuring NPL management asset resolution workout restructuring distressed debt asset recovery"), | |
| # EXCELLENCE | |
| ("Programs Delivery Operations Division", "programs delivery operations program execution program operations program delivery program execution operational delivery program operations program implementation program reports delivery schedules operational reports program status program delivery operational issue program execution delivery problem program delivery program operations program execution delivery operations program implementation program management program delivery operational excellence program execution"), | |
| ("Customer Excellence Division", "customer excellence customer service customer experience customer satisfaction customer service customer experience management service excellence customer complaints customer feedback service quality customer feedback service reports satisfaction surveys complaint reports NPS reports customer complaint service issue poor service customer dissatisfaction complaint bad experience customer complaint service problem poor service complaint customer service not satisfied bad experience complaint about service customer satisfaction NPS customer experience CX service quality customer feedback complaints handling customer care"), | |
| ("Programs Design And Needs Assessment Division", "programs design needs assessment program development program planning program design needs analysis program planning program development requirements gathering needs assessments program designs requirements documents program proposals program design needs assessment program planning development request program design needs assessment program development program planning requirements analysis program development needs analysis program design program planning feasibility"), | |
| ("Programs Evaluation & Quality Assurance Division", "programs evaluation quality assurance program assessment QA program review program evaluation quality assurance program assessment program review performance evaluation evaluation reports QA reports assessment reports review reports program audits program evaluation quality issue assessment request program review program evaluation quality assurance program assessment QA program review program quality program evaluation quality control QA quality assurance program effectiveness program impact"), | |
| ("Product Dev For National Priorities Div", "product development national priorities new products product innovation product development new product creation product innovation product design national programs product proposals product specs development plans product roadmaps new product product development product idea innovation request new product product development product innovation develop new product national priorities product idea product management product innovation new products product design product strategy national initiatives"), | |
| ("Product Dev For Customer Empowerment Div", "product development customer empowerment customer products customer-focused products customer product development customer-centric design product innovation customer empowerment programs product proposals customer research product specs customer feedback customer product product for customers customer empowerment customer-focused development customer products customer empowerment customer-focused products products for customers customer innovation customer empowerment customer-centric products customer solutions customer value"), | |
| ("Solutions & Design Division", "solutions design solution design solution development solutions architecture solution design solution development solution architecture solution delivery design thinking solution designs design documents solution proposals solution blueprints solution design solution request design question solution development solution design solution development design solution solutions custom solution solution architecture design thinking solution engineering solution delivery custom solutions"), | |
| # AUDIT | |
| ("Operations Audit Team", "operations audit operational audit audit internal audit operational review operational auditing process audit operational review compliance audit operational assessment audit reports audit findings audit recommendations operational audit reports audit request operational audit audit finding audit question operations audit operational audit audit request internal audit operational review internal audit operational audit process audit compliance audit operational controls"), | |
| # ACADEMY | |
| ("Academy Strategic Partnerships Division", "academy training education learning development partnerships training education programs learning professional development courses workshops certifications partnerships course catalogs training materials certificates learning plans partnership agreements training request course enrollment certification learning opportunity education program training education learning courses workshop certification professional development academy programs training courses SIDF Academy corporate university training programs learning center professional development certifications workshops seminars e-learning leadership development"), | |
| # EXECUTIVE | |
| ("CEO Office", "CEO chief executive executive office CEO office leadership executive management strategic leadership executive decisions CEO communications executive governance executive reports board papers CEO communications strategic documents executive escalation CEO office executive question strategic matter CEO executive office chief executive CEO office executive escalation top management C-suite executive leadership CEO chief executive officer executive management strategic leadership"), | |
| ] | |
| # Extract just the division names and search texts | |
| self.division_names = [div[0] for div in self.divisions_data] | |
| self.division_search_texts = [ | |
| f"{div[0]} {div[1]}" for div in self.divisions_data | |
| ] | |
| logger.info(f"Pre-encoding {len(self.division_names)} divisions...") | |
| # PRE-ENCODE all divisions (this is the magic!) | |
| # This happens once at startup, then queries are super fast | |
| self.division_embeddings = self.model.encode( | |
| self.division_search_texts, | |
| convert_to_numpy=True, | |
| show_progress_bar=True | |
| ) | |
| logger.info(f"✓ Encoded {len(self.division_names)} divisions") | |
| logger.info(f"Embedding shape: {self.division_embeddings.shape}") | |
| # ALSO ENCODE DEPARTMENTS | |
| # Get unique departments and build department → divisions mapping | |
| from contacts_data import get_all_contacts | |
| from collections import defaultdict | |
| contacts = get_all_contacts() | |
| dept_to_divisions = defaultdict(set) | |
| for contact in contacts: | |
| dept_to_divisions[contact["department"]].add(contact["division"]) | |
| self.dept_to_divisions = {dept: list(divs) for dept, divs in dept_to_divisions.items()} | |
| self.department_names = list(self.dept_to_divisions.keys()) | |
| # Create search texts for departments (department name + common keywords) | |
| self.department_search_texts = [] | |
| for dept in self.department_names: | |
| # Add department name and common keywords | |
| search_text = f"{dept} department team group unit" | |
| self.department_search_texts.append(search_text) | |
| logger.info(f"Pre-encoding {len(self.department_names)} departments...") | |
| self.department_embeddings = self.model.encode( | |
| self.department_search_texts, | |
| convert_to_numpy=True, | |
| show_progress_bar=False | |
| ) | |
| logger.info(f"✓ Encoded {len(self.department_names)} departments") | |
| logger.info("EmbeddingService ready!") | |
def find_division(self, query: str, top_k: int = 3) -> List[ExtractedInfo]:
    """
    Find the best matching divisions for a query.

    The query is embedded once and compared (cosine similarity) against the
    pre-encoded division vectors and the pre-encoded department vectors.
    Similarities in [-1, 1] are mapped to a confidence in [0, 1] via
    ``(similarity + 1) / 2``.

    Args:
        query: User's search query (e.g., "app development", "HR help",
            "Information Technology").
        top_k: Maximum number of division-level matches to return
            (default: 3). Values <= 0 yield no division-level matches.
            Not applied when a department-level match wins: in that case
            every division of the department is returned.

    Returns:
        List of ExtractedInfo objects (division, department, confidence
        rounded to 2 decimals).

        * Department match: if the best department confidence exceeds the
          best division confidence by at least 0.01, one entry per division
          of that department, all carrying the department's confidence.
        * Otherwise: the ``top_k`` most similar divisions, best first, each
          with its parent department from ``get_department_name``.
    """
    # Minimum confidence lead a department needs over the best division
    # before the query is treated as a department-level query
    # (e.g., "Information Technology" should expand to all IT divisions).
    dept_margin = 0.01

    logger.info("Processing query: %s", query)

    # STEP 1: Encode the query (kept as a 1-row matrix for cosine_similarity)
    query_embedding = self.model.encode([query], convert_to_numpy=True)

    # STEP 2: Similarity of the query against every division
    division_similarities = cosine_similarity(
        query_embedding,
        self.division_embeddings
    )[0]
    # Cast to a plain float so the arithmetic, rounding and the values
    # stored in the results never carry NumPy scalar types.
    best_division_conf = (float(np.max(division_similarities)) + 1) / 2
    logger.info("Best division match confidence: %.2f", best_division_conf)

    # STEP 2B: Similarity against every department. Skipped when no
    # departments were loaded, because argmax on an empty array raises.
    best_dept_idx = None
    best_dept_conf = None
    if self.department_names:
        department_similarities = cosine_similarity(
            query_embedding,
            self.department_embeddings
        )[0]
        best_dept_idx = int(np.argmax(department_similarities))
        best_dept_conf = (float(department_similarities[best_dept_idx]) + 1) / 2
        logger.info("Best department match confidence: %.2f", best_dept_conf)

    results = []

    # STEP 3: Prefer the department when it beats the best division by the
    # required margin (a lead >= margin already implies it is strictly better).
    if best_dept_conf is not None and (best_dept_conf - best_division_conf) >= dept_margin:
        dept_name = self.department_names[best_dept_idx]
        divisions_in_dept = self.dept_to_divisions[dept_name]
        dept_confidence = round(best_dept_conf, 2)
        logger.info(
            "✓ Department match: %s (%.2f) - Expanding to ALL %d divisions",
            dept_name, best_dept_conf, len(divisions_in_dept)
        )
        # Return ALL divisions in this department with the department's
        # confidence so contact search can find people across the department.
        for division_name in divisions_in_dept:
            results.append(ExtractedInfo(
                division=division_name,
                department=dept_name,
                confidence=dept_confidence
            ))
            logger.info(
                " - %s (dept match, confidence: %.2f)",
                division_name, best_dept_conf
            )
    else:
        # Regular division matching. Clamp top_k: a negative value would
        # otherwise slice off the tail instead of limiting the result count.
        limit = max(top_k, 0)
        top_indices = np.argsort(division_similarities)[::-1][:limit]
        for idx in top_indices:
            division_name = self.division_names[idx]
            similarity_score = float(division_similarities[idx])
            confidence = (similarity_score + 1) / 2
            # Get parent department
            department_name = get_department_name(division_name)
            results.append(ExtractedInfo(
                division=division_name,
                department=department_name,
                confidence=round(confidence, 2)
            ))
            logger.info(
                "Match: %s [%s] (similarity: %.3f, confidence: %.2f)",
                division_name, department_name, similarity_score, confidence
            )

    logger.info("✓ Found %d matches", len(results))
    return results