# ContactSearchAssistant / contacts_data.py
# Muhammed Essam
# Initial commit: Voice Assistant demo
# 8ef276c
# contacts_data.py
"""
Contact database with 500 fake contacts covering all departments and divisions.
Each contact has Arabic and English names for better search support.
"""
import random
from typing import Dict, List, Optional

from division_hierarchy import DIVISION_TO_DEPARTMENT
# Common Arabic male first names (50 entries).
# Every entry is written purely in Arabic script so that it can be matched
# against the keys of the transliteration table.
ARABIC_FIRST_NAMES_MALE = [
    "محمد", "أحمد", "عبدالله", "عمر", "خالد", "سعد", "فيصل", "سلطان", "ناصر", "طلال",
    "عبدالعزيز", "فهد", "تركي", "سلمان", "بندر", "مشعل", "ماجد", "يوسف", "حسن", "علي",
    "وليد", "زياد", "رامي", "كريم", "عادل", "راشد", "مازن", "طارق", "إبراهيم", "عيسى",
    "نواف", "سامي", "بدر", "عاصم", "وسام", "هاني", "ثامر", "صالح", "ياسر", "جاسم",
    "هشام", "فواز", "معاذ", "عثمان", "أسامة", "باسل", "عمار", "نبيل", "توفيق", "جمال",
]
# Common Arabic female first names (50 entries, five per row).
ARABIC_FIRST_NAMES_FEMALE = [
    "فاطمة", "نورة", "سارة", "منى", "هند",
    "ريم", "لينا", "دانة", "شهد", "جود",
    "رهف", "غلا", "عبير", "أمل", "ندى",
    "رنا", "لمى", "ديمة", "بشرى", "سمية",
    "هيفاء", "ليلى", "زينب", "خلود", "شروق",
    "أريج", "جميلة", "رباب", "سلمى", "وفاء",
    "عائشة", "خديجة", "مريم", "رقية", "زهراء",
    "نجود", "حصة", "عزة", "صفية", "ملاك",
    "روان", "تالا", "جنى", "لين", "ريتاج",
    "أسماء", "سديم", "لمار", "بيان", "شيماء",
]
# Common Arabic family names (72 entries, four per row).
# Each one carries the definite-article prefix "ال".
ARABIC_LAST_NAMES = [
    "العتيبي", "الدوسري", "القحطاني", "الشهري",
    "الغامدي", "الزهراني", "العنزي", "الحربي",
    "المطيري", "العسيري", "السبيعي", "الشمري",
    "الجهني", "العمري", "البقمي", "الفهد",
    "السديري", "الثبيتي", "الصقري", "الأحمد",
    "الخالد", "السليمان", "العبدالله", "الفهيد",
    "الشايع", "الرشيد", "العجمي", "المالك",
    "الفريح", "الحمود", "الناصر", "الشريف",
    "البلوي", "اليامي", "الوادعي", "الفيفي",
    "الشهراني", "البكري", "العسكر", "الراشد",
    "الفايز", "الخليف", "المنيع", "العبيد",
    "السحيم", "الغنام", "السلمان", "الهاجري",
    "النهدي", "الرويلي", "المري", "السواط",
    "الربيعان", "الدغيثر", "الفضلي", "القرني",
    "الثنيان", "العريفي", "الهويدي", "الجريسي",
    "البدراني", "المهيدب", "السالم", "الحارثي",
    "العطوي", "الصخري", "الرحيلي", "السعيد",
    "الحافظ", "الوهيبي", "البراك", "الضويان",
]
# Job titles grouped by seniority category.
# Each entry is an (English title, Arabic title) pair; the category keys are
# the same strings used when a seniority level is picked for a contact.
JOB_TITLES = {
    # Top leadership.
    # NOTE(review): the first two English titles share one Arabic rendering,
    # so the Arabic title alone does not tell them apart - confirm intended.
    "executive": [
        ("Chief Executive Officer", "المدير التنفيذي"),
        ("Executive Director", "المدير التنفيذي"),
        ("Vice President", "نائب الرئيس"),
        ("Senior Vice President", "نائب الرئيس الأول"),
    ],
    # People managers, from directors down to supervisors.
    "management": [
        ("Director", "مدير"),
        ("Senior Manager", "مدير أول"),
        ("Manager", "مدير"),
        ("Assistant Manager", "مساعد مدير"),
        ("Team Leader", "قائد فريق"),
        ("Supervisor", "مشرف"),
    ],
    # Individual contributors in business roles.
    "specialist": [
        ("Senior Specialist", "أخصائي أول"),
        ("Specialist", "أخصائي"),
        ("Senior Analyst", "محلل أول"),
        ("Analyst", "محلل"),
        ("Senior Consultant", "مستشار أول"),
        ("Consultant", "مستشار"),
        ("Senior Officer", "موظف أول"),
        ("Officer", "موظف"),
    ],
    # Engineering and development roles.
    "technical": [
        ("Senior Engineer", "مهندس أول"),
        ("Engineer", "مهندس"),
        ("Technical Lead", "قائد تقني"),
        ("Developer", "مطور"),
        ("Architect", "مهندس معماري"),
    ],
    # Administrative and coordination roles.
    "support": [
        ("Coordinator", "منسق"),
        ("Administrator", "إداري"),
        ("Assistant", "مساعد"),
        ("Associate", "معاون"),
    ],
}
# Phone extensions (4-digit)
def generate_extension() -> str:
    """Return a random phone extension as a 4-digit string ("1000".."9999")."""
    # Starting at 1000 keeps the result four characters long without padding.
    number = random.randint(1000, 9999)
    return f"{number}"
# Email generation
def generate_email(first_name_en: str, last_name_en: str) -> str:
    """
    Build an e-mail address of the form ``first.last@sidf.gov.sa``.

    Both name parts are lower-cased and have their spaces and hyphens
    removed; every other character is kept as-is.
    """
    # One translation table that deletes the two separator characters.
    drop_separators = str.maketrans("", "", " -")
    first, last = (
        part.lower().translate(drop_separators)
        for part in (first_name_en, last_name_en)
    )
    return f"{first}.{last}@sidf.gov.sa"
# Arabic -> English spellings for the first and last names used by this module.
# Built once at import time instead of on every call.
_TRANSLITERATION_MAP = {
    # Male names
    "محمد": "Mohammed", "أحمد": "Ahmed", "عبدالله": "Abdullah", "عمر": "Omar", "خالد": "Khalid",
    "سعد": "Saad", "فيصل": "Faisal", "سلطان": "Sultan", "ناصر": "Nasser", "طلال": "Talal",
    "عبدالعزيز": "Abdulaziz", "فهد": "Fahad", "تركي": "Turki", "سلمان": "Salman", "بندر": "Bandar",
    "مشعل": "Mishaal", "ماجد": "Majed", "يوسف": "Yousef", "حسن": "Hassan", "علي": "Ali",
    "وليد": "Waleed", "زياد": "Ziyad", "رامي": "Rami", "كريم": "Kareem", "عادل": "Adel",
    "راشد": "Rashed", "مازن": "Mazen", "طارق": "Tariq", "إبراهيم": "Ibrahim", "عيسى": "Issa",
    "نواف": "Nawaf", "سامي": "Sami", "بدر": "Badr", "عاصم": "Asim", "وسام": "Wissam",
    "هاني": "Hani", "ثامر": "Thamer", "صالح": "Saleh", "ياسر": "Yasser", "جاسم": "Jasim",
    "هشام": "Hisham", "فواز": "Fawaz", "معاذ": "Muath", "عثمان": "Othman", "أسامة": "Osama",
    "باسل": "Basel", "عمار": "Ammar", "نبيل": "Nabil", "توفيق": "Tawfiq", "جمال": "Jamal",
    # Female names
    "فاطمة": "Fatima", "نورة": "Noura", "سارة": "Sarah", "منى": "Mona", "هند": "Hind",
    "ريم": "Reem", "لينا": "Lina", "دانة": "Dana", "شهد": "Shahad", "جود": "Joud",
    "رهف": "Rahaf", "غلا": "Ghala", "عبير": "Abeer", "أمل": "Amal", "ندى": "Nada",
    "رنا": "Rana", "لمى": "Lama", "ديمة": "Dima", "بشرى": "Bushra", "سمية": "Somaya",
    "هيفاء": "Haifa", "ليلى": "Layla", "زينب": "Zainab", "خلود": "Kholoud", "شروق": "Shorouq",
    "أريج": "Areej", "جميلة": "Jamila", "رباب": "Rabab", "سلمى": "Salma", "وفاء": "Wafa",
    "عائشة": "Aisha", "خديجة": "Khadija", "مريم": "Maryam", "رقية": "Ruqaya", "زهراء": "Zahra",
    "نجود": "Nujoud", "حصة": "Hessa", "عزة": "Azza", "صفية": "Safiya", "ملاك": "Malak",
    "روان": "Rawan", "تالا": "Tala", "جنى": "Jana", "لين": "Leen", "ريتاج": "Ritaj",
    "أسماء": "Asma", "سديم": "Sadeem", "لمار": "Lamar", "بيان": "Bayan", "شيماء": "Shaima",
    # Last names
    "العتيبي": "Al-Otaibi", "الدوسري": "Al-Dosari", "القحطاني": "Al-Qahtani", "الشهري": "Al-Shahri",
    "الغامدي": "Al-Ghamdi", "الزهراني": "Al-Zahrani", "العنزي": "Al-Anazi", "الحربي": "Al-Harbi",
    "المطيري": "Al-Mutairi", "العسيري": "Al-Asiri", "السبيعي": "Al-Subaie", "الشمري": "Al-Shammari",
    "الجهني": "Al-Juhani", "العمري": "Al-Omari", "البقمي": "Al-Buqami", "الفهد": "Al-Fahad",
    "السديري": "Al-Sudairi", "الثبيتي": "Al-Thubaiti", "الصقري": "Al-Saqri", "الأحمد": "Al-Ahmad",
    "الخالد": "Al-Khalid", "السليمان": "Al-Sulaiman", "العبدالله": "Al-Abdullah", "الفهيد": "Al-Fahaid",
    "الشايع": "Al-Shaya", "الرشيد": "Al-Rasheed", "العجمي": "Al-Ajmi", "المالك": "Al-Malek",
    "الفريح": "Al-Fraihi", "الحمود": "Al-Hamoud", "الناصر": "Al-Nasser", "الشريف": "Al-Shareef",
    "البلوي": "Al-Balawi", "اليامي": "Al-Yami", "الوادعي": "Al-Wadei", "الفيفي": "Al-Faifi",
    "الشهراني": "Al-Shahrani", "البكري": "Al-Bakri", "العسكر": "Al-Askar", "الراشد": "Al-Rashed",
    "الفايز": "Al-Fayez", "الخليف": "Al-Khleif", "المنيع": "Al-Manie", "العبيد": "Al-Obaid",
    "السحيم": "Al-Suhaim", "الغنام": "Al-Ghannam", "السلمان": "Al-Salman", "الهاجري": "Al-Hajri",
    "النهدي": "Al-Nahdi", "الرويلي": "Al-Ruwaili", "المري": "Al-Marri", "السواط": "Al-Sawat",
    "الربيعان": "Al-Rabian", "الدغيثر": "Al-Dughither", "الفضلي": "Al-Fadhli", "القرني": "Al-Qarni",
    "الثنيان": "Al-Thuniyan", "العريفي": "Al-Arifi", "الهويدي": "Al-Huwaidi", "الجريسي": "Al-Juraysi",
    "البدراني": "Al-Badrani", "المهيدب": "Al-Muhaidib", "السالم": "Al-Salem", "الحارثي": "Al-Harthi",
    "العطوي": "Al-Atawi", "الصخري": "Al-Sakhri", "الرحيلي": "Al-Rahili", "السعيد": "Al-Saeed",
    "الحافظ": "Al-Hafiz", "الوهيبي": "Al-Wahaibi", "البراك": "Al-Barrak", "الضويان": "Al-Dhuwayan",
}


def transliterate_arabic_name(arabic_name: str) -> str:
    """
    Return the English spelling of a known Arabic first or last name.

    This is a fixed table lookup, not a general transliteration algorithm.

    Args:
        arabic_name: A single name part written in Arabic script.

    Returns:
        The mapped English spelling, or ``arabic_name`` unchanged when the
        name is not in the table.
    """
    return _TRANSLITERATION_MAP.get(arabic_name, arabic_name)
def _seniority_mix(num_contacts: int) -> List[str]:
    """Return a shuffled list of exactly ``num_contacts`` JOB_TITLES category keys."""
    # Target shares: 10% executive, 20% management, 50% specialist,
    # 15% technical, 5% support. Every level gets at least one slot.
    shares = (
        ("executive", 0.10),
        ("management", 0.20),
        ("specialist", 0.50),
        ("technical", 0.15),
        ("support", 0.05),
    )
    mix: List[str] = []
    for level, share in shares:
        mix.extend([level] * max(1, int(num_contacts * share)))
    # Rounding down can leave the list short: pad with "specialist".
    mix.extend(["specialist"] * max(0, num_contacts - len(mix)))
    # The one-per-level minimum can overshoot for small counts: trim the tail.
    del mix[num_contacts:]
    random.shuffle(mix)
    return mix


def generate_contacts() -> List[Dict]:
    """
    Generate 500 fake contacts spread as evenly as possible across divisions.

    Divisions are the keys of DIVISION_TO_DEPARTMENT. Each division gets
    ``500 // n`` contacts and the first ``500 % n`` divisions get one more.
    Full names and phone extensions are re-drawn when they collide with an
    earlier contact, so the name-keyed lookups built from the result do not
    silently drop entries. Results are random and differ between runs.

    Returns:
        A list of contact dictionaries with ids counting up from 1000.
        The list is empty when DIVISION_TO_DEPARTMENT has no divisions.
    """
    total_contacts = 500
    # Upper bound on re-draws per field. After that a duplicate is accepted
    # rather than looping forever if the name or extension pool is exhausted.
    max_draw_attempts = 100

    divisions = list(DIVISION_TO_DEPARTMENT)
    if not divisions:
        # Nothing to distribute over; also avoids dividing by zero below.
        return []

    contacts_per_division, extra_contacts = divmod(total_contacts, len(divisions))

    contacts: List[Dict] = []
    contact_id = 1000  # Starting ID
    used_names_ar = set()
    used_names_en = set()
    used_extensions = set()

    for div_index, division in enumerate(divisions):
        department_name, department_id = DIVISION_TO_DEPARTMENT[division]

        # The first `extra_contacts` divisions absorb the remainder.
        num_contacts = contacts_per_division
        if div_index < extra_contacts:
            num_contacts += 1

        for seniority in _seniority_mix(num_contacts):
            # Draw a name (60% male, 40% female) until the full name is new.
            for _ in range(max_draw_attempts):
                if random.random() < 0.6:
                    first_name_ar = random.choice(ARABIC_FIRST_NAMES_MALE)
                else:
                    first_name_ar = random.choice(ARABIC_FIRST_NAMES_FEMALE)
                last_name_ar = random.choice(ARABIC_LAST_NAMES)

                first_name_en = transliterate_arabic_name(first_name_ar)
                last_name_en = transliterate_arabic_name(last_name_ar)

                full_name_ar = f"{first_name_ar} {last_name_ar}"
                full_name_en = f"{first_name_en} {last_name_en}"
                if full_name_ar not in used_names_ar and full_name_en not in used_names_en:
                    break
            used_names_ar.add(full_name_ar)
            used_names_en.add(full_name_en)

            title_en, title_ar = random.choice(JOB_TITLES[seniority])

            # Same idea for the extension, so phone numbers do not repeat.
            for _ in range(max_draw_attempts):
                extension = generate_extension()
                if extension not in used_extensions:
                    break
            used_extensions.add(extension)

            contacts.append({
                "id": contact_id,
                "first_name_ar": first_name_ar,
                "last_name_ar": last_name_ar,
                "full_name_ar": full_name_ar,
                "first_name_en": first_name_en,
                "last_name_en": last_name_en,
                "full_name_en": full_name_en,
                "title_en": title_en,
                "title_ar": title_ar,
                "division": division,
                "department": department_name,
                "department_id": department_id,
                "email": generate_email(first_name_en, last_name_en),
                "extension": extension,
                "phone": f"+966-11-218-{extension}",
            })
            contact_id += 1

    return contacts
# Build the contact list once, when the module is imported.
CONTACTS_DATABASE = generate_contacts()

# Lookup tables derived from the list in a single pass:
#   - full Arabic name  -> contact (a later duplicate name replaces an earlier one)
#   - full English name -> contact (same rule)
#   - division          -> list of its contacts, in generation order
CONTACTS_BY_NAME_AR = {}
CONTACTS_BY_NAME_EN = {}
CONTACTS_BY_DIVISION = {}
for contact in CONTACTS_DATABASE:
    CONTACTS_BY_NAME_AR[contact["full_name_ar"]] = contact
    CONTACTS_BY_NAME_EN[contact["full_name_en"]] = contact
    division = contact["division"]
    CONTACTS_BY_DIVISION.setdefault(division, []).append(contact)
def get_all_contacts() -> List[Dict]:
    """
    Get all contacts.

    Returns:
        The module-level CONTACTS_DATABASE list itself, not a copy, so
        in-place changes made by a caller affect every other user of it.
    """
    return CONTACTS_DATABASE
def get_contacts_by_division(division: str) -> List[Dict]:
    """
    Get contacts for a specific division.

    Args:
        division: Division name, matched exactly against the keys of
            CONTACTS_BY_DIVISION.

    Returns:
        The list stored for that division, or a new empty list when the
        division is not in the index.
    """
    return CONTACTS_BY_DIVISION.get(division, [])
def get_contact_by_name(name: str) -> Optional[Dict]:
    """
    Get a contact by exact full name (Arabic or English).

    Args:
        name: Full name exactly as stored, i.e. first and last name joined
            by a single space. Matching is case-sensitive.

    Returns:
        The matching contact dictionary, or ``None`` when neither index
        contains the name.
    """
    # The Arabic index is consulted first, then the English one.
    for index in (CONTACTS_BY_NAME_AR, CONTACTS_BY_NAME_EN):
        contact = index.get(name)
        if contact:
            return contact
    return None
if __name__ == "__main__":
    # Manual smoke test: print a few generated contacts and the
    # per-department head count.
    from collections import Counter

    contacts = get_all_contacts()
    print(f"Generated {len(contacts)} contacts")

    print("\nSample contacts:")
    for position, contact in enumerate(contacts[:5], start=1):
        print(f"{position}. {contact['full_name_en']} ({contact['full_name_ar']})")
        print(f" {contact['title_en']} - {contact['division']}")
        print(f" {contact['email']} | Ext: {contact['extension']}")
        print()

    # Largest departments first; ties keep their first-seen order.
    dept_counts = Counter(contact["department"] for contact in contacts)
    print("\nContacts by Department:")
    for dept, count in dept_counts.most_common():
        print(f" {dept}: {count}")