GeoQuery / backend /scripts /update_embeddings.py
GerardCB's picture
Deploy to Spaces (Final Clean)
4851501
"""
Update Embeddings for Semantic Search
Refreshes the embeddings.json index with any new tables in the catalog.
"""
import sys
import asyncio
import logging
from pathlib import Path
# Add project root to path
sys.path.append(str(Path(__file__).parent.parent.parent))
from backend.core.data_catalog import get_data_catalog
from backend.core.semantic_search import get_semantic_search
def update_embeddings():
    """Bring the semantic-search embeddings in line with the data catalog.

    Reloads the data catalog, passes its tables to the semantic search
    service for embedding, and prints the catalog size plus the embedding
    counts before and after the run to stdout.

    This function writes nothing to disk itself; whether the refreshed
    embeddings are persisted is up to the search service (presumably inside
    ``embed_all_tables`` -- confirm against the service implementation).
    """
    print("=" * 60)

    # Force a reload so this run reflects the latest catalog contents.
    data_catalog = get_data_catalog()
    data_catalog.load_catalog()

    searcher = get_semantic_search()

    # Snapshot of the state before any embedding work is done.
    print(f"Catalog size: {len(data_catalog.catalog)} tables")
    print(f"Existing embeddings: {len(searcher.embeddings)}")

    print("\nGenerating embeddings for new tables...")
    # The return value is reported below as the number of newly embedded tables.
    added = searcher.embed_all_tables(data_catalog.catalog)

    print(f"\n✅ Embedded {added} new tables.")
    print(f"Total embedded: {len(searcher.embeddings)}")
# Run the refresh only when executed as a script, not when imported.
if __name__ == "__main__":
    update_embeddings()