import logging
import os

from llama_index.core import (
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
)
from llama_index.core.node_parser import SentenceSplitter

logger = logging.getLogger(__name__)

DOCUMENT_PATH = './documents'


def load_write_index_nodes(path):
    """Load a persisted vector index from `path`, or build and persist one
    from the files in DOCUMENT_PATH if no index exists yet.

    Remember to delete the stored vectors when new documents are added,
    so the storage is recreated on the next run.
    """
    # Load the documents up front so sentence nodes can be produced below
    # regardless of whether the index is rebuilt or read back from storage.
    documents = SimpleDirectoryReader(DOCUMENT_PATH, filename_as_id=True).load_data()

    if not os.path.exists(path):
        logger.info(f'Indexing documents in {DOCUMENT_PATH}...')
        index = VectorStoreIndex.from_documents(documents)
        index.storage_context.persist(persist_dir=path)
        logger.info(f'{len(documents)} documents indexed.')
    else:
        logger.info(f'Loading index from {path}...')
        storage_context = StorageContext.from_defaults(persist_dir=path)
        index = load_index_from_storage(storage_context)

    # Split the raw documents into sentence-level nodes for downstream use.
    parser = SentenceSplitter()
    nodes = parser.get_nodes_from_documents(documents)
    return index, nodes
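

# A minimal usage sketch, assuming an embedding model is configured
# (LlamaIndex defaults to OpenAI, so OPENAI_API_KEY would need to be set)
# and that './documents' contains files to index. The './storage' persist
# directory is a hypothetical choice for illustration.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    index, nodes = load_write_index_nodes('./storage')
    logger.info(f'Parsed {len(nodes)} sentence nodes.')
    # Query the index through the default retriever/LLM stack.
    query_engine = index.as_query_engine()
    print(query_engine.query('What topics do these documents cover?'))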