Spaces:
Paused
Paused
| import logging | |
| import os | |
| from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, StorageContext, load_index_from_storage | |
| from llama_index.core.node_parser import SentenceSplitter | |
# Module-level logger used for the indexing/loading progress messages below.
logger = logging.getLogger(__name__)

# Directory that SimpleDirectoryReader scans for the source documents.
DOCUMENT_PATH = './documents'

# NOTE: the persisted index is rebuilt only when its storage directory is
# missing. Remember to delete the stored vectors whenever new documents are
# added to the data so that the storage is recreated.
def load_write_index_nodes(path):
    """Return the vector index together with the sentence-split document nodes.

    If ``path`` does not exist, the documents under ``DOCUMENT_PATH`` are
    indexed and the resulting index is persisted to ``path``. Otherwise the
    index is loaded from the storage persisted at ``path``.

    Args:
        path: Directory in which the index is (or will be) persisted.

    Returns:
        A ``(index, nodes)`` tuple: the vector index and the list of nodes
        produced by running ``SentenceSplitter`` over the loaded documents.
    """
    # Read the documents in both branches. Previously they were only read when
    # the index had to be built, so loading a persisted index split an empty
    # list and returned no nodes at all.
    documents = SimpleDirectoryReader(DOCUMENT_PATH, filename_as_id=True).load_data()

    if not os.path.exists(path):
        logger.info(f'Indexing documents in {DOCUMENT_PATH}...')
        index = VectorStoreIndex.from_documents(documents)
        # Persist so later calls can take the cheaper load branch.
        index.storage_context.persist(persist_dir=path)
        logger.info(f'{len(documents)} documents indexed.')
    else:
        logger.info(f'Loading index from {path}...')
        storage_context = StorageContext.from_defaults(persist_dir=path)
        index = load_index_from_storage(storage_context)

    # Nodes are derived from the documents themselves, independent of whether
    # the index was just built or loaded from disk.
    parser = SentenceSplitter()
    nodes = parser.get_nodes_from_documents(documents)
    return index, nodes