Spaces:
Build error
Build error
| from langchain_openai import OpenAIEmbeddings | |
| from langchain_chroma import Chroma | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain.docstore.document import Document | |
| import os | |
| from config import PERSIST_DIRECTORY | |
def process_safety_with_chroma(text):
    """
    Return a Chroma vector store backed by PERSIST_DIRECTORY.

    When PERSIST_DIRECTORY already exists on disk, the store is opened from
    that directory and ``text`` is not used. Otherwise ``text`` is split into
    chunks (chunk_size=2000, chunk_overlap=100), each chunk is wrapped in a
    Document tagged with a ``chunk_<index>`` source, and a new store is built
    from those documents with PERSIST_DIRECTORY as its persist directory.

    Args:
        text (str): Text to be split, embedded and stored. Only consulted
            when PERSIST_DIRECTORY does not exist yet.

    Returns:
        Chroma: The Chroma vector store object.
    """
    # Guard clause: a directory that is already present is reused as-is.
    if os.path.exists(PERSIST_DIRECTORY):
        return Chroma(
            persist_directory=PERSIST_DIRECTORY,
            embedding_function=OpenAIEmbeddings(),
        )

    # Nothing persisted yet: chunk the input and build a fresh store from it.
    splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=100)
    pieces = splitter.split_text(text)
    docs = [
        Document(page_content=piece, metadata={"source": f"chunk_{index}"})
        for index, piece in enumerate(pieces)
    ]
    embedder = OpenAIEmbeddings()
    return Chroma.from_documents(docs, embedder, persist_directory=PERSIST_DIRECTORY)