Spaces:
Runtime error
Runtime error
| import os | |
| from langchain_community.embeddings import SentenceTransformerEmbeddings | |
| from langchain_community.document_loaders import UnstructuredFileLoader, DirectoryLoader | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain_community.vectorstores import Qdrant | |
# Build the "vector_db" Qdrant collection from the PDFs under data/.
#
# Pipeline: read connection settings -> load the embedding model -> load every
# PDF -> split into overlapping chunks -> embed and upload the chunks.

# Connection settings are read from the environment so that no credential is
# stored in the source. The URL falls back to the cluster this script has
# always targeted; the API key has no fallback on purpose.
# NOTE(review): an API key was previously committed in this file and should be
# treated as compromised -- rotate it in the Qdrant console.
qdrant_url = os.environ.get(
    "QDRANT_URL",
    'https://ea51a65a-6fad-48ce-b571-846d3b496882.us-east4-0.gcp.cloud.qdrant.io',
)
qdrant_api_key = os.environ.get("QDRANT_API_KEY")
if not qdrant_api_key:
    # Fail before any model is loaded or document is parsed, with a message
    # that says exactly what is missing.
    raise RuntimeError(
        "QDRANT_API_KEY is not set; export it before running this script."
    )

# Sentence-embedding model used to vectorise every chunk.
embeddings = SentenceTransformerEmbeddings(model_name="NeuML/pubmedbert-base-embeddings")

# Recursively collect every PDF below data/ and parse each one with
# UnstructuredFileLoader.
loader = DirectoryLoader(
    'data/',
    glob="**/*.pdf",
    show_progress=True,
    loader_cls=UnstructuredFileLoader,
)
documents = loader.load()

# Split into chunks of at most 700 characters, with 70 characters of overlap
# between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=700, chunk_overlap=70)
texts = text_splitter.split_documents(documents)

# Embed the chunks and upload them to the "vector_db" collection over
# HTTP (prefer_grpc=False) on port 6333.
qdrant = Qdrant.from_documents(
    texts,
    embeddings,
    url=qdrant_url,
    port=6333,
    api_key=qdrant_api_key,
    prefer_grpc=False,
    collection_name="vector_db",
)

print("Vector DB Successfully Created!")