Steven Chen committed on
Commit
16081bf
·
verified ·
1 Parent(s): 2d528ea
Files changed (1) hide show
  1. app.py +49 -21
app.py CHANGED
@@ -112,13 +112,13 @@ def load_files(file_paths: list):
112
  docs.extend(loaded_docs)
113
  return docs
114
 
115
- def split_text(txt, chunk_size=200, overlap=20):
116
- if not txt:
117
- return None
118
 
119
- splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=overlap)
120
- docs = splitter.split_documents(txt)
121
- return docs
122
 
123
  def create_embedding_model(model_file):
124
  embedding = HuggingFaceEmbeddings(model_name=model_file, model_kwargs={'trust_remote_code': True})
@@ -137,10 +137,10 @@ def file_paths_match(store_path, file_paths):
137
  saved_file_paths = load_file_paths(store_path)
138
  return saved_file_paths == file_paths
139
 
140
- def create_vector_store(docs, store_file, embeddings):
141
- vector_store = FAISS.from_documents(docs, embeddings)
142
- vector_store.save_local(store_file)
143
- return vector_store
144
 
145
  def load_vector_store(store_path, embeddings):
146
  if os.path.exists(store_path):
@@ -149,20 +149,48 @@ def load_vector_store(store_path, embeddings):
149
  else:
150
  return None
151
 
152
- def load_or_create_store(store_path, file_paths, embeddings):
153
- if os.path.exists(store_path) and file_paths_match(store_path, file_paths):
154
- print("Vector database is consistent with last use, no need to rewrite")
155
- vector_store = load_vector_store(store_path, embeddings)
156
- if vector_store:
157
- return vector_store
158
 
159
- print("Rewriting database")
160
- pages = load_files(file_paths)
161
- docs = split_text(pages)
162
- vector_store = create_vector_store(docs, store_path, embeddings)
163
- save_file_paths(store_path, file_paths)
 
 
 
 
 
164
  return vector_store
165
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
  def query_vector_store(vector_store: FAISS, query, k=4, relevance_threshold=0.8):
167
  retriever = vector_store.as_retriever(
168
  search_type="similarity_score_threshold",
 
112
  docs.extend(loaded_docs)
113
  return docs
114
 
115
+ # def split_text(txt, chunk_size=200, overlap=20):
116
+ # if not txt:
117
+ # return None
118
 
119
+ # splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=overlap)
120
+ # docs = splitter.split_documents(txt)
121
+ # return docs
122
 
123
  def create_embedding_model(model_file):
124
  embedding = HuggingFaceEmbeddings(model_name=model_file, model_kwargs={'trust_remote_code': True})
 
137
  saved_file_paths = load_file_paths(store_path)
138
  return saved_file_paths == file_paths
139
 
140
+ # def create_vector_store(docs, store_file, embeddings):
141
+ # vector_store = FAISS.from_documents(docs, embeddings)
142
+ # vector_store.save_local(store_file)
143
+ # return vector_store
144
 
145
  def load_vector_store(store_path, embeddings):
146
  if os.path.exists(store_path):
 
149
  else:
150
  return None
151
 
152
def split_text(txt, chunk_size=200, overlap=20):
    """Split loaded documents into overlapping character chunks.

    Args:
        txt: sequence of loaded documents (as produced by ``load_files``);
            may be empty or ``None``.
        chunk_size: maximum number of characters per chunk.
        overlap: number of characters shared between adjacent chunks.

    Returns:
        A list of chunked documents; an empty list when ``txt`` is falsy,
        so callers can always iterate over the result safely.
    """
    if not txt:
        return []  # empty list instead of None

    chunker = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=overlap,
    )
    return chunker.split_documents(txt)
159
+
160
def create_vector_store(docs, store_file, embeddings):
    """Build a FAISS index from ``docs``, persist it locally, and return it.

    Args:
        docs: non-empty list of chunked documents to index.
        store_file: local path where the index is saved.
        embeddings: embedding model used to vectorise the documents.

    Returns:
        The newly created FAISS vector store.

    Raises:
        ValueError: when ``docs`` is empty or ``None``.
    """
    if not docs:  # refuse to build an index from nothing
        raise ValueError("No documents provided for creating vector store")

    store = FAISS.from_documents(docs, embeddings)
    store.save_local(store_file)
    return store
167
 
168
def load_or_create_store(store_path, file_paths, embeddings):
    """Return a vector store for ``file_paths``, reusing the cached one when valid.

    When a store exists at ``store_path`` and was built from exactly the same
    file paths, it is loaded and returned. Otherwise the documents are
    re-loaded, split, indexed, and the file-path manifest is saved alongside.

    Args:
        store_path: local path of the FAISS store (and its path manifest).
        file_paths: list of source document paths to index.
        embeddings: embedding model used to vectorise the documents.

    Returns:
        A FAISS vector store, loaded from disk or freshly built.

    Raises:
        ValueError: when no documents could be loaded or no chunks produced.
        Exception: any failure from the underlying steps is re-raised after
            being printed.
    """
    try:
        cached_is_valid = os.path.exists(store_path) and file_paths_match(store_path, file_paths)
        if cached_is_valid:
            print("Vector database is consistent with last use, no need to rewrite")
            cached = load_vector_store(store_path, embeddings)
            if cached:
                return cached

        # Cache missing, stale, or unreadable — rebuild from scratch.
        print("Rewriting database")
        pages = load_files(file_paths)
        if not pages:
            raise ValueError("No documents loaded from provided file paths")

        chunks = split_text(pages)
        if not chunks:
            raise ValueError("No documents created after splitting text")

        store = create_vector_store(chunks, store_path, embeddings)
        save_file_paths(store_path, file_paths)
        return store

    except Exception as err:
        # Surface the failure for the operator, then propagate to the caller.
        print(f"Error creating vector store: {str(err)}")
        raise
193
+
194
  def query_vector_store(vector_store: FAISS, query, k=4, relevance_threshold=0.8):
195
  retriever = vector_store.as_retriever(
196
  search_type="similarity_score_threshold",