Spaces:
Sleeping
Sleeping
File size: 830 Bytes
94f5c4b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 |
from dataclasses import dataclass
from typing import Any, Dict, Optional
@dataclass
class RAGConfig:
    """Settings bundle for a retrieval-augmented generation (RAG) setup.

    Every field has a default, so ``RAGConfig()`` can be created without
    arguments. This class only stores the values; how each one is consumed
    is decided by code outside this class.
    """

    # Embedding model
    embedding_model_name: str = "sentence-transformers/all-MiniLM-L6-v2"
    normalize_embeddings: bool = True

    # Retrieval parameters
    top_k: int = 5
    # NOTE(review): presumably a minimum similarity score for keeping a
    # retrieved chunk -- confirm against the retriever that reads it.
    similarity_threshold: float = 0.4

    # LLM model
    llm_model_name: str = "facebook/rag-token-base"
    llm_max_length: int = 512
    # None is a sentinel meaning "use the defaults"; __post_init__ replaces it
    # with a fresh dict so instances never share one mutable default.
    generation_kwargs: Optional[Dict[str, Any]] = None

    # PDF paths
    pdf_dir: str = "data/pdfs"
    vector_db_path: str = "data/embeddings/vector_store.pkl"

    # Chunk configuration
    # NOTE(review): the unit (characters vs. tokens) is not visible here --
    # confirm against the chunking code.
    chunk_size: int = 500
    chunk_overlap: int = 100

    def __post_init__(self):
        """Install the default generation settings when none were supplied."""
        if self.generation_kwargs is None:
            self.generation_kwargs = {
                "max_new_tokens": 200,
                "temperature": 0.7,
                "do_sample": True,
            }
|