Jan Biermeyer committed
Commit aa413f7 · 1 Parent(s): c8a749d

still fixing

Files changed:
- rag/model_loader.py (+13 -4)
rag/model_loader.py CHANGED

@@ -180,11 +180,17 @@ def load_enhanced_model_m2max() -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
     cache_dir = os.getenv("HF_HOME") or os.getenv("TRANSFORMERS_CACHE") or "/workspace/.cache/huggingface" if os.getenv("WORKSPACE") else ".cache/huggingface"
 
     # For LoRA models, try loading tokenizer from LoRA directory first, then base model
+    # Use slow tokenizer (use_fast=False) which requires sentencepiece for Llama/Mistral models
     tokenizer = None
     if model_path and (model_path / "tokenizer.json").exists():
         try:
             logger.info(f"📝 Loading tokenizer from LoRA directory: {model_path}")
-            tokenizer = AutoTokenizer.from_pretrained(
+            tokenizer = AutoTokenizer.from_pretrained(
+                str(model_path),
+                cache_dir=cache_dir,
+                trust_remote_code=True,
+                use_fast=False  # Use slow tokenizer with sentencepiece
+            )
         except Exception as e:
             logger.warning(f"⚠️ Could not load tokenizer from LoRA dir: {e}, using base model")
 
@@ -193,7 +199,8 @@ def load_enhanced_model_m2max() -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
             base_model_name,
             cache_dir=cache_dir,
             padding_side='left',  # Required for decoder-only models
-            trust_remote_code=True
+            trust_remote_code=True,
+            use_fast=False  # Use slow tokenizer with sentencepiece
         )
 
         if tokenizer.pad_token is None:
@@ -297,7 +304,8 @@ def load_enhanced_model_m2max() -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
             base_model_name,
             cache_dir=cache_dir,
             padding_side='left',
-            trust_remote_code=True
+            trust_remote_code=True,
+            use_fast=False  # Use slow tokenizer with sentencepiece
         )
 
         if tokenizer.pad_token is None:
@@ -380,7 +388,8 @@ def load_enhanced_model_m2max() -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
             base_model_name,
             cache_dir=cache_dir,
             padding_side='left',
-            trust_remote_code=True
+            trust_remote_code=True,
+            use_fast=False  # Use slow tokenizer with sentencepiece
         )
 
         if tokenizer.pad_token is None: