xu song committed
Commit 931d3ff · Parent: 55f05a7
update

models/cpp_qwen2.py  (+7 -4)  CHANGED
@@ -4,7 +4,7 @@ https://github.com/awinml/llama-cpp-python-bindings
 
 python convert_hf_to_gguf.py --outtype f16 Qwen1.5-0.5B-Chat
 
-python convert_hf_to_gguf.py /workspace/xusong/huggingface/models/
+python convert_hf_to_gguf.py /workspace/xusong/huggingface/models/Qwen2-0.5B-Instruct/
 
 
 ./llama-cli -m /workspace/xusong/huggingface/models/Qwen1.5-0.5B-Chat/Qwen1.5-0.5B-Chat-F16.gguf -p "I believe the meaning of life is" -n 128
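
For reference, the ./llama-cli smoke test in the docstring above can also be run through llama-cpp-python itself. A minimal sketch, assuming the same F16 GGUF file produced by the conversion step (the path and prompt are taken from the command above):

import llama_cpp

# Load the converted GGUF (path taken from the llama-cli command above)
llm = llama_cpp.Llama(
    model_path="/workspace/xusong/huggingface/models/Qwen1.5-0.5B-Chat/Qwen1.5-0.5B-Chat-F16.gguf",
)
# Equivalent of: -p "I believe the meaning of life is" -n 128
out = llm("I believe the meaning of life is", max_tokens=128)
print(out["choices"][0]["text"])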
@@ -37,15 +37,17 @@ import config
 class Qwen2Simulator(Simulator):
 
     def __init__(self):
-        self.hf_tokenizer = AutoTokenizer.from_pretrained(
-            "/workspace/xusong/huggingface/models/Qwen2-0.5B-Instruct/")
-
         local_path = "/workspace/xusong/huggingface/models/Qwen2-0.5B-Instruct-GGUF/qwen2-0_5b-instruct-fp16.gguf"
         if os.path.exists(local_path):
+            self.hf_tokenizer = AutoTokenizer.from_pretrained(
+                "/workspace/xusong/huggingface/models/Qwen2-0.5B-Instruct/")
             self.llm = llama_cpp.Llama(  # n_ctx, n_threads
                 model_path=local_path,
+                # the default GGUF tokenizer is buggy: it produces different ids when tokenizing
+                tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer(self.hf_tokenizer),
                 n_ctx=config.MAX_SEQUENCE_LENGTH,  #
                 # n_threads=None,  # by default, n_threads is set from the number of CPUs
+
                 use_mlock=True,
                 verbose=True,
             )
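
The new comment in the hunk above refers to id mismatches between llama.cpp's built-in GGUF tokenizer and the original Hugging Face tokenizer, which is why the Llama instance is now given a LlamaHFTokenizer wrapper. A minimal sketch of how such a mismatch can be observed (the probe string is illustrative; vocab_only=True loads only the vocabulary, not the weights):

import llama_cpp
from transformers import AutoTokenizer

hf_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
llm = llama_cpp.Llama(model_path="qwen2-0_5b-instruct-fp16.gguf", vocab_only=True)

text = "<|im_start|>user\nhello<|im_end|>"
hf_ids = hf_tokenizer.encode(text)
gguf_ids = llm.tokenize(text.encode("utf-8"), add_bos=False, special=True)
print(hf_ids)    # ids from the Hugging Face tokenizer
print(gguf_ids)  # ids from llama.cpp's built-in tokenizer; a difference here is the bug the comment describes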
@@ -53,6 +55,7 @@ class Qwen2Simulator(Simulator):
             self.hf_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
             self.llm = llama_cpp.Llama.from_pretrained(
                 repo_id="Qwen/Qwen2-0.5B-Instruct-GGUF",
+                tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer(self.hf_tokenizer),
                 filename="*fp16.gguf",
                 n_ctx=config.MAX_SEQUENCE_LENGTH,
                 use_mlock=True,
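
As a side note, llama-cpp-python also ships a convenience constructor for the same wrapper, so the Hugging Face tokenizer would not need to be loaded separately beforehand. A minimal sketch of that variant, assuming LlamaHFTokenizer.from_pretrained is available in the installed version:

import llama_cpp
from llama_cpp.llama_tokenizer import LlamaHFTokenizer

# LlamaHFTokenizer.from_pretrained loads the HF tokenizer internally
llm = llama_cpp.Llama.from_pretrained(
    repo_id="Qwen/Qwen2-0.5B-Instruct-GGUF",
    filename="*fp16.gguf",
    tokenizer=LlamaHFTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct"),
)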