disable use_fast tokenizing
app.py CHANGED
```diff
@@ -41,16 +41,19 @@ st.write('loading chunks into vector db')
 model_name = "hkunlp/instructor-large"
 hf_embeddings = HuggingFaceInstructEmbeddings(
     model_name = model_name)
-db = Chroma.from_documents(texts, hf_embeddings)
+# db = Chroma.from_documents(texts, hf_embeddings)
 
-st.write('loading
+st.write('loading tokenizer')
 #model_name_or_path = "TheBloke/Llama-2-13B-chat-GPTQ"
 model_name_or_path = "TheBloke/Llama-2-13B-chat-GGUF"
+#tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
 tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
 
 model_basename = "model"
+
 use_triton = False
 DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
+st.write('loading LLM')
 model = AutoGPTQForCausalLM.from_quantized(model_name_or_path,
                                            model_basename=model_basename,
                                            use_safetensors=True,
```
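For context, the hunk shows only part of app.py. Below is a hedged reconstruction of the section as it stands after this commit: the imports, the `texts` placeholder, and the tail of the `from_quantized(...)` call (cut off in the hunk) are assumptions inferred from the identifiers in the diff, not code from the repository. Note also that `AutoGPTQForCausalLM` loads GPTQ safetensors, while `model_name_or_path` now points at a GGUF repo; the commented-out GPTQ repo id suggests the loader has not caught up with that switch yet.

```python
# Hedged sketch of the post-commit state of this section of app.py.
# Imports, `texts`, and the tail of from_quantized(...) are assumptions
# inferred from the names in the diff, not part of the actual commit.
import streamlit as st
import torch
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.vectorstores import Chroma
from transformers import AutoTokenizer
from auto_gptq import AutoGPTQForCausalLM

texts = []  # placeholder: the chunked documents built earlier in app.py

st.write('loading chunks into vector db')
model_name = "hkunlp/instructor-large"
hf_embeddings = HuggingFaceInstructEmbeddings(model_name=model_name)
# db = Chroma.from_documents(texts, hf_embeddings)  # disabled by this commit

st.write('loading tokenizer')
#model_name_or_path = "TheBloke/Llama-2-13B-chat-GPTQ"
model_name_or_path = "TheBloke/Llama-2-13B-chat-GGUF"
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)

model_basename = "model"
use_triton = False
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"

st.write('loading LLM')
model = AutoGPTQForCausalLM.from_quantized(
    model_name_or_path,
    model_basename=model_basename,
    use_safetensors=True,
    # The remaining keyword arguments are cut off in the hunk; plausible
    # candidates given the variables above (an assumption, not the source):
    use_triton=use_triton,
    device=DEVICE,
)
```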
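Separately, the commit title says fast tokenization is being disabled, but the live line in the hunk still passes `use_fast=True`; only a commented-out copy was added above it. If the intent is what the title states, the minimal change would be flipping the flag. A sketch of that assumption, not what the diff actually shows:

```python
from transformers import AutoTokenizer

# Assumption: what "disable use_fast tokenizing" would look like once applied.
# use_fast=False selects the slow, pure-Python tokenizer instead of the
# Rust-backed fast one, which can help when a repo lacks fast-tokenizer files.
model_name_or_path = "TheBloke/Llama-2-13B-chat-GGUF"
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=False)
```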