Commit · d4bf693
1 Parent(s): 1557ad2
minor fix

Files changed:
- requirements.txt (+4 -2)
- src/backend/model_operations.py (+24 -16)
requirements.txt CHANGED

@@ -12,11 +12,13 @@ pandas==2.0.0
 python-dateutil==2.8.2
 requests==2.28.2
 tqdm==4.65.0
-transformers
+transformers
 tokenizers>=0.15.0
 sentence-transformers==2.2.2
 google-generativeai
 replicate
 anthropic
 openai
-cohere
+cohere
+mistralai
+peft
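The two new entries, mistralai and peft, back the code changes below: the 1.x Mistral client for mistral-large calls and PEFT for adapter-tuned checkpoints. Neither is pinned, so a quick check of the resolved versions after installing can be useful (a hedged sketch, not part of the commit):

from importlib.metadata import version

# The code in this commit assumes the 1.x mistralai client (from mistralai import Mistral);
# versions below 1.0 only expose the old MistralClient/ChatMessage API.
for pkg in ("mistralai", "peft", "transformers"):
    print(pkg, version(pkg))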
src/backend/model_operations.py CHANGED
@@ -9,19 +9,17 @@ import json
 import numpy as np
 import pandas as pd
 import spacy
-from sentence_transformers import CrossEncoder
 import litellm
 from tqdm import tqdm
-from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, AutoModelForTokenClassification
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, AutoModelForTokenClassification, AutoConfig
+from peft import PeftModel
 import torch
 import cohere
 from openai import OpenAI
 import anthropic
 import replicate
-# pip install -U google-generativeai
 import google.generativeai as genai
-from mistralai.client import MistralClient
-from mistralai.models.chat_completion import ChatMessage
+from mistralai import Mistral

 import src.backend.util as util
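The new peft dependency and the PeftModel import suggest support for adapter-tuned checkpoints; the adapter-loading code itself is not part of the hunks shown in this commit. As a hedged illustration only (the base model and adapter identifiers below are placeholders), loading a LoRA adapter on top of a causal LM typically looks like:

from transformers import AutoModelForCausalLM
from peft import PeftModel

# Placeholder identifiers; the actual base model and adapter used by the Space are not shown in this diff.
base_id = "meta-llama/Llama-2-7b-hf"
adapter_id = "some-org/some-lora-adapter"

base = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype="auto", device_map="auto")
model = PeftModel.from_pretrained(base, adapter_id)  # wrap the base model with the LoRA weights
model = model.merge_and_unload()                     # optional: fold the adapter into the base for plain inference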
@@ -330,15 +328,21 @@ class SummaryGenerator:

         elif 'mistral-large' in self.model_id.lower():
             api_key = os.environ["MISTRAL_API_KEY"]
-            client = MistralClient(api_key=api_key)
+            client = Mistral(api_key=api_key)

             messages = [
-                ChatMessage(role="system", content=system_prompt),
-                ChatMessage(role="user", content=user_prompt)
+                {
+                    "role":"system",
+                    "content":system_prompt
+                },
+                {
+                    "role":"user",
+                    "content":user_prompt
+                }
             ]

             # No streaming
-            chat_response = client.chat(
+            chat_response = client.chat.complete(
                 model=self.model_id,
                 messages=messages,
             )
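For context, the call pattern above follows the mistralai 1.x client, which replaces the 0.x MistralClient/ChatMessage interface. A minimal standalone sketch (the model name is a placeholder, and the way the reply is read is an assumption, since the code consuming chat_response lies outside the hunks shown):

import os
from mistralai import Mistral

client = Mistral(api_key=os.environ["MISTRAL_API_KEY"])
resp = client.chat.complete(
    model="mistral-large-latest",  # placeholder model id
    messages=[
        {"role": "system", "content": "You are a concise summarizer."},
        {"role": "user", "content": "Summarize: ..."},
    ],
)
print(resp.choices[0].message.content)  # assistant reply text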
@@ -373,6 +377,7 @@ class SummaryGenerator:
             self.local_pipeline = pipeline(
                 "text-generation",
                 model=self.model_id,
+                tokenizer=AutoTokenizer.from_pretrained(self.model_id),
                 model_kwargs={"torch_dtype": torch.bfloat16},
                 device_map="auto",
                 trust_remote_code=True
@@ -384,7 +389,8 @@ class SummaryGenerator:
             self.local_model = AutoModelForCausalLM.from_pretrained(self.model_id,
                                                                     torch_dtype=torch.bfloat16,
                                                                     attn_implementation="flash_attention_2",
-                                                                    device_map="auto")
+                                                                    device_map="auto",
+                                                                    use_mamba_kernels=False)
         else:
             self.local_model = AutoModelForCausalLM.from_pretrained(self.model_id, trust_remote_code=True, device_map="auto", torch_dtype="auto")
             # print(self.local_model.device)
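The use_mamba_kernels=False flag applies to hybrid Mamba-attention models such as Jamba (which the decoding changes below also reference): it tells transformers to use the native PyTorch Mamba path instead of requiring the optional mamba-ssm and causal-conv1d CUDA kernels. A hedged sketch of the same load, with a placeholder checkpoint:

import torch
from transformers import AutoModelForCausalLM

# Placeholder checkpoint; the Space's actual model_id comes from its configuration.
model = AutoModelForCausalLM.from_pretrained(
    "ai21labs/AI21-Jamba-1.5-Mini",
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",  # requires the flash-attn package and a supported GPU
    device_map="auto",
    use_mamba_kernels=False,  # skip the optional mamba-ssm/causal-conv1d fast kernels
)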
@@ -401,7 +407,7 @@ class SummaryGenerator:
             outputs = self.local_pipeline(
                 messages,
                 max_new_tokens=250,
-
+                # return_full_text=False,
                 do_sample=False
             )
             result = outputs[0]["generated_text"][-1]['content']
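When a chat-style list of messages is passed to a text-generation pipeline, recent transformers versions return the running conversation under "generated_text", so the last element's "content" field is the newly generated assistant turn; the commented-out return_full_text=False flag would instead return only the generated continuation. A small illustration under those assumptions, with a placeholder model:

from transformers import pipeline

# Placeholder small chat model; the Space selects the checkpoint via model_id.
pipe = pipeline("text-generation", model="Qwen/Qwen2.5-0.5B-Instruct")
messages = [
    {"role": "system", "content": "You are terse."},
    {"role": "user", "content": "Name one prime number."},
]
out = pipe(messages, max_new_tokens=20, do_sample=False)
# With the default return_full_text=True, generated_text is the whole conversation,
# so the last message is the newly generated assistant turn.
reply = out[0]["generated_text"][-1]["content"]
print(reply)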
@@ -434,15 +440,16 @@ class SummaryGenerator:
             input_ids = self.tokenizer(prompt, return_tensors="pt").to(self.device)
             with torch.no_grad():
                 outputs = self.local_model.generate(**input_ids, max_new_tokens=250, do_sample=True, temperature=0.01, pad_token_id=self.tokenizer.eos_token_id)
-
-
+            if 'glm' in self.model_id.lower():
+                outputs = outputs[:, input_ids['input_ids'].shape[1]:]
+
             result = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
             if 'gemma-2' in self.model_id.lower():
                 result = result.split(user_prompt + '\nmodel')[-1].strip()
-
             elif 'intel' in self.model_id.lower():
                 result = result.split("### Assistant:\n")[-1]
-
+            elif 'jamba' in self.model_id.lower():
+                result = result.split(messages[-1]['content'])[1].strip()
             else:
                 # print(prompt)
                 # print('-'*50)
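Both new branches handle the fact that decoder-only models echo the prompt in their output: the GLM branch strips the prompt at the token level by slicing off the first input_ids['input_ids'].shape[1] positions before decoding, while the Jamba branch splits the decoded string on the last user message. A minimal sketch of the token-level variant (the checkpoint and prompt are placeholders):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "THUDM/glm-4-9b-chat"  # placeholder GLM checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True, device_map="auto", torch_dtype="auto")

enc = tokenizer("Summarize: the quick brown fox ...", return_tensors="pt").to(model.device)
with torch.no_grad():
    out = model.generate(**enc, max_new_tokens=50)

# Drop the echoed prompt tokens so only newly generated tokens are decoded.
new_tokens = out[:, enc["input_ids"].shape[1]:]
print(tokenizer.decode(new_tokens[0], skip_special_tokens=True))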
@@ -496,7 +503,8 @@ class EvaluationModel:
         Args:
             model_path (str): Path to the CrossEncoder model.
         """
-        self.model = CrossEncoder(model_path)
+        config = AutoConfig.from_pretrained('google/flan-t5-large')
+        self.model = AutoModelForTokenClassification.from_pretrained(model_path, config=config)
         self.device = device
         self.model.to(self.device)
         self.scores = []
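This hunk replaces the previous sentence-transformers CrossEncoder evaluator with a transformers token-classification model whose configuration is taken from google/flan-t5-large while the weights come from model_path. A hedged standalone sketch of that loading pattern (the model_path value is a placeholder; the real path is supplied by the Space's configuration):

import torch
from transformers import AutoConfig, AutoModelForTokenClassification

model_path = "path/or/hub-id-of-the-evaluator"  # placeholder
device = "cuda" if torch.cuda.is_available() else "cpu"

# Take the architecture hyperparameters from flan-t5-large, load the weights from model_path.
config = AutoConfig.from_pretrained("google/flan-t5-large")
model = AutoModelForTokenClassification.from_pretrained(model_path, config=config)
model.to(device)
model.eval()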
|