druvx13 committed
Commit 6a45166 · verified · 1 Parent(s): 9b561b2

Update app.py

Files changed (1):
  1. app.py +24 -10
app.py CHANGED
@@ -5,7 +5,6 @@ import os
 
 # 🔧 CPU Optimization Suite
 os.environ["OMP_NUM_THREADS"] = "4"
-os.environ["MKL_NUM_THREADS"] = "4"
 torch.set_num_threads(4)
 torch.manual_seed(42)
 
@@ -22,16 +21,31 @@ tokenizer = AutoTokenizer.from_pretrained(
 
 # ✅ Add pad_token if missing (required for batched generation)
 if tokenizer.pad_token is None:
+    # First add special token to tokenizer
     tokenizer.add_special_tokens({'pad_token': '[PAD]'})
-    tokenizer.pad_token = tokenizer.eos_token  # Fallback to EOS as pad
+
+    # Then resize model embeddings to accommodate new token
+    model = AutoModelForCausalLM.from_pretrained(
+        MODEL_NAME,
+        torch_dtype=torch.float32,
+        low_cpu_mem_usage=True,
+        cache_dir=cache_dir
+    )
+    model.resize_token_embeddings(len(tokenizer))
+
+    # Finally set pad_token
+    tokenizer.pad_token = tokenizer.eos_token or tokenizer.cls_token or '[PAD]'
+else:
+    # Load model normally if pad_token exists
+    model = AutoModelForCausalLM.from_pretrained(
+        MODEL_NAME,
+        torch_dtype=torch.float32,
+        low_cpu_mem_usage=True,
+        cache_dir=cache_dir
+    )
 
-# 🧠 Load Model with CPU-specific settings
-model = AutoModelForCausalLM.from_pretrained(
-    MODEL_NAME,
-    torch_dtype=torch.float32,
-    low_cpu_mem_usage=True,
-    cache_dir=cache_dir
-).eval()
+# 🧠 Final model setup
+model = model.eval()
 
 def generate_response(prompt, max_new_tokens=128, temperature=0.7, top_p=0.9, num_sequences=1):
     """Optimized for 18GB CPU with strict memory control"""
@@ -54,7 +68,7 @@ def generate_response(prompt, max_new_tokens=128, temperature=0.7, top_p=0.9, num_sequences=1):
         top_p=float(top_p),
         do_sample=True,
         num_return_sequences=int(num_sequences),
-        pad_token_id=tokenizer.pad_token_id or tokenizer.eos_token_id,
+        pad_token_id=tokenizer.convert_tokens_to_ids(tokenizer.pad_token),
         eos_token_id=tokenizer.eos_token_id
     )
 
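The pad-token branch in this commit follows the standard Transformers recipe: register the new special token, then resize the model's embedding matrix so the new id has a row to look up. Below is a minimal, self-contained sketch of that pattern, using gpt2 as a stand-in checkpoint (MODEL_NAME and cache_dir are defined outside the lines shown in this diff):

from transformers import AutoModelForCausalLM, AutoTokenizer

# Stand-in checkpoint; the app itself loads MODEL_NAME with cache_dir.
checkpoint = "gpt2"

tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint)

if tokenizer.pad_token is None:
    # Registering [PAD] grows the vocabulary by one token...
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})
    # ...so the embedding table must grow to match, otherwise the new
    # id would index past the end of the input embeddings.
    model.resize_token_embeddings(len(tokenizer))

# Both ways of resolving the pad id now agree.
assert tokenizer.convert_tokens_to_ids(tokenizer.pad_token) == tokenizer.pad_token_id

Note that the committed code then prefers tokenizer.eos_token as the pad token whenever one exists, a common choice for decoder-only models, where padded positions are excluded via the attention mask anyway.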
 
 
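The final hunk swaps tokenizer.pad_token_id or tokenizer.eos_token_id for an explicit token-to-id lookup. A plausible reason for that change, sketched below with hypothetical ids: Python's "or" tests truthiness, so a legitimate pad id of 0 would be silently replaced by the EOS id.

# Why "pad_token_id or eos_token_id" is fragile: "or" checks truthiness,
# and 0 is a valid token id in many vocabularies.
pad_token_id = 0   # hypothetical tokenizer whose pad token sits at id 0
eos_token_id = 2

print(pad_token_id or eos_token_id)                                 # 2 (wrong id)
print(pad_token_id if pad_token_id is not None else eos_token_id)   # 0 (correct)

The lookup via convert_tokens_to_ids(tokenizer.pad_token) avoids the fallback entirely, which is safe here because both branches of the new loading code leave tokenizer.pad_token set.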