yuvraj-singh-9886 committed on
Commit
c156c1f
·
1 Parent(s): c41577c

Add liger-kernel dependency and update model files

Browse files
Files changed (3) hide show
  1. app.py +10 -1
  2. requirements.txt +1 -0
  3. tokenizer.py +12 -1
app.py CHANGED
@@ -55,7 +55,16 @@ def initialize_app():
55
  print("Tokenizer initialized successfully")
56
  except Exception as e:
57
  print(f"Error initializing tokenizer: {e}")
58
- raise e
 
 
 
 
 
 
 
 
 
59
 
60
  # Initialize the global tokenizer in model.py
61
  initialize_tokenizer(hf_token=hf_token)
 
55
  print("Tokenizer initialized successfully")
56
  except Exception as e:
57
  print(f"Error initializing tokenizer: {e}")
58
+ print("This might be due to missing HF_TOKEN or lack of access to gated models.")
59
+ print("The app will try to use a fallback tokenizer.")
60
+ # Don't raise the error, let the tokenizer handle fallback
61
+ try:
62
+ tk = Tokenizer(hf_token=None) # Force fallback
63
+ tk = tk.ready_tokenizer()
64
+ print("Fallback tokenizer initialized successfully")
65
+ except Exception as fallback_error:
66
+ print(f"Fallback tokenizer also failed: {fallback_error}")
67
+ raise fallback_error
68
 
69
  # Initialize the global tokenizer in model.py
70
  initialize_tokenizer(hf_token=hf_token)
requirements.txt CHANGED
@@ -7,3 +7,4 @@ huggingface_hub
7
  gradio
8
  numpy
9
  safetensors
 
 
7
  gradio
8
  numpy
9
  safetensors
10
+ liger-kernel
tokenizer.py CHANGED
@@ -9,8 +9,19 @@ class Tokenizer:
9
  print(f"[INFO] Using HF token for model access")
10
  else:
11
  print("[INFO] No HF token provided - using public models only")
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
- self.tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf", token=hf_token)
14
  self.tokenizer.add_special_tokens({'pad_token': '[PAD]'})
15
 
16
  def ready_tokenizer(self):
 
9
  print(f"[INFO] Using HF token for model access")
10
  else:
11
  print("[INFO] No HF token provided - using public models only")
12
+
13
+ # Use a public tokenizer instead of gated Llama model
14
+ # GPT-2 tokenizer is widely compatible and doesn't require authentication
15
+ try:
16
+ if hf_token:
17
+ # Try Llama tokenizer first if we have a token
18
+ self.tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf", token=hf_token)
19
+ else:
20
+ raise Exception("No token - using fallback")
21
+ except:
22
+ print("[INFO] Fallback to public GPT-2 tokenizer")
23
+ self.tokenizer = AutoTokenizer.from_pretrained("gpt2")
24
 
 
25
  self.tokenizer.add_special_tokens({'pad_token': '[PAD]'})
26
 
27
  def ready_tokenizer(self):