zhao1iang committed on
Commit
6ba3da6
·
1 Parent(s): 869654a

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +10 -14
README.md CHANGED
@@ -126,21 +126,17 @@ def special_encode(input, tokenizer):
126
 
127
  return res_id
128
 
129
- def special_encode(input, tokenizer):
130
- raw_str = "[USER]%s[SEP][BOT]" % input.strip().replace("\r", "")
131
- eos_id = tokenizer.eos_token_id
132
- bos_id = tokenizer.bos_token_id
133
- sep_id = tokenizer.encode("[SEP]")[-1]
134
- res_id = [eos_id, bos_id]
135
- arr = raw_str.split("[SEP]")
136
- for elem_idx in range(len(arr)):
137
- elem = arr[elem_idx]
138
- elem_id = tokenizer.encode(elem)[1:]
139
- res_id += elem_id
140
- if elem_idx < len(arr) - 1:
141
- res_id.append(sep_id)
142
 
143
- return res_id
144
 
145
  if __name__ == '__main__':
146
  text = "ε°ηŽ‹θ¦ε°†150千克含药量20%ηš„ε†œθ―η¨€ι‡Šζˆε«θ―ι‡5%ηš„θ―ζ°΄οΌŽιœ€θ¦εŠ ζ°΄ε€šε°‘εƒε…‹οΌŸ"
 
126
 
127
  return res_id
128
 
129
+ def extract_res(response):
130
+ if "[BOT]" in response:
131
+ response = response.split("[BOT]")[1]
132
+ if "<s>" in response:
133
+ response = response.split("<s>")[-1]
134
+ if "</s>" in response:
135
+ response = response.split("</s>")[0]
136
+ if "[SEP]" in response:
137
+ response = response.split("[SEP]")[0]
138
+ return response
 
 
 
139
 
 
140
 
141
  if __name__ == '__main__':
142
  text = "ε°ηŽ‹θ¦ε°†150千克含药量20%ηš„ε†œθ―η¨€ι‡Šζˆε«θ―ι‡5%ηš„θ―ζ°΄οΌŽιœ€θ¦εŠ ζ°΄ε€šε°‘εƒε…‹οΌŸ"