p1atdev
/

dart-v1-base

@@ -38,7 +38,7 @@ MODEL_NAME = "p1atdev/dart-v1-base"
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True) # trust_remote_code is required for tokenizer
 model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.bfloat16)
-prompt = "<|bos|><rating>rating:sfw, rating:general</rating><copyright>original</copyright><character></character><general>1girl, "
 inputs = tokenizer(prompt, return_tensors="pt").input_ids
 with torch.no_grad():
@@ -48,6 +48,23 @@ print(tokenizer.decode(outputs[0], skip_special_tokens=True))
 # rating:sfw, rating:general, original, 1girl, ahoge, black hair, blue eyes, blush, closed mouth, ear piercing, earrings, jewelry, looking at viewer, mole, mole under eye, piercing, portrait, shirt, short hair, solo, white shirt
 ```
 #### Flash attention (optional)
 Using flash attention can optimize computations, but it is currently only compatible with Linux.
@@ -86,8 +103,12 @@ ort_model = ORTModelForCausalLM.from_pretrained(MODEL_NAME)
 # quantized version
 # ort_model = ORTModelForCausalLM.from_pretrained(MODEL_NAME, file_name="model_quantized.onnx")
-prompt = "<|bos|><rating>rating:sfw, rating:general</rating><copyright>original</copyright><character></character><general>1girl, "
-inputs = tokenizer(prompt, return_tensors="pt").input_ids
 with torch.no_grad():
   outputs = model.generate(inputs, generation_config=generation_config)

 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True) # trust_remote_code is required for tokenizer
 model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.bfloat16)
+prompt = "<|bos|><rating>rating:sfw, rating:general</rating><copyright>original</copyright><character></character><general>1girl"
 inputs = tokenizer(prompt, return_tensors="pt").input_ids
 with torch.no_grad():
 # rating:sfw, rating:general, original, 1girl, ahoge, black hair, blue eyes, blush, closed mouth, ear piercing, earrings, jewelry, looking at viewer, mole, mole under eye, piercing, portrait, shirt, short hair, solo, white shirt
 ```
+You can use `tokenizer.apply_chat_template` to simplify constructing prompts:
+```py
+inputs = tokenizer.apply_chat_template({
+  "rating": "rating:sfw, rating:general",
+  "copyright": "original",
+  "character": "",
+  "general": "1girl"
+}, tokenize=True) # tokenize=False to preview prompt
+# same as input_ids of "<|bos|><rating>rating:sfw, rating:general</rating><copyright>original</copyright><character></character><general>1girl"
+with torch.no_grad():
+  outputs = model.generate(inputs, generation_config=generation_config)
+```
+See the [chat templating documentation](https://huggingface.co/docs/transformers/main/en/chat_templating) for more details about `apply_chat_template`.
 #### Flash attention (optional)
 Using flash attention can optimize computations, but it is currently only compatible with Linux.
 # quantized version
 # ort_model = ORTModelForCausalLM.from_pretrained(MODEL_NAME, file_name="model_quantized.onnx")
+inputs = tokenizer.apply_chat_template({
+  "rating": "rating:sfw, rating:general",
+  "copyright": "original",
+  "character": "",
+  "general": "1girl"
+}, tokenize=True)
 with torch.no_grad():
   outputs = model.generate(inputs, generation_config=generation_config)