import json

import torch
from datasets import Dataset
from peft import PeftModel
from transformers import AutoTokenizer, AutoModelForCausalLM

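# Path to the fine-tuned PEFT (LoRA) adapter checkpoint; fill in before running.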
checkpoint_path = ''

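# Base model the adapter was trained on. "Qwen/Qwen2-1.5B-Instruct" is the
# Hugging Face Hub id; a bare name without the org prefix only resolves to a
# local directory.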
base_model_name = "Qwen/Qwen2-1.5B-Instruct"

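# Separator between question and answer in the fine-tuning data (an assumption
# here); used in the post-processing sketch at the bottom to split each pair.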
SEP_TOKEN = '<sep>'

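# Load the tokenizer and base model, then attach the trained adapter weights.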
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
model = AutoModelForCausalLM.from_pretrained(base_model_name)
model = PeftModel.from_pretrained(model, checkpoint_path)

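# Source passage to generate QA pairs from; paste the text between the quotes.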
context = """"""

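# Instruction, kept in Chinese to match the fine-tuning format. In English:
# "Based on the context in `input` below, generate question-answer pairs
# related to the context, and write them to `output`."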
instruction = """根据下面input的上下文,生成和上下文有关的问答对,并输出到output中。"""

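# Assemble the prompt in the same instruction/input/output layout the adapter
# was trained on.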
input_prompt = f"instruction: {instruction} input: {context} output:"

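# Tokenize the prompt and move it to the model's device.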
input_ids = tokenizer(input_prompt, return_tensors="pt")['input_ids'].to(model.device)

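# Sample five candidate generations. do_sample=True is required here: with
# greedy decoding, temperature is ignored and num_return_sequences > 1 raises
# an error.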
with torch.no_grad():
    output = model.generate(
        input_ids=input_ids,
        max_new_tokens=64,
        do_sample=True,
        num_return_sequences=5,
        pad_token_id=tokenizer.eos_token_id,
        temperature=0.8,
    )

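# Strip the prompt by token count rather than slicing the decoded string at
# len(input_prompt): decoding can normalize whitespace and drop special tokens,
# so character offsets may not line up.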
prompt_len = input_ids.shape[1]
for sequence in output:
    output_text = tokenizer.decode(sequence[prompt_len:], skip_special_tokens=True)
    print(output_text)
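
# A minimal post-processing sketch, assuming each generation is a single
# "question<sep>answer" pair joined by SEP_TOKEN; the filename "qa_pairs.json"
# and the dict keys are illustrative. It keeps only well-formed pairs, dumps
# them to JSON, and wraps them in a `datasets.Dataset` for downstream use,
# which is what the `json` and `datasets` imports suggest comes next.
qa_pairs = []
for sequence in output:
    # If <sep> was registered as a special token during fine-tuning, decode
    # with skip_special_tokens=False here so the separator survives.
    text = tokenizer.decode(sequence[prompt_len:], skip_special_tokens=True).strip()
    if SEP_TOKEN in text:
        question, answer = text.split(SEP_TOKEN, 1)
        qa_pairs.append({"question": question.strip(), "answer": answer.strip()})

with open("qa_pairs.json", "w", encoding="utf-8") as f:
    json.dump(qa_pairs, f, ensure_ascii=False, indent=2)

print(Dataset.from_list(qa_pairs))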