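"""Generate question-answer pairs from a given context with a Qwen2-1.5B-Instruct
model fine-tuned via PEFT (base model + adapter checkpoint)."""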
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from peft import PeftModel
import json
checkpoint_path = ''  # path to the fine-tuned PEFT checkpoint
base_model_name = "Qwen2-1.5B-Instruct"  # base model
SEP_TOKEN = '<sep>'
# Load the base model and tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
model = AutoModelForCausalLM.from_pretrained(base_model_name)
# Load the fine-tuned weights from the checkpoint with PeftModel
model = PeftModel.from_pretrained(model, checkpoint_path)
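
# Optional: if the checkpoint is a LoRA-style adapter, merging it into the
# base weights can speed up inference (a sketch; uncomment to use):
# model = model.merge_and_unload()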
context = """""" #在这里输入需要生成问答对的context
instruction = """根据下面input的上下文,生成和上下文有关的问答对,并输出到output中。"""
input_prompt = f"instruction: {instruction} input: {context} output:"
inputs = tokenizer(input_prompt, return_tensors="pt")
output = model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_new_tokens=64,  # token budget per generated QA pair; could be set lower
    num_return_sequences=5,  # number of QA pairs to return
    pad_token_id=tokenizer.eos_token_id,
    do_sample=True,  # required for temperature and multiple sampled sequences
    temperature=0.8,  # sampling randomness
)
prompt_len = inputs["input_ids"].shape[1]
for i in range(len(output)):
    # Decode only the newly generated tokens, skipping the prompt
    output_text = tokenizer.decode(output[i][prompt_len:], skip_special_tokens=True)
    print(output_text)
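
# A minimal sketch for saving the generations as JSON, assuming each decoded
# output contains a question and an answer separated by SEP_TOKEN ('<sep>');
# the exact format depends on how the fine-tuning data was built, so adjust
# the split as needed. Note: if '<sep>' was registered as a special token,
# decode with skip_special_tokens=False so it is not stripped.
qa_pairs = []
for seq in output:
    text = tokenizer.decode(seq[prompt_len:], skip_special_tokens=True)
    if SEP_TOKEN in text:
        question, answer = text.split(SEP_TOKEN, 1)
        qa_pairs.append({"question": question.strip(), "answer": answer.strip()})

with open("qa_pairs.json", "w", encoding="utf-8") as f:
    json.dump(qa_pairs, f, ensure_ascii=False, indent=2)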