import os

import gradio as gr
import requests
from llama_cpp import Llama
from tqdm import tqdm

# GGUF model to download (Ultiima-78B-v2, Q2_K quantization) and its local path.
MODEL_URL = "https://huggingface.co/mradermacher/Ultiima-78B-v2-GGUF/resolve/main/Ultiima-78B-v2.Q2_K.gguf"
MODEL_PATH = "models/Ultiima-78B-v2.Q2_K.gguf"

# System prompt (Japanese): "You are a polite and intelligent Japanese AI assistant.
# Answer the user's questions in an easy-to-understand way."
SYSTEM_PROMPT = "あなたは丁寧で知的な日本語AIアシスタントです。ユーザーの質問にわかりやすく答えてください。"

def download_model(url=MODEL_URL, path=MODEL_PATH):
    """Download the GGUF model to `path`, skipping the download if it already exists."""
    os.makedirs(os.path.dirname(path), exist_ok=True)
    if os.path.exists(path):
        print("モデルファイルは既に存在します。")  # "The model file already exists."
        return
    print(f"モデルをダウンロード中: {url}")  # "Downloading model: ..."
    response = requests.get(url, stream=True)
    response.raise_for_status()
    total = int(response.headers.get('content-length', 0))
    # Stream the file to disk while driving a tqdm progress bar with the bytes written.
    with open(path, 'wb') as file, tqdm(
        desc=path,
        total=total,
        unit='iB',
        unit_scale=True,
        unit_divisor=1024,
    ) as bar:
        for data in response.iter_content(chunk_size=1024):
            size = file.write(data)
            bar.update(size)
    print("モデルのダウンロードが完了しました。")  # "Model download finished."


# Download the model on first run; skipped if the file is already cached locally.
download_model()

# Load the model. llama-cpp-python's default context window (n_ctx=512) is too
# small for multi-turn chat plus a reply of up to 512 tokens, so a larger
# context is requested here.
llm = Llama(model_path=MODEL_PATH, n_ctx=4096)
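# Optional (assumes a GPU-enabled build of llama-cpp-python, e.g. CUDA or Metal):
# offload layers to the GPU for much faster generation. n_gpu_layers=-1 offloads
# every layer; use a smaller value if the model does not fit in VRAM.
# llm = Llama(model_path=MODEL_PATH, n_ctx=4096, n_gpu_layers=-1)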
def build_prompt(messages):
    """Flatten the system prompt and chat history into a single prompt string.

    Note: this plain <|system|>/<|user|>/<|assistant|> layout is hand-rolled and
    may not match the chat template the model was actually trained with.
    """
    prompt = f"<|system|>\n{SYSTEM_PROMPT}\n"
    for msg in messages:
        if msg["role"] == "user":
            prompt += f"<|user|>\n{msg['content']}\n"
        elif msg["role"] == "assistant":
            prompt += f"<|assistant|>\n{msg['content']}\n"
    # End with an assistant tag so the model continues as the assistant.
    prompt += "<|assistant|>\n"
    return prompt

def generate_response(messages, temperature, top_p, max_tokens):
    """Run one completion over the full history and return the model's reply text."""
    prompt = build_prompt(messages)
    response = llm.create_completion(
        prompt=prompt,
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_tokens,
        # Stop as soon as the model starts emitting the next turn marker.
        stop=["<|user|>", "<|system|>", "<|assistant|>"],
    )
    return response["choices"][0]["text"].strip()

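# Optional alternative (a sketch, not wired into the UI below): llama-cpp-python
# also provides create_chat_completion(), which applies the chat template stored
# in the GGUF metadata instead of the hand-built prompt above. If the manual
# <|user|>/<|assistant|> layout does not match what Ultiima-78B-v2 expects, this
# variant may behave better. The name generate_response_chat_api is introduced
# here purely for illustration.
def generate_response_chat_api(messages, temperature, top_p, max_tokens):
    response = llm.create_chat_completion(
        messages=[{"role": "system", "content": SYSTEM_PROMPT}] + messages,
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_tokens,
    )
    return response["choices"][0]["message"]["content"].strip()
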
def chat_interface(user_input, history, temperature, top_p, max_tokens):
    """Gradio callback: record the user turn, generate a reply, and update the chat."""
    if history is None or len(history) == 0:
        history = []
    history.append({"role": "user", "content": user_input})
    response = generate_response(history, temperature, top_p, max_tokens)
    history.append({"role": "assistant", "content": response})

    # gr.Chatbot in its default (tuple) format expects a list of
    # (user_message, assistant_message) pairs, so pair up consecutive turns.
    chat_display = []
    for i in range(0, len(history) - 1, 2):
        chat_display.append((history[i]["content"], history[i + 1]["content"]))

    return chat_display, history

# Gradio UI: chat window, input box, sampling sliders, and per-session history.
with gr.Blocks() as demo:
    # Title: "Japanese chat (system prompt + history support)".
    gr.Markdown("# Ultiima-78B-v2 GGUF 日本語チャット(システムプロンプト+履歴対応)")
    chatbot = gr.Chatbot()
    user_input = gr.Textbox(placeholder="質問をどうぞ", label="あなたの入力")

    # Sampling parameters passed straight through to llm.create_completion().
    temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, step=0.05, label="Temperature(創造性)")
    top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.8, step=0.05, label="Top-p(確率の上位何%から生成するか)")
    max_tokens = gr.Slider(minimum=16, maximum=2048, value=512, step=16, label="最大トークン数")

    # Per-session chat history as a list of {"role": ..., "content": ...} dicts.
    history = gr.State([])

    submit_btn = gr.Button("送信")
    # The button and pressing Enter in the textbox trigger the same callback.
    submit_btn.click(
        chat_interface,
        inputs=[user_input, history, temperature, top_p, max_tokens],
        outputs=[chatbot, history],
    )
    user_input.submit(
        chat_interface,
        inputs=[user_input, history, temperature, top_p, max_tokens],
        outputs=[chatbot, history],
    )

demo.launch()
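
# Rough requirements (estimates): the Q2_K GGUF of a 78B model is on the order
# of 30 GB on disk and needs a comparable amount of free RAM (or VRAM) to load.
# Dependencies: pip install gradio llama-cpp-python requests tqdm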