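"""Gradio chat app for a local GGUF model served with llama-cpp-python.

On first run the script downloads the Ultiima-78B-v2 Q2_K GGUF from Hugging Face,
then launches a Japanese chat UI with a system prompt, conversation history,
and sampling controls (temperature, top-p, max tokens).
"""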
import os
import gradio as gr
from llama_cpp import Llama
import requests
from tqdm import tqdm

# Model download URL and local save path
MODEL_URL = "https://huggingface.co/mradermacher/Ultiima-78B-v2-GGUF/resolve/main/Ultiima-78B-v2.Q2_K.gguf"
MODEL_PATH = "models/Ultiima-78B-v2.Q2_K.gguf"

# System prompt (feel free to change it)
SYSTEM_PROMPT = "あなたは丁寧で知的な日本語AIアシスタントです。ユーザーの質問にわかりやすく答えてください。"

def download_model(url=MODEL_URL, path=MODEL_PATH):
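    """Download the GGUF model to the given path unless it already exists."""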
    os.makedirs(os.path.dirname(path), exist_ok=True)
    if os.path.exists(path):
        print("モデルファイルは既に存在します。")
        return
    print(f"モデルをダウンロード中: {url}")
    response = requests.get(url, stream=True)
    total = int(response.headers.get('content-length', 0))
    with open(path, 'wb') as file, tqdm(
        desc=path,
        total=total,
        unit='iB',
        unit_scale=True,
        unit_divisor=1024,
    ) as bar:
        for data in response.iter_content(chunk_size=1024):
            size = file.write(data)
            bar.update(size)
    print("モデルのダウンロードが完了しました。")

# Download the model on startup (skipped if the file already exists)
download_model()

# Load the model. n_ctx is set explicitly so the prompt plus up to 2048
# generated tokens fit in the context window (the library default is smaller).
llm = Llama(model_path=MODEL_PATH, n_ctx=4096)

def build_prompt(messages):
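    """Build a plain-text prompt from the chat history using simple role tags."""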
    prompt = f"<|system|>\n{SYSTEM_PROMPT}\n"
    for msg in messages:
        if msg["role"] == "user":
            prompt += f"<|user|>\n{msg['content']}\n"
        elif msg["role"] == "assistant":
            prompt += f"<|assistant|>\n{msg['content']}\n"
    prompt += "<|assistant|>\n"
    return prompt

def generate_response(messages, temperature, top_p, max_tokens):
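    """Run a single completion over the full conversation and return the reply text."""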
    prompt = build_prompt(messages)
    response = llm.create_completion(
        prompt=prompt,
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_tokens,
        stop=["<|user|>", "<|system|>", "<|assistant|>"]
    )
    return response["choices"][0]["text"].strip()

def chat_interface(user_input, history, temperature, top_p, max_tokens):
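    """Handle one chat turn: record the user message, generate a reply, and update the display."""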
    history = history or []
    history.append({"role": "user", "content": user_input})
    response = generate_response(history, temperature, top_p, max_tokens)
    history.append({"role": "assistant", "content": response})
    
    # gr.Chatbot (tuple format) expects (user_message, assistant_message) pairs,
    # so pair each user turn with the assistant reply that follows it.
    chat_display = []
    for msg in history:
        if msg["role"] == "user":
            chat_display.append((msg["content"], None))
        elif msg["role"] == "assistant" and chat_display:
            chat_display[-1] = (chat_display[-1][0], msg["content"])

    return chat_display, history

with gr.Blocks() as demo:
    gr.Markdown("# Ultiima-78B-v2 GGUF 日本語チャット(システムプロンプト+履歴対応)")
    chatbot = gr.Chatbot()
    user_input = gr.Textbox(placeholder="質問をどうぞ", label="あなたの入力")
    
    temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, step=0.05, label="Temperature(創造性)")
    top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.8, step=0.05, label="Top-p(確率の上位何%から生成するか)")
    max_tokens = gr.Slider(minimum=16, maximum=2048, value=512, step=16, label="最大トークン数")
    
    history = gr.State([])
    
    submit_btn = gr.Button("送信")
    submit_btn.click(chat_interface, inputs=[user_input, history, temperature, top_p, max_tokens], outputs=[chatbot, history])
    
    user_input.submit(chat_interface, inputs=[user_input, history, temperature, top_p, max_tokens], outputs=[chatbot, history])

demo.launch()