# Gemi / app.py
import os
import gradio as gr
from llama_cpp import Llama
import requests
from tqdm import tqdm
# Model info
MODEL_URL = "https://huggingface.co/mradermacher/Ultiima-78B-v2-GGUF/resolve/main/Ultiima-78B-v2.Q2_K.gguf"
MODEL_PATH = "models/Ultiima-78B-v2.Q2_K.gguf"
# System prompt (feel free to change it)
SYSTEM_PROMPT = "あなたは丁寧で知的な日本語AIアシスタントです。ユーザーの質問にわかりやすく答えてください。"
def download_model(url=MODEL_URL, path=MODEL_PATH):
    os.makedirs(os.path.dirname(path), exist_ok=True)
    if os.path.exists(path):
        print("Model file already exists.")
        return
    print(f"Downloading model: {url}")
    response = requests.get(url, stream=True, timeout=30)
    # Fail early on HTTP errors instead of writing an error page to disk.
    response.raise_for_status()
    total = int(response.headers.get('content-length', 0))
    with open(path, 'wb') as file, tqdm(
        desc=path,
        total=total,
        unit='iB',
        unit_scale=True,
        unit_divisor=1024,
    ) as bar:
        for data in response.iter_content(chunk_size=1024):
            size = file.write(data)
            bar.update(size)
    print("Model download complete.")
# Download the model
download_model()
# Load the model
llm = Llama(model_path=MODEL_PATH)
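# Note: llama-cpp-python's Llama() uses a small default context window
# (n_ctx=512 in many versions), which a long chat history will overflow.
# A larger window can be requested at load time, e.g. (illustrative values,
# not part of the original script):
#   llm = Llama(model_path=MODEL_PATH, n_ctx=4096, n_threads=os.cpu_count())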
def build_prompt(messages):
    prompt = f"<|system|>\n{SYSTEM_PROMPT}\n"
    for msg in messages:
        if msg["role"] == "user":
            prompt += f"<|user|>\n{msg['content']}\n"
        elif msg["role"] == "assistant":
            prompt += f"<|assistant|>\n{msg['content']}\n"
    prompt += "<|assistant|>\n"
    return prompt
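# For a single user turn, build_prompt produces a prompt shaped like:
#   <|system|>
#   あなたは丁寧で知的な日本語AIアシスタントです。...
#   <|user|>
#   (user message)
#   <|assistant|>
# The trailing <|assistant|> cues the model to generate the reply.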
def generate_response(messages, temperature, top_p, max_tokens):
    prompt = build_prompt(messages)
    response = llm.create_completion(
        prompt=prompt,
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_tokens,
        stop=["<|user|>", "<|system|>", "<|assistant|>"]
    )
    return response["choices"][0]["text"].strip()
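# create_completion also accepts stream=True, yielding partial chunks as
# they are generated. A streaming variant (a sketch, not wired into the
# Gradio UI below) would look roughly like:
#   for chunk in llm.create_completion(prompt=prompt, stream=True, ...):
#       text_piece = chunk["choices"][0]["text"]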
def chat_interface(user_input, history, temperature, top_p, max_tokens):
    if history is None:
        history = []
    history.append({"role": "user", "content": user_input})
    response = generate_response(history, temperature, top_p, max_tokens)
    history.append({"role": "assistant", "content": response})
    # gr.Chatbot expects (user_message, bot_message) pairs, so pair up the
    # strictly alternating user/assistant turns in the history.
    chat_display = []
    for i in range(0, len(history) - 1, 2):
        chat_display.append((history[i]["content"], history[i + 1]["content"]))
    return chat_display, history
with gr.Blocks() as demo:
    gr.Markdown("# Ultiima-78B-v2 GGUF 日本語チャット(システムプロンプト+履歴対応)")
    chatbot = gr.Chatbot()
    user_input = gr.Textbox(placeholder="質問をどうぞ", label="あなたの入力")
    temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, step=0.05, label="Temperature(創造性)")
    top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.8, step=0.05, label="Top-p(確率の上位何%から生成するか)")
    max_tokens = gr.Slider(minimum=16, maximum=2048, value=512, step=16, label="最大トークン数")
    history = gr.State([])
    submit_btn = gr.Button("送信")
    submit_btn.click(chat_interface, inputs=[user_input, history, temperature, top_p, max_tokens], outputs=[chatbot, history])
    user_input.submit(chat_interface, inputs=[user_input, history, temperature, top_p, max_tokens], outputs=[chatbot, history])

demo.launch()
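# demo.launch() serves on localhost by default; Gradio also supports, e.g.,
# demo.launch(server_name="0.0.0.0") to bind all interfaces, or
# demo.launch(share=True) for a temporary public link.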