from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import List, Optional, Literal
from gradio_client import Client
import uvicorn
import time
import uuid

# ==== Gradio client initialization ====
gr_client = Client("Nymbo/Serverless-TextGen-Hub")

# ==== Helper that queries the model ====
def ask(user_prompt, system_prompt, model, max_tokens=512, temperature=0.7, top_p=0.95):
    # Send a single-turn request to the Space's /bot endpoint and return its reply
    result = gr_client.predict(
        history=[[user_prompt, None]],
        system_msg=system_prompt,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        freq_penalty=0,
        seed=-1,
        custom_model=model,
        search_term="",
        selected_model=model,
        api_name="/bot"
    )
    return result

# ==== FastAPI application ====
app = FastAPI()

# ==== Request/response models ====
class Message(BaseModel):
    role: Literal["user", "assistant", "system"]
    content: str

class ChatRequest(BaseModel):
    model: str
    messages: List[Message]
    temperature: Optional[float] = 0.7
    top_p: Optional[float] = 0.95
    max_tokens: Optional[int] = 512
    # other parameters can be added as needed

@app.post("/v1/chat/completions")
async def chat_completion(request: ChatRequest):
    # Take the most recent user message and the first system message (if any)
    user_msg = next((m.content for m in reversed(request.messages) if m.role == "user"), None)
    system_msg = next((m.content for m in request.messages if m.role == "system"), "You are a helpful AI assistant.")
    if not user_msg:
        raise HTTPException(status_code=400, detail="User message not found.")
    # Query the model, forwarding the sampling parameters from the request
    assistant_reply = ask(
        user_msg,
        system_msg,
        request.model,
        max_tokens=request.max_tokens,
        temperature=request.temperature,
        top_p=request.top_p,
    )
    # Build the response in the OpenAI chat-completion format
    response = {
        "id": f"chatcmpl-{uuid.uuid4().hex[:12]}",
        "object": "chat.completion",
        "created": int(time.time()),
        "model": request.model,
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": assistant_reply
                },
                "finish_reason": "stop"
            }
        ],
        "usage": {
            "prompt_tokens": 0,  # token counts could be computed if needed
            "completion_tokens": 0,
            "total_tokens": 0
        }
    }
    return response

# ==== Server startup ====
if __name__ == "__main__":
    uvicorn.run("local_openai_server:app", host="0.0.0.0", port=7860, reload=True)
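
# Example client call (a sketch, not part of the server). Assumes the server is
# running on localhost:7860 and that the official OpenAI client is installed
# (`pip install openai`); the model name below is an illustrative placeholder
# for whatever model the Space accepts:
#
#   from openai import OpenAI
#   client = OpenAI(base_url="http://localhost:7860/v1", api_key="unused")
#   resp = client.chat.completions.create(
#       model="meta-llama/Llama-3.3-70B-Instruct",
#       messages=[
#           {"role": "system", "content": "You are a helpful AI assistant."},
#           {"role": "user", "content": "Hello!"},
#       ],
#   )
#   print(resp.choices[0].message.content)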