# OpenAI-compatible chat-completions proxy backed by a Gradio Space.
import logging
import time
import uuid
from typing import List, Literal, Optional

import uvicorn
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse, PlainTextResponse
from gradio_client import Client
from pydantic import BaseModel
# === Настройка логгера ===
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
logger = logging.getLogger(__name__)
# === Задаём модель (используется глобально) ===
AI_MODEL = "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO"
# === Подключаемся к Gradio Space по URL ===
try:
gr_client = Client("Nymbo/Serverless-TextGen-Hub")
logger.info("✅ Успешно подключено к Gradio Space")
except Exception as e:
logger.error(f"❌ Ошибка подключения к Gradio Client: {e}")
gr_client = None
# === Функция вызова модели ===
def ask(user_prompt, system_prompt):
if not gr_client:
return "[Ошибка: Gradio Client не инициализирован]"
try:
result = gr_client.predict(
history=[[user_prompt, None]],
system_msg=system_prompt,
max_tokens=512,
temperature=0.7,
top_p=0.95,
freq_penalty=0,
seed=-1,
custom_model=AI_MODEL,
search_term="",
selected_model=AI_MODEL,
api_name="/bot"
)
return result
except Exception as e:
logger.error(f"❌ Ошибка при вызове модели: {e}")
return f"[Ошибка: {str(e)}]"
# === FastAPI приложение ===
app = FastAPI()
# === Модели запроса ===
class Message(BaseModel):
role: Literal["user", "assistant", "system"]
content: str
class ChatRequest(BaseModel):
model: str # Заглушка — не используется
messages: List[Message]
temperature: Optional[float] = 0.7
top_p: Optional[float] = 0.95
max_tokens: Optional[int] = 512
# === Корневой маршрут "/" ===
@app.get("/", response_class=PlainTextResponse)
async def root():
return "Proxy free test"
# === Основной маршрут OpenAI-совместимый ===
@app.post("/v1/chat/completions")
async def chat_completion(request: Request):
headers = dict(request.headers)
body = await request.body()
logger.info("📥 Запрос получен")
logger.info(f"🔸 Заголовки: {headers}")
logger.info(f"🔸 Тело: {body.decode('utf-8')}")
try:
data = await request.json()
chat_request = ChatRequest(**data)
except Exception as e:
logger.error(f"❌ Ошибка разбора JSON: {e}")
return {"error": "Некорректный JSON"}
# Игнорируем model из запроса и используем глобальный AI_MODEL
user_msg = next((m.content for m in reversed(chat_request.messages) if m.role == "user"), None)
system_msg = next((m.content for m in chat_request.messages if m.role == "system"), "You are a helpful AI assistant.")
if not user_msg:
return {"error": "User message not found."}
assistant_reply = ask(user_msg, system_msg)
response = {
"id": f"chatcmpl-{uuid.uuid4().hex[:12]}",
"object": "chat.completion",
"created": int(time.time()),
"model": AI_MODEL,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": assistant_reply
},
"finish_reason": "stop"
}
],
"usage": {
"prompt_tokens": 0,
"completion_tokens": 0,
"total_tokens": 0
}
}
return response
# === Запуск сервера ===
if __name__ == "__main__":
uvicorn.run("local_openai_server:app", host="0.0.0.0", port=7860, reload=True)
|