# prox/main.py — OpenAI-compatible proxy over a Gradio Space
# (source: HuggingFace Space "prox", author EmoCube, revision 04c276b)
from fastapi import FastAPI, Request
from fastapi.responses import PlainTextResponse
from pydantic import BaseModel
from typing import List, Optional, Literal
from gradio_client import Client
import uvicorn
import time
import uuid
import logging
# --- Logger configuration ---
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
)
logger = logging.getLogger(__name__)
# --- Model used for every completion (the request's "model" field is ignored) ---
AI_MODEL = "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO"

# --- Connect to the Gradio Space that hosts the text-generation backend.
# On failure gr_client stays None and ask() returns an error string instead.
gr_client = None
try:
    gr_client = Client("Nymbo/Serverless-TextGen-Hub")
    logger.info("✅ Успешно подключено к Gradio Space")
except Exception as e:
    logger.error(f"❌ Ошибка подключения к Gradio Client: {e}")
# --- Model invocation helper ---
def ask(user_prompt, system_prompt):
    """Send one user turn to the Gradio Space and return the model's reply.

    Returns the backend's result on success, or a bracketed error string
    when the client is missing or the call raises.
    """
    if gr_client is None:
        return "[Ошибка: Gradio Client не инициализирован]"

    # Fixed generation settings; both model selectors point at AI_MODEL.
    call_kwargs = dict(
        history=[[user_prompt, None]],
        system_msg=system_prompt,
        max_tokens=512,
        temperature=0.7,
        top_p=0.95,
        freq_penalty=0,
        seed=-1,
        custom_model=AI_MODEL,
        search_term="",
        selected_model=AI_MODEL,
    )
    try:
        return gr_client.predict(api_name="/bot", **call_kwargs)
    except Exception as exc:
        logger.error(f"❌ Ошибка при вызове модели: {exc}")
        return f"[Ошибка: {str(exc)}]"
# === FastAPI application instance (routes below are registered on it) ===
app = FastAPI()
# === Request models ===
class Message(BaseModel):
    """A single chat message in the OpenAI request format."""

    role: Literal["user", "assistant", "system"]
    content: str
class ChatRequest(BaseModel):
    """OpenAI-style chat completion request body."""

    model: str  # Placeholder — ignored; the global AI_MODEL is used instead
    messages: List[Message]
    # Accepted for API compatibility but not forwarded to the backend,
    # which uses its own fixed generation settings.
    temperature: Optional[float] = 0.7
    top_p: Optional[float] = 0.95
    max_tokens: Optional[int] = 512
# === Root route "/" ===
@app.get("/", response_class=PlainTextResponse)
async def root():
    """Plain-text liveness probe."""
    return "Proxy free test"
# === OpenAI-compatible main route ===
@app.post("/v1/chat/completions")
async def chat_completion(request: Request):
    """OpenAI-style chat completion endpoint.

    Validates the body as a ChatRequest, forwards the most recent user
    message (plus an optional system message) to the Gradio Space via
    ask(), and wraps the reply in an OpenAI chat.completion envelope.
    Malformed JSON or a missing user message yields an {"error": ...} dict.
    """
    headers = dict(request.headers)
    body = await request.body()
    logger.info("📥 Запрос получен")
    logger.info("🔸 Заголовки: %s", headers)
    # errors="replace" keeps a non-UTF-8 body from raising UnicodeDecodeError
    # here (which would bypass the guarded parse below and surface as a 500).
    logger.info("🔸 Тело: %s", body.decode("utf-8", errors="replace"))

    try:
        data = await request.json()
        chat_request = ChatRequest(**data)
    except Exception as e:
        logger.error("❌ Ошибка разбора JSON: %s", e)
        return {"error": "Некорректный JSON"}

    # Ignore the request's "model" field and use the global AI_MODEL.
    # Latest user message wins; first system message (if any) becomes the prompt.
    user_msg = next(
        (m.content for m in reversed(chat_request.messages) if m.role == "user"),
        None,
    )
    system_msg = next(
        (m.content for m in chat_request.messages if m.role == "system"),
        "You are a helpful AI assistant.",
    )
    if not user_msg:
        return {"error": "User message not found."}

    assistant_reply = ask(user_msg, system_msg)

    return {
        "id": f"chatcmpl-{uuid.uuid4().hex[:12]}",
        "object": "chat.completion",
        "created": int(time.time()),
        "model": AI_MODEL,
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": assistant_reply,
                },
                "finish_reason": "stop",
            }
        ],
        # Token accounting is not available from the Gradio backend.
        "usage": {
            "prompt_tokens": 0,
            "completion_tokens": 0,
            "total_tokens": 0,
        },
    }
# === Server entry point ===
if __name__ == "__main__":
    # The import string must name this module ("main.py" -> "main:app") for
    # reload=True to work; the previous "local_openai_server:app" pointed at
    # a module that does not exist in this Space.
    uvicorn.run("main:app", host="0.0.0.0", port=7860, reload=True)