"""A minimal OpenAI-compatible chat-completions proxy backed by a public Gradio Space."""

from fastapi import FastAPI, Request
from fastapi.responses import PlainTextResponse
from pydantic import BaseModel
from typing import List, Optional, Literal
from gradio_client import Client
import uvicorn
import time
import uuid
import logging

logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
logger = logging.getLogger(__name__)

AI_MODEL = "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO"

# Connect to the public Gradio Space that serves as the text-generation backend.
try:
    gr_client = Client("Nymbo/Serverless-TextGen-Hub")
    logger.info("✅ Successfully connected to the Gradio Space")
except Exception as e:
    logger.error(f"❌ Failed to connect the Gradio Client: {e}")
    gr_client = None


def ask(user_prompt, system_prompt):
    """Send a single prompt to the Space's /bot endpoint and return the model's reply."""
    if not gr_client:
        return "[Error: Gradio Client is not initialized]"

    try:
        result = gr_client.predict(
            history=[[user_prompt, None]],
            system_msg=system_prompt,
            max_tokens=512,
            temperature=0.7,
            top_p=0.95,
            freq_penalty=0,
            seed=-1,  # -1 conventionally requests a random seed
            custom_model=AI_MODEL,
            search_term="",
            selected_model=AI_MODEL,
            api_name="/bot",
        )
        return result
    except Exception as e:
        logger.error(f"❌ Error while calling the model: {e}")
        return f"[Error: {e}]"
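
# Quick manual check of the Gradio bridge (a sketch; it assumes the Space is
# reachable and that its /bot endpoint accepts the keyword arguments used above):
#
#   if gr_client:
#       print(ask("Say hello in one sentence.", "You are a helpful AI assistant."))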


app = FastAPI()


class Message(BaseModel):
    role: Literal["user", "assistant", "system"]
    content: str


class ChatRequest(BaseModel):
    model: str
    messages: List[Message]
    temperature: Optional[float] = 0.7
    top_p: Optional[float] = 0.95
    max_tokens: Optional[int] = 512


@app.get("/", response_class=PlainTextResponse)
async def root():
    return "Proxy free test"


@app.post("/v1/chat/completions")
async def chat_completion(request: Request):
    # Log the raw request before parsing, to aid debugging.
    headers = dict(request.headers)
    body = await request.body()

    logger.info("📥 Request received")
    logger.info(f"🔸 Headers: {headers}")
    logger.info(f"🔸 Body: {body.decode('utf-8')}")

    try:
        data = await request.json()
        chat_request = ChatRequest(**data)
    except Exception as e:
        logger.error(f"❌ JSON parse error: {e}")
        return {"error": "Invalid JSON"}

    # Take the most recent user message and the first system message, if any.
    user_msg = next((m.content for m in reversed(chat_request.messages) if m.role == "user"), None)
    system_msg = next((m.content for m in chat_request.messages if m.role == "system"), "You are a helpful AI assistant.")

    if not user_msg:
        return {"error": "User message not found."}

    assistant_reply = ask(user_msg, system_msg)

    # Wrap the reply in an OpenAI-style chat.completion envelope.
    response = {
        "id": f"chatcmpl-{uuid.uuid4().hex[:12]}",
        "object": "chat.completion",
        "created": int(time.time()),
        "model": AI_MODEL,
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": assistant_reply
                },
                "finish_reason": "stop"
            }
        ],
        "usage": {
            # Token usage is not tracked by this proxy; zeros are placeholders.
            "prompt_tokens": 0,
            "completion_tokens": 0,
            "total_tokens": 0
        }
    }

    return response


if __name__ == "__main__":
    # The import string "local_openai_server:app" must match this file's module name.
    uvicorn.run("local_openai_server:app", host="0.0.0.0", port=7860, reload=True)
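
# Example client call (a sketch; it assumes the server is running locally on
# port 7860; the "model" field is accepted but the proxy always answers with
# AI_MODEL):
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:7860/v1/chat/completions",
#       json={
#           "model": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
#           "messages": [{"role": "user", "content": "Hello!"}],
#       },
#   )
#   print(resp.json()["choices"][0]["message"]["content"])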