from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from llama_cpp import Llama

# Load the GGUF model once at import time so every request reuses the
# same instance instead of paying the model-load cost per call.
llm = Llama.from_pretrained(
    repo_id="hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF",
    filename="llama-3.2-1b-instruct-q4_k_m.gguf",
)

app = FastAPI()

class ChatRequest(BaseModel):
    # OpenAI-style chat messages: [{"role": "user", "content": "..."}]
    messages: list[dict]

@app.post("/chat")  # route path is illustrative; the original listing omitted the decorator
async def chat_completion(request: ChatRequest):
    try:
        # llama-cpp-python returns an OpenAI-style completion dict
        response = llm.create_chat_completion(messages=request.messages)
        return response
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
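
To exercise the endpoint locally, here is a minimal client sketch. It assumes the file above is saved as main.py, the server is started with uvicorn on the default port 8000, and the illustrative /chat route from the listing; the requests library is an extra dependency not used by the server itself.

# Start the server first:
#   uvicorn main:app --host 0.0.0.0 --port 8000
import requests  # pip install requests

payload = {
    "messages": [
        {"role": "user", "content": "What is the capital of France?"}
    ]
}
resp = requests.post("http://localhost:8000/chat", json=payload)
resp.raise_for_status()
completion = resp.json()
# create_chat_completion returns an OpenAI-style dict with a "choices" list
print(completion["choices"][0]["message"]["content"])

One design note: because the model is loaded at module import, each uvicorn worker process holds its own copy in memory, so running a single worker is the usual choice for a small quantized model like this one.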