# NOTE: non-code page header (Hugging Face Spaces status lines, commit hashes,
# and gutter line numbers) removed — it was extraction residue, not source.
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from llama_cpp import Llama
# Initialize the LLM once when the application starts, so every request
# shares a single in-memory model instead of reloading per call.
# NOTE(review): from_pretrained fetches the GGUF weights from the Hugging
# Face Hub on first run, so startup may block on network/disk I/O — confirm
# this is acceptable for the deployment target.
llm = Llama.from_pretrained(
    repo_id="hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF",
    filename="llama-3.2-1b-instruct-q4_k_m.gguf"
)

# FastAPI application object; route handlers below register against it.
app = FastAPI()
class ChatRequest(BaseModel):
    """Request body for the /chat endpoint: a chat message history.

    Pydantic validates only that ``messages`` is a list of dicts; the
    dict contents are passed straight to llama.cpp unvalidated.
    """

    # Presumably OpenAI-style entries, e.g. {"role": "user", "content": "..."},
    # since create_chat_completion consumes them directly — TODO confirm.
    messages: list[dict]
@app.post("/chat")
async def chat_completion(request: ChatRequest):
    """Run a chat completion over the supplied message history.

    Args:
        request: ChatRequest whose ``messages`` is a list of role/content
            dicts, forwarded unchanged to llama.cpp.

    Returns:
        The raw completion dict produced by ``llm.create_chat_completion``.

    Raises:
        HTTPException: 500 wrapping any inference failure.
    """
    try:
        # Keep the try body minimal: only the inference call can raise here.
        response = llm.create_chat_completion(messages=request.messages)
    except Exception as e:
        # NOTE(review): str(e) may expose internal details to clients —
        # consider logging the exception and returning a generic message.
        raise HTTPException(status_code=500, detail=str(e)) from e
    return response