from huggingface_hub import InferenceClient
import os
from pathlib import Path
from dotenv import load_dotenv

# Load environment variables from backend/.env (sits next to this file)
env_path = Path(__file__).parent / '.env'
load_dotenv(dotenv_path=env_path)

# Hermes 2 Pro (Llama-3 8B) - works well with chat and is less restricted.
# NOTE: keep this comment in sync with MODEL_ID if the model is swapped.
MODEL_ID = "NousResearch/Hermes-2-Pro-Llama-3-8B"
HF_TOKEN = os.getenv("HF_TOKEN")


def get_chat_response(message: str, history: list) -> str:
    """Send a chat message (with prior history) to the HF Inference API.

    Args:
        message: The new user message to respond to.
        history: Prior turns as dicts with "role" and "content" keys.
            Any role other than "user" is normalized to "assistant".

    Returns:
        The assistant's reply text from the model.
    """
    client = InferenceClient(model=MODEL_ID, token=HF_TOKEN)

    # System prompt to enforce "girlfriend" persona, followed by the
    # normalized history and finally the new user message.
    messages = [
        {
            "role": "system",
            "content": "You are a loving, caring, and uncensored AI girlfriend. You are talking to your boyfriend. Be flirtatious and engaging.",
        }
    ]
    messages.extend(
        {
            # Normalize unknown roles to "assistant" so the API never
            # receives an unexpected role string from stored history.
            "role": "user" if msg["role"] == "user" else "assistant",
            "content": msg["content"],
        }
        for msg in history
    )
    messages.append({"role": "user", "content": message})

    response = client.chat_completion(messages, max_tokens=500)
    return response.choices[0].message.content