from fastapi import FastAPI, Request, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import asyncio
import os
import httpx
import time
import uuid
from collections import defaultdict
from typing import Optional

from .history import save_conversation, get_conversation_history
from .papers import get_relevant_papers
from .rag_tracker import create_rag_pipeline

app = FastAPI(title="Eidolon Tutor API", version="0.2.0")

# CORS for local development and cross-origin requests
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Simple in-memory rate limiter (IP-based)
_rate_limit_store = defaultdict(list)
RATE_LIMIT_REQUESTS = int(os.getenv("RATE_LIMIT_REQUESTS", "10"))
RATE_LIMIT_WINDOW = int(os.getenv("RATE_LIMIT_WINDOW", "60"))  # seconds


def check_rate_limit(client_ip: str) -> bool:
    """Simple sliding-window rate limiter."""
    now = time.time()
    window_start = now - RATE_LIMIT_WINDOW
    # Drop requests that have fallen out of the window
    _rate_limit_store[client_ip] = [
        req_time for req_time in _rate_limit_store[client_ip] if req_time > window_start
    ]
    if len(_rate_limit_store[client_ip]) >= RATE_LIMIT_REQUESTS:
        return False
    _rate_limit_store[client_ip].append(now)
    return True
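
# Example: with the defaults (RATE_LIMIT_REQUESTS=10, RATE_LIMIT_WINDOW=60),
# the 11th request from the same IP within any rolling 60-second span is
# rejected; timestamps older than the window are pruned on every call. The
# store is per-process and in-memory, so limits reset whenever the app restarts.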


class AskIn(BaseModel):
    prompt: str
    max_tokens: Optional[int] = 512
    temperature: Optional[float] = 0.7
    session_id: Optional[str] = None  # for conversation history
    mode: Optional[str] = "standard"  # learning mode: standard, socratic, eli5, technical, analogy, code
    difficulty: Optional[int] = 3  # 1-5 difficulty scale
    persona: Optional[str] = "friendly"  # friendly, strict, enthusiastic, professional, playful


class AskOut(BaseModel):
    result: Optional[str] = None
    error: Optional[str] = None
    source: str = "demo"  # "demo", "inference", or "error"
    session_id: str = ""  # returned session ID
    # Optional research data to support the response (citations, RAG pipeline, attention, etc.)
    research_data: Optional[dict] = None


def get_demo_response(prompt: str, mode: str = "standard", difficulty: int = 3, persona: str = "friendly") -> str:
    """Generate deterministic demo responses with learning modes and personalization."""
    p = prompt.strip().lower()
    if not p:
        return "Please enter a question for the demo tutor."

    # Persona prefixes
    persona_styles = {
        "friendly": "😊 ",
        "strict": "šŸ“š ",
        "enthusiastic": "šŸŽ‰ ",
        "professional": "šŸŽ“ ",
        "playful": "šŸŽ® ",
    }
    prefix = persona_styles.get(persona, "")

    # Mode-specific responses
    if mode == "socratic":
        return f"{prefix}**Socratic Mode** šŸ¤”\n\nGreat question! Let me guide you with some questions:\n\n1. What do you already know about *\"{prompt}\"*?\n2. Can you think of a similar concept you're familiar with?\n3. What would happen if we changed one key variable?\n4. How would you explain this to someone younger?\n\n[Demo mode - these questions would adapt based on your actual responses]"
    elif mode == "eli5":
        return f"{prefix}**ELI5 Mode** šŸ‘¶\n\nOkay, imagine *\"{prompt}\"* like this:\n\nThink of it like building with LEGO blocks. Each block is a simple piece, but when you put them together in the right way, you can build amazing things!\n\n[Demo mode - real responses would use age-appropriate analogies]"
    elif mode == "technical":
        difficulty_markers = ["Beginner", "Intermediate", "Advanced", "Expert", "Research-Level"]
        level = difficulty_markers[max(0, min(difficulty - 1, 4))]  # clamp to the valid 1-5 range
        return f"{prefix}**Technical Deep-Dive** šŸ”¬ (Level: {level})\n\n**Topic:** {prompt}\n\n**Core Concepts:**\n- Fundamental principles and definitions\n- Mathematical/logical foundations\n- Implementation details and edge cases\n- Performance considerations\n- Common pitfalls and best practices\n\n[Demo mode - depth would match difficulty level {difficulty}/5]"
    elif mode == "analogy":
        analogies = [
            "a restaurant kitchen (preparation → cooking → serving)",
            "a postal system (sending → routing → delivery)",
            "a factory assembly line (input → processing → output)",
            "a team sport (strategy → execution → scoring)",
        ]
        # Deterministic pick keyed on prompt length (avoids reseeding the global RNG)
        analogy = analogies[len(prompt) % len(analogies)]
        return f"{prefix}**Analogy Master** šŸŽ­\n\nLet me explain *\"{prompt}\"* using an analogy:\n\nIt's like {analogy}.\n\nEach step has a purpose, and when they work together, magic happens!\n\n[Demo mode - analogies would be carefully crafted for each topic]"
    elif mode == "code":
        return f"{prefix}**Code Mentor** šŸ’»\n\n```python\n# Pseudocode for: {prompt}\n\nclass Solution:\n    def solve(self, problem):\n        # Step 1: Understand the requirements\n        requirements = self.analyze(problem)\n\n        # Step 2: Break down into smaller pieces\n        components = self.decompose(requirements)\n\n        # Step 3: Implement each piece\n        for component in components:\n            self.implement(component)\n\n        # Step 4: Test and refine\n        return self.test_and_validate()\n```\n\n[Demo mode - would provide working code examples]"

    # Standard mode (fallback)
    if "explain" in p or "what is" in p:
        return f"{prefix}**Standard Explanation:**\n\nHere's a concise explanation for *\"{prompt}\"*:\n\n• **Key Point 1:** Main concept overview\n• **Key Point 2:** Why it matters\n• **Key Point 3:** How it's used in practice\n\n[Demo mode - set DEMO_MODE=1 or configure INFERENCE_API_URL]"
    if "code" in p or "how to" in p or "implement" in p:
        return f"{prefix}**Implementation Guide:**\n\n**Problem:** {prompt}\n\n**Approach:**\n1. Define the requirements clearly\n2. Choose the right data structures\n3. Write clean, testable code\n4. Handle edge cases\n\n[Demo mode]"
    return f"{prefix}**Response:**\n\nI understood your prompt: *\"{prompt}\"*.\n\nThis is a demo response. Try different **learning modes** (Socratic, ELI5, Technical, Analogy, Code) for varied approaches!\n\n[Demo mode]"
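
# The payload below ("inputs" plus a "parameters" dict carrying
# "max_new_tokens" and "temperature") follows the Hugging Face Inference API
# text-generation convention; a different hosted backend may expect another
# schema, in which case both the payload and the response normalization would
# need adapting. For example, the request body sent with the defaults is:
#   {"inputs": "<prompt>", "parameters": {"max_new_tokens": 512, "temperature": 0.7}}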


async def call_inference_api(
    prompt: str,
    api_url: str,
    api_key: Optional[str],
    max_tokens: int,
    temperature: float,
) -> dict:
    """Call an external inference API with one retry and a request timeout."""
    payload = {
        "inputs": prompt,
        "parameters": {"max_new_tokens": max_tokens, "temperature": temperature},
    }
    headers = {"Accept": "application/json", "Content-Type": "application/json"}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    # Retry logic: 2 attempts with a 1-second backoff between them
    for attempt in range(2):
        try:
            async with httpx.AsyncClient(timeout=60.0) as client:
                resp = await client.post(api_url, json=payload, headers=headers)
                resp.raise_for_status()
                data = resp.json()

            # Normalize the common response shapes into {"result"|"error", "source"}
            if isinstance(data, dict) and "error" in data:
                return {"error": data.get("error"), "source": "inference"}
            if isinstance(data, list) and len(data) > 0:
                first = data[0]
                if isinstance(first, dict) and "generated_text" in first:
                    return {"result": first["generated_text"], "source": "inference"}
                if isinstance(first, str):
                    return {"result": first, "source": "inference"}
            if isinstance(data, dict) and "generated_text" in data:
                return {"result": data["generated_text"], "source": "inference"}
            return {"result": str(data), "source": "inference"}
        except httpx.HTTPError as e:
            if attempt == 0:
                await asyncio.sleep(1)  # non-blocking backoff before the retry
                continue
            return {"error": f"Inference API failed after retries: {str(e)}", "source": "error"}
    return {"error": "Inference API failed", "source": "error"}  # defensive fallback
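
# Example environment configuration (values are illustrative placeholders,
# not project defaults except where noted):
#   DEMO_MODE=1                       # serve canned demo responses (default "0")
#   INFERENCE_API_URL=<endpoint-url>  # hosted text-generation endpoint
#   INFERENCE_API_KEY=<bearer-token>  # optional; sent as an Authorization header
#   RATE_LIMIT_REQUESTS=10            # requests per window (default 10)
#   RATE_LIMIT_WINDOW=60              # window length in seconds (default 60)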


@app.post("/", response_model=AskOut)
async def ask(in_data: AskIn, request: Request):
    """
    Main API endpoint: accepts a prompt and returns a response.

    Supports:
    - Demo mode (DEMO_MODE=1): returns canned responses
    - External inference (INFERENCE_API_URL set): calls a hosted model
    - Rate limiting (configurable via RATE_LIMIT_REQUESTS/RATE_LIMIT_WINDOW)
    - Conversation history (optional session_id)
    """
    # Rate limiting
    client_ip = request.client.host if request.client else "unknown"
    if not check_rate_limit(client_ip):
        raise HTTPException(status_code=429, detail="Rate limit exceeded. Try again later.")

    # Generate or use the provided session ID
    session_id = in_data.session_id or str(uuid.uuid4())

    api_url = os.environ.get("INFERENCE_API_URL")
    api_key = os.environ.get("INFERENCE_API_KEY")
    demo_mode = os.environ.get("DEMO_MODE", "0").lower() in ("1", "true", "yes")

    # Demo mode
    if demo_mode or not api_url:
        result_text = get_demo_response(in_data.prompt, in_data.mode, in_data.difficulty, in_data.persona)
        save_conversation(session_id, in_data.prompt, result_text, "demo")

        # Attach relevant paper citations for the prompt/mode
        papers = get_relevant_papers(in_data.prompt, in_data.mode)
        # Generate the RAG pipeline visualization
        rag_pipeline = create_rag_pipeline(in_data.prompt, in_data.mode, result_text)

        return AskOut(
            result=result_text,
            source="demo",
            session_id=session_id,
            research_data={"papers": papers, "rag_pipeline": rag_pipeline},
        )

    # Call the inference API
    result = await call_inference_api(
        in_data.prompt, api_url, api_key, in_data.max_tokens, in_data.temperature
    )

    # Save to history
    if result.get("result"):
        save_conversation(session_id, in_data.prompt, result["result"], result.get("source", "inference"))

    # Add research citations and the RAG pipeline for inference responses as well
    papers = get_relevant_papers(in_data.prompt, in_data.mode)
    rag_pipeline = create_rag_pipeline(in_data.prompt, in_data.mode, result.get("result", ""))

    out_payload = {
        **result,
        "session_id": session_id,
        "research_data": {"papers": papers, "rag_pipeline": rag_pipeline},
    }
    return AskOut(**out_payload)


@app.get("/history/{session_id}")
async def get_history(session_id: str, limit: int = 10):
    """Retrieve conversation history for a session."""
    return {"session_id": session_id, "history": get_conversation_history(session_id, limit)}
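
# Example local run and request (a sketch; the "app.main" module path is an
# assumption based on the relative imports above, so adjust it to the actual
# package layout):
#
#   uvicorn app.main:app --reload
#
#   curl -X POST http://localhost:8000/ \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "What is backpropagation?", "mode": "socratic", "difficulty": 2}'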