import os
from typing import Any, List, Mapping, Optional

import requests
from huggingface_hub import hf_hub_download
from langchain.chains import RetrievalQA
from langchain.llms.base import LLM
from langchain_core.prompts import PromptTemplate


# --- Custom LangChain LLM Wrapper for Hybrid Approach ---
class HybridLLM(LLM):
    api_url: str = ""
    local_llm: Any = None

    @property
    def _llm_type(self) -> str:
        return "hybrid_llm"

    def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs: Any) -> str:
        # 1. Try the Colab API first
        if self.api_url:
            try:
                print(f"🌐 Calling Colab API: {self.api_url}")
                response = requests.post(
                    f"{self.api_url}/generate",
                    json={"prompt": prompt, "max_tokens": 512},
                    timeout=30,  # 30s timeout
                )
                if response.status_code == 200:
                    return response.json()["response"]
                else:
                    print(f"⚠️ API Error {response.status_code}: {response.text}")
            except Exception as e:
                print(f"⚠️ API Connection Failed: {e}")

        # 2. Fall back to the local GGUF model
        if self.local_llm:
            print("💻 Using Local GGUF Fallback...")
            # llama-cpp-python accepts a raw prompt string, so pass it through directly
            output = self.local_llm(
                prompt,
                max_tokens=512,
                stop=["<|im_end|>", "User:", "Input:"],
                echo=False,
            )
            return output["choices"][0]["text"]

        return "❌ Error: No working LLM available (API failed and no local model)."

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        return {"api_url": self.api_url}


class LLMClient:
    def __init__(self, vector_store=None):
        """Initialize the hybrid LLM client."""
        self.vector_store = vector_store
        self.api_url = os.environ.get("COLAB_API_URL", "")  # read from environment variable
        self.local_llm = None

        # Load the local GGUF model eagerly so it is ready as a backup whenever
        # the API is missing or unreachable. A 2B model quantized to Q4_K_M
        # fits comfortably in 16 GB of RAM.
        try:
            print("📂 Loading Local Qwen3-VL-2B-Thinking (GGUF)...")
            from llama_cpp import Llama

            repo_id = "Qwen/Qwen3-VL-2B-Thinking-GGUF"
            filename = "Qwen3VL-2B-Thinking-Q4_K_M.gguf"
            model_path = hf_hub_download(repo_id=repo_id, filename=filename)

            self.local_llm = Llama(
                model_path=model_path,
                n_ctx=2048,
                n_threads=2,  # use 2 vCPUs
                verbose=False,
            )
            print("✅ Local GGUF Model Ready!")
        except Exception as e:
            print(f"⚠️ Could not load local GGUF: {e}")

        # Create the hybrid LangChain wrapper
        self.llm = HybridLLM(api_url=self.api_url, local_llm=self.local_llm)

    def analyze(self, text, context_chunks=None):
        """Analyze text using a LangChain RetrievalQA chain."""
        if not self.vector_store:
            return "❌ Vector Store not initialized."

        # Custom prompt template (ChatML format)
        template = """<|im_start|>system
You are a cybersecurity expert. Task: Determine whether the input is 'PHISHING' or 'BENIGN' (Safe).
Respond in the following format:
LABEL: [PHISHING or BENIGN]
EXPLANATION: [A brief Vietnamese explanation]

Context: {context}
<|im_end|>
<|im_start|>user
Input: {question}

Short Analysis:<|im_end|>
<|im_start|>assistant
"""
        PROMPT = PromptTemplate(
            template=template,
            input_variables=["context", "question"],
        )

        # Create the QA chain
        qa_chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type="stuff",
            retriever=self.vector_store.as_retriever(search_kwargs={"k": 3}),
            chain_type_kwargs={"prompt": PROMPT},
        )

        try:
            print("🤖 Generating response...")
            response = qa_chain.invoke({"query": text})
            return response["result"]
        except Exception as e:
            return f"❌ Error: {str(e)}"
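

# --- Usage sketch (illustrative only) ---
# A minimal example of wiring the client up, assuming a LangChain-compatible
# vector store (e.g. FAISS or Chroma) was built elsewhere; the
# `load_vector_store()` helper mentioned below is hypothetical and not
# defined in this module.
if __name__ == "__main__":
    # vector_store = load_vector_store()  # hypothetical: returns a LangChain vector store
    vector_store = None  # without a store, analyze() returns the initialization error

    client = LLMClient(vector_store=vector_store)
    result = client.analyze(
        "Your account has been suspended. Verify now at http://secure-login.example.com"
    )
    print(result)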