import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import joblib
from transformers import AutoModel, AutoTokenizer
from huggingface_hub import hf_hub_download
from torch.utils.data import Dataset, DataLoader
from config import CONFIG

# Import sklearn untuk encoding label sentimen
try:
    from sklearn.preprocessing import LabelEncoder
except ImportError:
    print("⚠️ scikit-learn tidak terinstall, mencoba install...")
    import subprocess
    subprocess.check_call(['pip', 'install', 'scikit-learn'])
    from sklearn.preprocessing import LabelEncoder

# Custom Dataset untuk batch processing
class ABSADataset(Dataset):
    """
    Custom Dataset untuk ABSA batch processing.
    Menggabungkan setiap kalimat dengan setiap aspek untuk prediksi.
    """

    def __init__(self, sentences, aspects, tokenizer, max_len):
        """
        Args:
            sentences (list): List dari kalimat input.
            aspects (list): List dari aspek yang ingin diprediksi.
            tokenizer (AutoTokenizer): Tokenizer IndoBERT.
            max_len (int): Panjang maksimum token.
        """
        self.sentences = sentences
        self.aspects = aspects
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        # Total kombinasi = jumlah kalimat × jumlah aspek
        return len(self.sentences) * len(self.aspects)

    def __getitem__(self, idx):
        """
        Mengembalikan encoded input untuk satu pasangan kalimat-aspek.
        """
        # Hitung indeks kalimat dan aspek dari indeks global
        sent_idx = idx // len(self.aspects)
        aspect_idx = idx % len(self.aspects)

        sentence = self.sentences[sent_idx]
        aspect = self.aspects[aspect_idx]

        # Gabungkan aspek dan kalimat dengan format khusus
        combined = f"[ASPEK] {aspect} [TEXT] {sentence}"

        # Tokenisasi dan encoding text
        encoded = self.tokenizer.encode_plus(
            combined,
            add_special_tokens=True,  # Tambah token [CLS] dan [SEP]
            padding="max_length",     # Padding ke max_length
            max_length=self.max_len,
            truncation=True,          # Potong jika melebihi max_length
            return_attention_mask=True,
            return_tensors="pt",
        )

        return {
            'input_ids': encoded['input_ids'].flatten(),
            'attention_mask': encoded['attention_mask'].flatten(),
            'sent_idx': sent_idx,      # Simpan indeks untuk mapping hasil
            'aspect_idx': aspect_idx
        }


def load_model_and_tokenizer():
    """
    Memuat model IndoBERT ABSA, tokenizer, dan label encoder.
    Jika file tidak ada lokal, akan download dari HuggingFace.

    Returns:
        model (nn.Module): Model ABSA yang sudah diload.
        tokenizer (AutoTokenizer): Tokenizer untuk IndoBERT.
        label_encoder (LabelEncoder): Encoder untuk label sentimen.
        device (torch.device): Device (cuda/cpu) yang digunakan.
    """
    # Setup path direktori model dan tokenizer
    base_path = os.path.abspath(os.path.dirname(__file__))
    model_dir = os.path.join(base_path, "assets", "model")
    tokenizer_dir = os.path.join(base_path, "assets", "tokenizer")

    # Buat direktori jika belum ada
    os.makedirs(model_dir, exist_ok=True)
    os.makedirs(tokenizer_dir, exist_ok=True)

    model_path = os.path.join(model_dir, "indobert_absa_model.pth")
    label_path = os.path.join(model_dir, "label_encoder.joblib")

    # === DOWNLOAD MODEL JIKA BELUM ADA ===
    if not os.path.exists(model_path):
        print("📥 Downloading model dari HuggingFace...")
        try:
            # Download dari HuggingFace Hub
            downloaded_model = hf_hub_download(
                repo_id=CONFIG["hf_model_repo"],
                filename="indobert_absa_model.pth",
                subfolder=CONFIG["hf_model_subfolder"],
                cache_dir=None
            )
            # Copy ke direktori lokal
            import shutil
            shutil.copy(downloaded_model, model_path)
            print(f"✅ Model berhasil didownload ke {model_path}")
        except Exception as e:
            print(f"❌ Error downloading model: {e}")
            raise

    # === DOWNLOAD LABEL ENCODER JIKA BELUM ADA ===
    if not os.path.exists(label_path):
        print("📥 Downloading label encoder dari HuggingFace...")
        try:
            downloaded_label = hf_hub_download(
                repo_id=CONFIG["hf_model_repo"],
                filename="label_encoder.joblib",
                subfolder=CONFIG["hf_model_subfolder"],
                cache_dir=None
            )
            import shutil
            shutil.copy(downloaded_label, label_path)
            print(f"✅ Label encoder berhasil didownload ke {label_path}")
        except Exception as e:
            print(f"❌ Error downloading label encoder: {e}")
            raise

    # === DOWNLOAD TOKENIZER JIKA BELUM ADA ===
    tokenizer_files = ["special_tokens_map.json", "tokenizer.json",
                       "tokenizer_config.json", "vocab.txt"]

    # Cek apakah semua file tokenizer sudah ada
    all_tokenizer_exists = all(
        os.path.exists(os.path.join(tokenizer_dir, f)) for f in tokenizer_files
    )

    if not all_tokenizer_exists:
        print("📥 Downloading tokenizer dari HuggingFace...")
        try:
            for file in tokenizer_files:
                if not os.path.exists(os.path.join(tokenizer_dir, file)):
                    # Download setiap file tokenizer
                    downloaded_file = hf_hub_download(
                        repo_id=CONFIG["hf_model_repo"],
                        filename=file,
                        subfolder=CONFIG["hf_tokenizer_subfolder"],
                        cache_dir=None
                    )
                    import shutil
                    shutil.copy(downloaded_file, os.path.join(
                        tokenizer_dir, file))
            print(f"✅ Tokenizer berhasil didownload ke {tokenizer_dir}")
        except Exception as e:
            print(f"❌ Error downloading tokenizer: {e}")
            # Fallback ke pretrained jika gagal
            pass

    # === LOAD TOKENIZER ===
    try:
        # Coba load dari direktori lokal
        tokenizer = AutoTokenizer.from_pretrained(tokenizer_dir)
        print("✅ Tokenizer loaded dari lokal")
    except Exception as e:
        # Fallback: load dari pretrained model
        print(
            f"⚠️ Gagal load tokenizer lokal ({e}), menggunakan pretrained...")
        tokenizer = AutoTokenizer.from_pretrained(CONFIG["model_name"])

    # === LOAD LABEL ENCODER ===
    try:
        label_encoder = joblib.load(label_path)
        print("✅ Label encoder loaded successfully")
    except Exception as e:
        print(f"❌ Error loading label encoder: {e}")
        raise RuntimeError(
            f"Gagal load label_encoder.joblib. "
            f"Pastikan file valid dan scikit-learn terinstall. Error: {e}"
        )

    # === DEFINISI MODEL ARCHITECTURE ===
    class IndoBertForABSA(nn.Module):
        """
        Model klasifikasi aspek berbasis IndoBERT untuk ABSA.
        Arsitektur: IndoBERT -> LayerNorm -> Dropout -> Linear Classifier
        """

        def __init__(self, num_labels):
            super().__init__()
            # Load pretrained IndoBERT
            self.bert = AutoModel.from_pretrained(
                CONFIG["model_name"], trust_remote_code=True, use_safetensors=True
            )
            # Layer normalisasi untuk stabilitas training
            self.norm = nn.LayerNorm(self.bert.config.hidden_size)
            # Dropout untuk mencegah overfitting
            self.dropout = nn.Dropout(CONFIG["dropout_rate"])
            # Linear layer untuk klasifikasi sentimen
            self.classifier = nn.Linear(
                self.bert.config.hidden_size, num_labels)

        def forward(self, input_ids, attention_mask):
            """
            Forward pass untuk model ABSA.

            Args:
                input_ids (torch.Tensor): Tensor input token IDs.
                attention_mask (torch.Tensor): Tensor mask perhatian.

            Returns:
                torch.Tensor: Logit prediksi.
            """
            # Dapatkan output dari BERT
            output = self.bert(input_ids=input_ids,
                               attention_mask=attention_mask)
            # Gunakan pooler output (representasi [CLS] token)
            pooled = output.pooler_output
            # Normalisasi
            normed = self.norm(pooled)
            # Dropout
            dropped = self.dropout(normed)
            # Klasifikasi
            return self.classifier(dropped)

    # === SETUP DEVICE DAN LOAD MODEL ===
    # Gunakan GPU jika tersedia, jika tidak gunakan CPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Inisialisasi model dengan jumlah label dari label encoder
    model = IndoBertForABSA(num_labels=len(label_encoder.classes_))

    try:
        # Load weights model yang sudah di-training
        model.load_state_dict(torch.load(model_path, map_location=device))
        print("✅ Model state dict loaded successfully")
    except Exception as e:
        print(f"❌ Error loading model state dict: {e}")
        raise

    # Pindahkan model ke device (GPU/CPU)
    model.to(device)
    # Set model ke mode evaluasi (nonaktifkan dropout, dll)
    model.eval()

    return model, tokenizer, label_encoder, device


def predict_multi_aspect(model, tokenizer, sentence, aspek_list, label_encoder, device, max_len):
    """
    Melakukan prediksi sentimen untuk setiap aspek pada satu kalimat.
    Proses prediksi dilakukan satu per satu (non-batch).

    Args:
        model (nn.Module): Model ABSA yang sudah diload.
        tokenizer (AutoTokenizer): Tokenizer IndoBERT.
        sentence (str): Kalimat input.
        aspek_list (list): Daftar aspek yang ingin diprediksi.
        label_encoder (LabelEncoder): Encoder label.
        device (torch.device): Device (cuda/cpu).
        max_len (int): Panjang maksimum token.

    Returns:
        dict: Hasil prediksi berupa {aspek: label_sentimen}.
    """
    results = {}

    # Loop untuk setiap aspek
    for aspek in aspek_list:
        # Gabungkan aspek dan kalimat
        combined = f"[ASPEK] {aspek} [TEXT] {sentence}"

        # Tokenisasi input
        encoded = tokenizer.encode_plus(
            combined,
            add_special_tokens=True,
            padding="max_length",
            max_length=max_len,
            truncation=True,
            return_attention_mask=True,
            return_tensors="pt",
        )

        # Pindahkan tensor ke device
        input_ids = encoded["input_ids"].to(device)
        attention_mask = encoded["attention_mask"].to(device)

        # Prediksi tanpa menghitung gradient (inference mode)
        with torch.no_grad():
            # Forward pass
            outputs = model( in