""" Synthetic Data Generator Module Generates synthetic training data using LLM APIs (OpenAI, Anthropic). """ import json import os from typing import List, Dict, Any, Optional import time class SyntheticDataGenerator: """Generate synthetic training data using LLMs.""" def __init__( self, api_provider: str = "openai", api_key: Optional[str] = None, model: Optional[str] = None ): """ Initialize synthetic data generator. Args: api_provider: "openai" or "anthropic" api_key: API key (uses environment variable if None) model: Model name (uses default if None) """ self.api_provider = api_provider.lower() self.api_key = api_key or os.getenv("OPENAI_API_KEY" if self.api_provider == "openai" else "ANTHROPIC_API_KEY") if self.api_provider == "openai": self.model = model or "gpt-4-turbo-preview" else: self.model = model or "claude-3-opus-20240229" self.client = None self._init_client() def _init_client(self): """Initialize API client.""" try: if self.api_provider == "openai": from openai import OpenAI self.client = OpenAI(api_key=self.api_key) else: from anthropic import Anthropic self.client = Anthropic(api_key=self.api_key) except ImportError: print(f"Warning: {self.api_provider} library not installed") except Exception as e: print(f"Warning: Failed to initialize {self.api_provider} client: {e}") def generate_examples( self, num_examples: int, topics: List[str], difficulty: str = "mixed", domain: str = "financial_advisor" ) -> List[Dict[str, Any]]: """ Generate synthetic training examples. Args: num_examples: Number of examples to generate topics: List of topics to cover difficulty: "beginner", "intermediate", "advanced", or "mixed" domain: Domain context Returns: List of generated examples """ if not self.client: raise ValueError(f"API client not initialized. Check {self.api_provider} API key.") print(f"Generating {num_examples} examples using {self.api_provider}...") examples = [] examples_per_topic = max(1, num_examples // len(topics)) for topic in topics: for i in range(examples_per_topic): if len(examples) >= num_examples: break try: example = self._generate_single_example(topic, difficulty, domain) if example: examples.append(example) print(f"Generated {len(examples)}/{num_examples}", end="\r") time.sleep(0.5) # Rate limiting except Exception as e: print(f"\nError generating example: {e}") continue print(f"\n✅ Generated {len(examples)} examples") return examples def _generate_single_example( self, topic: str, difficulty: str, domain: str ) -> Optional[Dict[str, Any]]: """Generate a single training example.""" prompt = f"""Generate a realistic {domain} training example about {topic}. Difficulty level: {difficulty} Output format (JSON): {{ "instruction": "The user's question or request", "input": "Additional context (optional, can be empty string)", "output": "The detailed, helpful response" }} Make it realistic and detailed. The response should be informative and professional.""" try: if self.api_provider == "openai": response = self.client.chat.completions.create( model=self.model, messages=[ {"role": "system", "content": "You are a data generation assistant. 
Output only valid JSON."}, {"role": "user", "content": prompt} ], temperature=0.8 ) content = response.choices[0].message.content else: # anthropic response = self.client.messages.create( model=self.model, max_tokens=1024, messages=[ {"role": "user", "content": prompt} ], temperature=0.8 ) content = response.content[0].text # Parse JSON # Remove markdown code blocks if present content = content.strip() if content.startswith("```"): content = content.split("```")[1] if content.startswith("json"): content = content[4:] content = content.strip() example = json.loads(content) # Validate structure if "instruction" in example and "output" in example: if "input" not in example: example["input"] = "" return example except Exception as e: print(f"\nError parsing example: {e}") return None def generate_from_scenarios( self, scenarios: List[Dict[str, Any]], num_examples_per_scenario: int = 1 ) -> List[Dict[str, Any]]: """ Generate examples from predefined scenarios. Args: scenarios: List of scenario dicts with context num_examples_per_scenario: Examples to generate per scenario Returns: Generated examples """ examples = [] for scenario in scenarios: for _ in range(num_examples_per_scenario): prompt = f"""Based on this scenario, generate a training example: Scenario: {json.dumps(scenario, indent=2)} Output format (JSON): {{ "instruction": "The user's question or request", "input": "Additional context based on scenario", "output": "The detailed, helpful response" }}""" try: example = self._generate_from_prompt(prompt) if example: examples.append(example) except Exception as e: print(f"Error: {e}") continue return examples def _generate_from_prompt(self, prompt: str) -> Optional[Dict[str, Any]]: """Generate example from custom prompt.""" try: if self.api_provider == "openai": response = self.client.chat.completions.create( model=self.model, messages=[{"role": "user", "content": prompt}], temperature=0.8 ) content = response.choices[0].message.content else: response = self.client.messages.create( model=self.model, max_tokens=1024, messages=[{"role": "user", "content": prompt}] ) content = response.content[0].text # Parse JSON content = content.strip() if "```" in content: content = content.split("```")[1] if content.startswith("json"): content = content[4:] content = content.strip() return json.loads(content) except Exception as e: print(f"Error: {e}") return None
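

if __name__ == "__main__":
    # Illustrative usage sketch, not part of the original module's API: it
    # assumes the `openai` package is installed and OPENAI_API_KEY is set in
    # the environment. The topic names, example count, and output filename
    # below are arbitrary placeholders for demonstration.
    generator = SyntheticDataGenerator(api_provider="openai")

    examples = generator.generate_examples(
        num_examples=5,
        topics=["retirement planning", "tax-advantaged accounts"],
        difficulty="intermediate",
        domain="financial_advisor",
    )

    # Persist the examples in the instruction/input/output format produced above.
    with open("synthetic_examples.json", "w", encoding="utf-8") as f:
        json.dump(examples, f, indent=2, ensure_ascii=False)
    print(f"Saved {len(examples)} examples to synthetic_examples.json")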