Spaces:

Locutusque
/

LLM-Forest-Orchestra

Sleeping

File size: 41,675 Bytes

import os
import io
import gc
import math
import time
import uuid
import json
import spaces
import random
from abc import ABC, abstractmethod
from dataclasses import dataclass, field, asdict
from typing import Dict, List, Tuple, Optional, Any, Union
from enum import Enum

import gradio as gr
import numpy as np
import torch
from transformers import AutoModel, AutoTokenizer

import mido
from mido import Message, MidiFile, MidiTrack


# Configuration Classes

class ComputeMode(Enum):
    """Enum for computation modes."""
    FULL_MODEL = "Full model"
    MOCK_LATENTS = "Mock latents"


class MusicRole(Enum):
    """Enum for musical roles/layers."""
    MELODY = "melody"
    BASS = "bass"
    HARMONY = "harmony"
    PAD = "pad"
    ACCENT = "accent"
    ATMOSPHERE = "atmosphere"


@dataclass
class ScaleDefinition:
    """Represents a musical scale."""
    name: str
    notes: List[int]
    description: str = ""
    
    def __post_init__(self):
        """Validate scale notes are within MIDI range."""
        for note in self.notes:
            if not 0 <= note <= 127:
                raise ValueError(f"MIDI note {note} out of range (0-127)")


@dataclass
class InstrumentMapping:
    """Maps a layer to an instrument and musical role."""
    program: int  # MIDI program number
    role: MusicRole
    channel: int
    name: str = ""
    
    def __post_init__(self):
        """Validate MIDI program and channel."""
        if not 0 <= self.program <= 127:
            raise ValueError(f"MIDI program {self.program} out of range")
        if not 0 <= self.channel <= 15:
            raise ValueError(f"MIDI channel {self.channel} out of range")


@dataclass
class GenerationConfig:
    """Complete configuration for music generation."""
    model_name: str
    compute_mode: ComputeMode
    base_tempo: int
    velocity_range: Tuple[int, int]
    scale: ScaleDefinition
    num_layers_limit: int
    seed: int
    instrument_preset: str
    
    # Additional configuration options
    quantization_grid: int = 120
    octave_range: int = 2
    dynamics_curve: str = "linear"  # linear, exponential, logarithmic
    
    def validate(self):
        """Validate configuration parameters."""
        if not 1 <= self.base_tempo <= 2000:
            raise ValueError("Tempo must be between 1 and 2000")
        if not 1 <= self.velocity_range[0] < self.velocity_range[1] <= 127:
            raise ValueError("Invalid velocity range")
        if not 1 <= self.num_layers_limit <= 32:
            raise ValueError("Number of layers must be between 1 and 32")
    
    def to_dict(self) -> Dict:
        """Convert config to dictionary for serialization."""
        return {
            "model_name": self.model_name,
            "compute_mode": self.compute_mode.value,
            "base_tempo": self.base_tempo,
            "velocity_range": self.velocity_range,
            "scale_name": self.scale.name,
            "scale_notes": self.scale.notes,
            "num_layers_limit": self.num_layers_limit,
            "seed": self.seed,
            "instrument_preset": self.instrument_preset,
            "quantization_grid": self.quantization_grid,
            "octave_range": self.octave_range,
            "dynamics_curve": self.dynamics_curve
        }
    
    @classmethod
    def from_dict(cls, data: Dict, scale_manager: "ScaleManager") -> "GenerationConfig":
        """Create config from dictionary."""
        scale = scale_manager.get_scale(data["scale_name"])
        if scale is None:
            scale = ScaleDefinition(name="Custom", notes=data["scale_notes"])
        
        return cls(
            model_name=data["model_name"],
            compute_mode=ComputeMode(data["compute_mode"]),
            base_tempo=data["base_tempo"],
            velocity_range=tuple(data["velocity_range"]),
            scale=scale,
            num_layers_limit=data["num_layers_limit"],
            seed=data["seed"],
            instrument_preset=data["instrument_preset"],
            quantization_grid=data.get("quantization_grid", 120),
            octave_range=data.get("octave_range", 2),
            dynamics_curve=data.get("dynamics_curve", "linear")
        )


@dataclass
class Latents:
    """Container for model latents."""
    hidden_states: List[torch.Tensor]
    attentions: List[torch.Tensor]
    num_layers: int
    num_tokens: int
    metadata: Dict[str, Any] = field(default_factory=dict)


# Music Components

class ScaleManager:
    """Manages musical scales and modes."""
    
    def __init__(self):
        """Initialize with default scales."""
        self.scales = {
            "C pentatonic": ScaleDefinition(
                "C pentatonic", 
                [60, 62, 65, 67, 70, 72, 74, 77],
                "Major pentatonic scale"
            ),
            "C major": ScaleDefinition(
                "C major",
                [60, 62, 64, 65, 67, 69, 71, 72],
                "Major scale (Ionian mode)"
            ),
            "A minor": ScaleDefinition(
                "A minor",
                [57, 59, 60, 62, 64, 65, 67, 69],
                "Natural minor scale (Aeolian mode)"
            ),
            "D dorian": ScaleDefinition(
                "D dorian",
                [62, 64, 65, 67, 69, 71, 72, 74],
                "Dorian mode - minor with raised 6th"
            ),
            "E phrygian": ScaleDefinition(
                "E phrygian",
                [64, 65, 67, 69, 71, 72, 74, 76],
                "Phrygian mode - minor with lowered 2nd"
            ),
            "G mixolydian": ScaleDefinition(
                "G mixolydian",
                [67, 69, 71, 72, 74, 76, 77, 79],
                "Mixolydian mode - major with lowered 7th"
            ),
            "Blues scale": ScaleDefinition(
                "Blues scale",
                [60, 63, 65, 66, 67, 70, 72, 75],
                "Blues scale with blue notes"
            ),
            "Chromatic": ScaleDefinition(
                "Chromatic",
                list(range(60, 72)),
                "All 12 semitones"
            )
        }
    
    def get_scale(self, name: str) -> Optional[ScaleDefinition]:
        """Get scale by name."""
        return self.scales.get(name)
    
    def add_custom_scale(self, name: str, notes: List[int], description: str = "") -> ScaleDefinition:
        """Add a custom scale."""
        scale = ScaleDefinition(name, notes, description)
        self.scales[name] = scale
        return scale
    
    def list_scales(self) -> List[str]:
        """Get list of available scale names."""
        return list(self.scales.keys())


class InstrumentPresetManager:
    """Manages instrument presets for different musical styles."""
    
    def __init__(self):
        """Initialize with default presets."""
        self.presets = {
            "Ensemble (melody+bass+pad etc.)": [
                InstrumentMapping(0, MusicRole.MELODY, 0, "Piano"),
                InstrumentMapping(33, MusicRole.BASS, 1, "Electric Bass"),
                InstrumentMapping(46, MusicRole.HARMONY, 2, "Harp"),
                InstrumentMapping(48, MusicRole.PAD, 3, "String Ensemble"),
                InstrumentMapping(11, MusicRole.ACCENT, 4, "Vibraphone"),
                InstrumentMapping(89, MusicRole.ATMOSPHERE, 5, "Pad Warm")
            ],
            "Piano Trio (melody+bass+harmony)": [
                InstrumentMapping(0, MusicRole.MELODY, 0, "Piano"),
                InstrumentMapping(33, MusicRole.BASS, 1, "Electric Bass"),
                InstrumentMapping(0, MusicRole.HARMONY, 2, "Piano"),
                InstrumentMapping(48, MusicRole.PAD, 3, "String Ensemble"),
                InstrumentMapping(0, MusicRole.ACCENT, 4, "Piano"),
                InstrumentMapping(0, MusicRole.ATMOSPHERE, 5, "Piano")
            ],
            "Pads & Atmosphere": [
                InstrumentMapping(48, MusicRole.PAD, 0, "String Ensemble"),
                InstrumentMapping(48, MusicRole.PAD, 1, "String Ensemble"),
                InstrumentMapping(89, MusicRole.ATMOSPHERE, 2, "Pad Warm"),
                InstrumentMapping(89, MusicRole.ATMOSPHERE, 3, "Pad Warm"),
                InstrumentMapping(46, MusicRole.HARMONY, 4, "Harp"),
                InstrumentMapping(11, MusicRole.ACCENT, 5, "Vibraphone")
            ],
            "Orchestral": [
                InstrumentMapping(40, MusicRole.MELODY, 0, "Violin"),
                InstrumentMapping(42, MusicRole.BASS, 1, "Cello"),
                InstrumentMapping(46, MusicRole.HARMONY, 2, "Harp"),
                InstrumentMapping(48, MusicRole.PAD, 3, "String Ensemble"),
                InstrumentMapping(73, MusicRole.ACCENT, 4, "Flute"),
                InstrumentMapping(49, MusicRole.ATMOSPHERE, 5, "Slow Strings")
            ],
            "Electronic": [
                InstrumentMapping(80, MusicRole.MELODY, 0, "Lead Square"),
                InstrumentMapping(38, MusicRole.BASS, 1, "Synth Bass"),
                InstrumentMapping(81, MusicRole.HARMONY, 2, "Lead Sawtooth"),
                InstrumentMapping(90, MusicRole.PAD, 3, "Pad Polysynth"),
                InstrumentMapping(82, MusicRole.ACCENT, 4, "Lead Calliope"),
                InstrumentMapping(91, MusicRole.ATMOSPHERE, 5, "Pad Bowed")
            ]
        }
    
    def get_preset(self, name: str) -> List[InstrumentMapping]:
        """Get instrument preset by name."""
        return self.presets.get(name, self.presets["Ensemble (melody+bass+pad etc.)"])
    
    def list_presets(self) -> List[str]:
        """Get list of available preset names."""
        return list(self.presets.keys())


# Music Generation Components

class MusicMathUtils:
    """Utility class for music-related mathematical operations."""
    
    @staticmethod
    def entropy(p: np.ndarray) -> float:
        """Calculate Shannon entropy of a probability distribution."""
        p = p / (p.sum() + 1e-9)
        return float(-np.sum(p * np.log2(p + 1e-9)))
    
    @staticmethod
    def quantize_time(time_val: int, grid: int = 120) -> int:
        """Quantize time value to grid."""
        return int(round(time_val / grid) * grid)
    
    @staticmethod
    def norm_to_scale(val: float, scale: np.ndarray, octave_range: int = 2) -> int:
        """Map normalized value to scale note with octave range."""
        octave = int(abs(val) * octave_range) * 12
        note_idx = int(abs(val * 100) % len(scale))
        return int(scale[note_idx] + octave)
    
    @staticmethod
    def apply_dynamics_curve(value: float, curve_type: str = "linear") -> float:
        """Apply dynamics curve to a value."""
        value = np.clip(value, 0, 1)
        if curve_type == "exponential":
            return value ** 2
        elif curve_type == "logarithmic":
            return np.log1p(value * np.e) / np.log1p(np.e)
        else:  # linear
            return value


class NoteGenerator:
    """Generates notes based on neural network latents."""
    
    # Role-specific frequency multipliers
    ROLE_FREQUENCIES = {
        MusicRole.MELODY: 2.0,
        MusicRole.BASS: 0.5,
        MusicRole.HARMONY: 1.5,
        MusicRole.PAD: 0.25,
        MusicRole.ACCENT: 3.0,
        MusicRole.ATMOSPHERE: 0.33
    }
    
    # Role-specific weight distributions
    ROLE_WEIGHTS = {
        MusicRole.MELODY: np.array([0.4, 0.2, 0.2, 0.1, 0.1]),
        MusicRole.BASS: np.array([0.1, 0.4, 0.1, 0.3, 0.1]),
        MusicRole.HARMONY: np.array([0.2, 0.2, 0.3, 0.2, 0.1]),
        MusicRole.PAD: np.array([0.1, 0.3, 0.1, 0.1, 0.4]),
        MusicRole.ACCENT: np.array([0.5, 0.1, 0.2, 0.1, 0.1]),
        MusicRole.ATMOSPHERE: np.array([0.1, 0.2, 0.1, 0.2, 0.4])
    }
    
    def __init__(self, config: GenerationConfig):
        """Initialize with generation configuration."""
        self.config = config
        self.math_utils = MusicMathUtils()
        self.history: Dict[int, int] = {}
    
    def create_note_probability(
        self, 
        layer_idx: int, 
        token_idx: int, 
        attention_val: float, 
        hidden_state: np.ndarray, 
        num_tokens: int, 
        role: MusicRole
    ) -> float:
        """Calculate probability of playing a note based on multiple factors."""
        # Base probability from attention
        base_prob = 1 / (1 + np.exp(-10 * (attention_val - 0.5)))
        
        # Temporal factor based on role frequency
        temporal_factor = 0.5 + 0.5 * np.sin(
            2 * np.pi * self.ROLE_FREQUENCIES[role] * token_idx / max(1, num_tokens)
        )
        
        # Energy factor from hidden state norm
        energy = np.linalg.norm(hidden_state)
        energy_factor = np.tanh(energy / 10)
        
        # Variance factor
        local_variance = np.var(hidden_state)
        variance_factor = 1 - np.exp(-local_variance)
        
        # Entropy factor
        state_entropy = self.math_utils.entropy(np.abs(hidden_state))
        max_entropy = np.log2(max(2, hidden_state.shape[0]))
        entropy_factor = state_entropy / max_entropy
        
        # Combine factors with role-specific weights
        factors = np.array([base_prob, temporal_factor, energy_factor, variance_factor, entropy_factor])
        weights = self.ROLE_WEIGHTS[role]
        combined_prob = float(np.dot(weights, factors))
        
        # Add deterministic noise for variation
        noise_seed = layer_idx * 1000 + token_idx
        noise = 0.1 * (np.sin(noise_seed * 0.1) + np.cos(noise_seed * 0.23)) / 2
        
        # Apply dynamics curve
        final_prob = (combined_prob + noise) ** 1.5
        final_prob = self.math_utils.apply_dynamics_curve(final_prob, self.config.dynamics_curve)
        
        return float(np.clip(final_prob, 0, 1))
    
    def should_play_note(
        self, 
        layer_idx: int, 
        token_idx: int, 
        attention_val: float, 
        hidden_state: np.ndarray, 
        num_tokens: int, 
        role: MusicRole
    ) -> bool:
        """Determine if a note should be played."""
        prob = self.create_note_probability(
            layer_idx, token_idx, attention_val, hidden_state, num_tokens, role
        )
        
        # Adjust probability based on silence duration
        if layer_idx in self.history:
            last_played = self.history[layer_idx]
            silence_duration = token_idx - last_played
            prob *= (1 + np.tanh(silence_duration / 5) * 0.5)
        
        # Stochastic decision
        play_note = np.random.random() < prob
        
        if play_note:
            self.history[layer_idx] = token_idx
        
        return play_note
    
    def generate_notes_for_role(
        self, 
        role: MusicRole, 
        hidden_state: np.ndarray, 
        scale: np.ndarray
    ) -> List[int]:
        """Generate notes based on role and hidden state."""
        if role == MusicRole.MELODY:
            note = self.math_utils.norm_to_scale(
                hidden_state[0], scale, octave_range=1
            )
            return [note]
        
        elif role == MusicRole.BASS:
            note = self.math_utils.norm_to_scale(
                hidden_state[0], scale, octave_range=0
            ) - 12
            return [note]
        
        elif role == MusicRole.HARMONY:
            return [
                self.math_utils.norm_to_scale(hidden_state[i], scale, octave_range=1)
                for i in range(0, min(2, len(hidden_state)), 1)
            ]
        
        elif role == MusicRole.PAD:
            return [
                self.math_utils.norm_to_scale(hidden_state[i], scale, octave_range=1)
                for i in range(0, min(3, len(hidden_state)), 2)
            ]
        
        elif role == MusicRole.ACCENT:
            note = self.math_utils.norm_to_scale(
                hidden_state[0], scale, octave_range=2
            ) + 12
            return [note]
        
        else:  # ATMOSPHERE
            return [
                self.math_utils.norm_to_scale(hidden_state[i], scale, octave_range=1)
                for i in range(0, min(2, len(hidden_state)), 3)
            ]
    
    def calculate_velocity(
        self, 
        role: MusicRole, 
        attention_strength: float
    ) -> int:
        """Calculate note velocity based on role and attention."""
        base_velocity = int(
            attention_strength * (self.config.velocity_range[1] - self.config.velocity_range[0]) 
            + self.config.velocity_range[0]
        )
        
        # Role-specific adjustments
        if role == MusicRole.MELODY:
            velocity = min(base_velocity + 10, 127)
        elif role == MusicRole.ACCENT:
            velocity = min(base_velocity + 20, 127)
        elif role in [MusicRole.PAD, MusicRole.ATMOSPHERE]:
            velocity = max(base_velocity - 10, 20)
        else:
            velocity = base_velocity
        
        return velocity
    
    def calculate_duration(
        self, 
        role: MusicRole, 
        attention_matrix: np.ndarray
    ) -> int:
        """Calculate note duration based on role and attention."""
        if role in [MusicRole.PAD, MusicRole.ATMOSPHERE]:
            duration = self.config.base_tempo * 4
        elif role == MusicRole.BASS:
            duration = self.config.base_tempo
        else:
            try:
                dur_factor = self.math_utils.entropy(attention_matrix.mean(axis=0)) / (
                    np.log2(attention_matrix.shape[-1]) + 1e-9
                )
            except Exception:
                dur_factor = 0.5
            duration = self.math_utils.quantize_time(
                int(self.config.base_tempo * (0.5 + dur_factor * 1.5)),
                self.config.quantization_grid
            )
        
        return duration


# Model Interaction

class LatentExtractor(ABC):
    """Abstract base class for latent extraction strategies."""
    
    @abstractmethod
    def extract(self, text: str, config: GenerationConfig, progress=None) -> Latents:
        """Extract latents from text."""
        pass


class MockLatentExtractor(LatentExtractor):
    """Generate mock latents for testing without loading models."""
    
    def extract(self, text: str, config: GenerationConfig, progress=None) -> Latents:
        """Generate synthetic latents based on text."""
        # Simulate token count based on text length
        tokens = max(16, min(128, len(text.split()) * 4))
        layers = min(config.num_layers_limit, 6)
        
        # Generate deterministic but varied latents based on text
        np.random.seed(hash(text) % 2**32)
        
        hidden_states = [
            torch.randn(1, tokens, 128) for _ in range(layers)
        ]
        attentions = [
            torch.rand(1, 8, tokens, tokens) for _ in range(layers)
        ]
        
        metadata = {
            "mode": "mock",
            "text_length": len(text),
            "generated_tokens": tokens,
            "generated_layers": layers
        }
        
        return Latents(
            hidden_states=hidden_states,
            attentions=attentions,
            num_layers=layers,
            num_tokens=tokens,
            metadata=metadata
        )


class ModelLatentExtractor(LatentExtractor):
    """Extract real latents from transformer models."""
    
    @spaces.GPU(duration=45)
    def extract(self, text: str, config: GenerationConfig, progress=None) -> Latents:
        """Extract latents from a real transformer model."""
        model_name = config.model_name
        
        # Load tokenizer
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        if tokenizer.pad_token is None and tokenizer.eos_token is not None:
            tokenizer.pad_token = tokenizer.eos_token
        
        # Configure model loading
        load_kwargs = {
            "output_hidden_states": True,
            "output_attentions": True,
            "device_map": "cuda" if torch.cuda.is_available() else "cpu",
        }
        
        # Set appropriate dtype
        try:
            load_kwargs["torch_dtype"] = (
                torch.bfloat16 if torch.cuda.is_available() else torch.float32
            )
        except Exception:
            pass
        
        # Load model
        model = AutoModel.from_pretrained(model_name, **load_kwargs)
        
        # Tokenize input
        inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
        device = next(model.parameters()).device
        inputs = {k: v.to(device) for k, v in inputs.items()}
        
        # Get model outputs
        with torch.no_grad():
            outputs = model(**inputs)
            hidden_states = list(outputs.hidden_states)
            attentions = list(outputs.attentions)
        
        # Move to CPU to free VRAM
        hidden_states = [hs.to("cpu") for hs in hidden_states]
        attentions = [att.to("cpu") for att in attentions]
        
        # Limit layers
        layers = min(config.num_layers_limit, len(hidden_states))
        tokens = hidden_states[0].shape[1]
        
        # Clean up
        try:
            del model
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            gc.collect()
        except Exception:
            pass
        
        metadata = {
            "mode": "full_model",
            "model_name": model_name,
            "actual_layers": len(hidden_states),
            "used_layers": layers,
            "tokens": tokens
        }
        
        return Latents(
            hidden_states=hidden_states[:layers],
            attentions=attentions[:layers],
            num_layers=layers,
            num_tokens=tokens,
            metadata=metadata
        )


class LatentExtractorFactory:
    """Factory for creating appropriate latent extractors."""
    
    @staticmethod
    def create(compute_mode: ComputeMode) -> LatentExtractor:
        """Create a latent extractor based on compute mode."""
        if compute_mode == ComputeMode.MOCK_LATENTS:
            return MockLatentExtractor()
        else:
            return ModelLatentExtractor()


# MIDI Generation

class MIDIRenderer:
    """Renders MIDI files from latents."""
    
    def __init__(self, config: GenerationConfig, instrument_manager: InstrumentPresetManager):
        """Initialize MIDI renderer."""
        self.config = config
        self.instrument_manager = instrument_manager
        self.note_generator = NoteGenerator(config)
        self.math_utils = MusicMathUtils()
    
    def render(self, latents: Latents) -> Tuple[bytes, Dict[str, Any]]:
        """Render MIDI from latents."""
        # Set random seeds for reproducibility
        np.random.seed(self.config.seed)
        random.seed(self.config.seed)
        torch.manual_seed(self.config.seed)
        
        # Prepare data
        scale = np.array(self.config.scale.notes, dtype=int)
        num_layers = latents.num_layers
        num_tokens = latents.num_tokens
        
        # Convert tensors to numpy
        hidden_states = [
            hs.float().numpy() if isinstance(hs, torch.Tensor) else hs 
            for hs in latents.hidden_states
        ]
        attentions = [
            att.float().numpy() if isinstance(att, torch.Tensor) else att 
            for att in latents.attentions
        ]
        
        # Get instrument mappings
        instrument_mappings = self.instrument_manager.get_preset(self.config.instrument_preset)
        
        # Create MIDI file and tracks
        midi_file = MidiFile()
        tracks = self._create_tracks(midi_file, num_layers, instrument_mappings)
        
        # Generate notes
        stats = self._generate_notes(
            tracks, hidden_states, attentions, 
            scale, num_tokens, instrument_mappings
        )
        
        # Convert to bytes
        bio = io.BytesIO()
        midi_file.save(file=bio)
        bio.seek(0)
        
        # Prepare metadata
        metadata = {
            "config": self.config.to_dict(),
            "latents_info": latents.metadata,
            "stats": stats,
            "timestamp": time.time()
        }
        
        return bio.read(), metadata
    
    def _create_tracks(
        self, 
        midi_file: MidiFile, 
        num_layers: int, 
        instrument_mappings: List[InstrumentMapping]
    ) -> List[MidiTrack]:
        """Create MIDI tracks with instrument assignments."""
        tracks = []
        
        for layer_idx in range(num_layers):
            track = MidiTrack()
            midi_file.tracks.append(track)
            tracks.append(track)
            
            # Get instrument mapping for this layer
            if layer_idx < len(instrument_mappings):
                mapping = instrument_mappings[layer_idx]
            else:
                # Default to piano if not enough mappings
                mapping = InstrumentMapping(0, MusicRole.MELODY, layer_idx % 16)
            
            # Set instrument
            track.append(Message(
                "program_change", 
                program=mapping.program, 
                time=0, 
                channel=mapping.channel
            ))
            
            # Add track name
            if mapping.name:
                track.append(mido.MetaMessage(
                    "track_name", 
                    name=f"{mapping.name} - {mapping.role.value}", 
                    time=0
                ))
        
        return tracks
    
    def _generate_notes(
        self,
        tracks: List[MidiTrack],
        hidden_states: List[np.ndarray],
        attentions: List[np.ndarray],
        scale: np.ndarray,
        num_tokens: int,
        instrument_mappings: List[InstrumentMapping]
    ) -> Dict[str, Any]:
        """Generate notes for all tracks."""
        current_time = [0] * len(tracks)
        notes_count = [0] * len(tracks)
        
        for token_idx in range(num_tokens):
            # Update time periodically
            if token_idx > 0 and token_idx % 4 == 0:
                for layer_idx in range(len(tracks)):
                    current_time[layer_idx] += self.config.base_tempo
            
            # Calculate panning
            pan = 64 + int(32 * np.sin(token_idx * math.pi / max(1, num_tokens)))
            
            # Generate notes for each layer
            for layer_idx in range(len(tracks)):
                if layer_idx >= len(instrument_mappings):
                    continue
                
                mapping = instrument_mappings[layer_idx]
                
                # Get attention and hidden state
                attn_matrix = attentions[min(layer_idx, len(attentions) - 1)][0, :, token_idx, :]
                attention_strength = float(np.mean(attn_matrix))
                layer_vec = hidden_states[layer_idx][0, token_idx]
                
                # Check if note should be played
                if not self.note_generator.should_play_note(
                    layer_idx, token_idx, attention_strength, 
                    layer_vec, num_tokens, mapping.role
                ):
                    continue
                
                # Generate notes
                notes_to_play = self.note_generator.generate_notes_for_role(
                    mapping.role, layer_vec, scale
                )
                
                # Calculate velocity and duration
                velocity = self.note_generator.calculate_velocity(
                    mapping.role, attention_strength
                )
                duration = self.note_generator.calculate_duration(
                    mapping.role, attn_matrix
                )
                
                # Add notes to track
                for note in notes_to_play:
                    note = max(21, min(108, int(note)))  # Clamp to piano range
                    
                    tracks[layer_idx].append(Message(
                        "note_on", 
                        note=note, 
                        velocity=velocity, 
                        time=current_time[layer_idx], 
                        channel=mapping.channel
                    ))
                    
                    tracks[layer_idx].append(Message(
                        "note_off", 
                        note=note, 
                        velocity=0, 
                        time=duration, 
                        channel=mapping.channel
                    ))
                    
                    current_time[layer_idx] = 0
                    notes_count[layer_idx] += 1
                
                # Set panning on first token
                if token_idx == 0:
                    tracks[layer_idx].append(Message(
                        "control_change", 
                        control=10, 
                        value=pan, 
                        time=0, 
                        channel=mapping.channel
                    ))
        
        return {
            "num_layers": len(tracks),
            "num_tokens": num_tokens,
            "notes_per_layer": notes_count,
            "total_notes": int(sum(notes_count)),
            "tempo_ticks_per_beat": int(self.config.base_tempo),
            "scale": list(map(int, scale.tolist())),
        }


# Main Orchestrator

class LLMForestOrchestra:
    """Main orchestrator class that coordinates the entire pipeline."""
    
    DEFAULT_MODEL = "unsloth/Qwen3-14B-Base"
    
    def __init__(self):
        """Initialize the orchestra."""
        self.scale_manager = ScaleManager()
        self.instrument_manager = InstrumentPresetManager()
        self.saved_configs: Dict[str, GenerationConfig] = {}
    
    def generate(
        self,
        text: str,
        model_name: str,
        compute_mode: str,
        base_tempo: int,
        velocity_range: Tuple[int, int],
        scale_name: str,
        custom_scale_notes: Optional[List[int]],
        num_layers: int,
        instrument_preset: str,
        seed: int,
        quantization_grid: int = 120,
        octave_range: int = 2,
        dynamics_curve: str = "linear"
    ) -> Tuple[str, Dict[str, Any]]:
        """Generate MIDI from text input."""
        # Get or create scale
        if scale_name == "Custom":
            if not custom_scale_notes:
                raise ValueError("Custom scale requires note list")
            scale = ScaleDefinition("Custom", custom_scale_notes)
        else:
            scale = self.scale_manager.get_scale(scale_name)
            if scale is None:
                raise ValueError(f"Unknown scale: {scale_name}")
        
        # Create configuration
        config = GenerationConfig(
            model_name=model_name or self.DEFAULT_MODEL,
            compute_mode=ComputeMode(compute_mode),
            base_tempo=base_tempo,
            velocity_range=velocity_range,
            scale=scale,
            num_layers_limit=num_layers,
            seed=seed,
            instrument_preset=instrument_preset,
            quantization_grid=quantization_grid,
            octave_range=octave_range,
            dynamics_curve=dynamics_curve
        )
        
        # Validate configuration
        config.validate()
        
        # Extract latents
        extractor = LatentExtractorFactory.create(config.compute_mode)
        latents = extractor.extract(text, config)
        
        # Render MIDI
        renderer = MIDIRenderer(config, self.instrument_manager)
        midi_bytes, metadata = renderer.render(latents)
        
        # Save MIDI file
        filename = f"llm_forest_orchestra_{uuid.uuid4().hex[:8]}.mid"
        with open(filename, "wb") as f:
            f.write(midi_bytes)
        
        return filename, metadata
    
    def save_config(self, name: str, config: GenerationConfig):
        """Save a configuration for later use."""
        self.saved_configs[name] = config
    
    def load_config(self, name: str) -> Optional[GenerationConfig]:
        """Load a saved configuration."""
        return self.saved_configs.get(name)
    
    def export_config(self, config: GenerationConfig, filepath: str):
        """Export configuration to JSON file."""
        with open(filepath, "w") as f:
            json.dump(config.to_dict(), f, indent=2)
    
    def import_config(self, filepath: str) -> GenerationConfig:
        """Import configuration from JSON file."""
        with open(filepath, "r") as f:
            data = json.load(f)
        return GenerationConfig.from_dict(data, self.scale_manager)


# Gradio UI

class GradioInterface:
    """Manages the Gradio user interface."""
    
    DESCRIPTION = """
    # 🌲 LLM Forest Orchestra — Sonify Transformer Internals
    
    Transform the hidden states and attention patterns of language models into multi-layered musical compositions.
    
    ## 🍄 Inspiration
    
    This project is inspired by the way **mushrooms and mycelial networks in forests**
    connect plants and trees, forming a living web of communication and resource sharing.
    These connections, can be turned into ethereal music.
    Just as signals move through these hidden connections, transformer models also
    pass hidden states and attentions across their layers. Here, those hidden
    connections are translated into **music**, analogous to the forest's secret orchestra.
    
    ## Features
    - **Two compute modes**: Full model (GPU) or Mock latents (CPU-friendly)
    - **Multiple musical scales**: From pentatonic to chromatic
    - **Instrument presets**: Orchestral, electronic, ensemble, and more
    - **Advanced controls**: Dynamics curves, quantization, velocity ranges
    - **Export**: Standard MIDI files for further editing in your DAW
    """
    
    EXAMPLE_TEXT = """Joy cascades in golden waterfalls, crashing into pools of melancholy blue.
    Anger burns red through veins of marble, while serenity floats on clouds of softest grey.
    Love pulses in waves of crimson and rose, intertwining with longing's purple haze.
    Each feeling resonates at its own frequency, painting music across the soul's canvas."""
    
    def __init__(self, orchestra: LLMForestOrchestra):
        """Initialize the interface."""
        self.orchestra = orchestra
    
    def create_interface(self) -> gr.Blocks:
        """Create the Gradio interface."""
        with gr.Blocks(title="LLM Forest Orchestra", theme=gr.themes.Soft()) as demo:
            gr.Markdown(self.DESCRIPTION)
            
            with gr.Tabs():
                with gr.TabItem("🎵 Generate Music"):
                    self._create_generation_tab()
            
            return demo
    
    def _create_generation_tab(self):
        """Create the main generation tab."""
        with gr.Row():
            with gr.Column(scale=1):
                text_input = gr.Textbox(
                    value=self.EXAMPLE_TEXT,
                    label="Input Text",
                    lines=8,
                    placeholder="Enter text to sonify..."
                )
                
                model_name = gr.Textbox(
                    value=self.orchestra.DEFAULT_MODEL,
                    label="Hugging Face Model",
                    info="Model must support output_hidden_states and output_attentions"
                )
                
                compute_mode = gr.Radio(
                    choices=["Full model", "Mock latents"],
                    value="Mock latents",
                    label="Compute Mode",
                    info="Mock latents for quick CPU-only demo"
                )
                
                with gr.Row():
                    instrument_preset = gr.Dropdown(
                        choices=self.orchestra.instrument_manager.list_presets(),
                        value="Ensemble (melody+bass+pad etc.)",
                        label="Instrument Preset"
                    )
                    
                    scale_choice = gr.Dropdown(
                        choices=self.orchestra.scale_manager.list_scales() + ["Custom"],
                        value="C pentatonic",
                        label="Musical Scale"
                    )
                
                custom_scale = gr.Textbox(
                    value="",
                    label="Custom Scale Notes",
                    placeholder="60,62,65,67,70",
                    visible=False
                )
                
                with gr.Row():
                    base_tempo = gr.Slider(
                        120, 960,
                        value=480,
                        step=1,
                        label="Tempo (ticks per beat)"
                    )
                    
                    num_layers = gr.Slider(
                        1, 6,
                        value=6,
                        step=1,
                        label="Max Layers"
                    )
                
                with gr.Row():
                    velocity_low = gr.Slider(
                        1, 126,
                        value=40,
                        step=1,
                        label="Min Velocity"
                    )
                    
                    velocity_high = gr.Slider(
                        2, 127,
                        value=90,
                        step=1,
                        label="Max Velocity"
                    )
                
                seed = gr.Number(
                    value=42,
                    precision=0,
                    label="Random Seed"
                )
                
                generate_btn = gr.Button(
                    "🎼 Generate MIDI",
                    variant="primary",
                    size="lg"
                )
            
            with gr.Column(scale=1):
                midi_output = gr.File(
                    label="Generated MIDI File",
                    file_types=[".mid", ".midi"]
                )
                
                stats_display = gr.Markdown(label="Quick Stats")
                
                metadata_json = gr.Code(
                    label="Metadata (JSON)",
                    language="json"
                )
                
                with gr.Row():
                    play_instructions = gr.Markdown(
                        """
                        ### 🎧 How to Play
                        1. Download the MIDI file
                        2. Open in any DAW or MIDI player
                        3. Adjust instruments and effects as desired
                        4. Export to audio format
                        """
                    )
        
        # Set up interactions
        def update_custom_scale_visibility(choice):
            return gr.update(visible=(choice == "Custom"))
        
        scale_choice.change(
            update_custom_scale_visibility,
            inputs=[scale_choice],
            outputs=[custom_scale]
        )
        
        def generate_wrapper(
            text, model_name, compute_mode, base_tempo,
            velocity_low, velocity_high, scale_choice,
            custom_scale, num_layers, instrument_preset, seed
        ):
            """Wrapper for generation with error handling."""
            try:
                # Parse custom scale if needed
                custom_notes = None
                if scale_choice == "Custom" and custom_scale:
                    custom_notes = [int(x.strip()) for x in custom_scale.split(",")]
                
                # Generate
                filename, metadata = self.orchestra.generate(
                    text=text,
                    model_name=model_name,
                    compute_mode=compute_mode,
                    base_tempo=int(base_tempo),
                    velocity_range=(int(velocity_low), int(velocity_high)),
                    scale_name=scale_choice,
                    custom_scale_notes=custom_notes,
                    num_layers=int(num_layers),
                    instrument_preset=instrument_preset,
                    seed=int(seed)
                )
                
                # Format stats
                stats = metadata.get("stats", {})
                stats_text = f"""
                ### Generation Statistics
                - **Layers Used**: {stats.get('num_layers', 'N/A')}
                - **Tokens Processed**: {stats.get('num_tokens', 'N/A')}
                - **Total Notes**: {stats.get('total_notes', 'N/A')}
                - **Notes per Layer**: {stats.get('notes_per_layer', [])}
                - **Scale**: {stats.get('scale', [])}
                - **Tempo**: {stats.get('tempo_ticks_per_beat', 'N/A')} ticks/beat
                """
                
                return filename, stats_text, json.dumps(metadata, indent=2)
                
            except Exception as e:
                error_msg = f"### ❌ Error\n{str(e)}"
                return None, error_msg, json.dumps({"error": str(e)}, indent=2)
        
        generate_btn.click(
            fn=generate_wrapper,
            inputs=[
                text_input, model_name, compute_mode, base_tempo,
                velocity_low, velocity_high, scale_choice,
                custom_scale, num_layers, instrument_preset, seed
            ],
            outputs=[midi_output, stats_display, metadata_json]
        )


# Main Entry Point

def main():
    """Main entry point for the application."""
    # Initialize orchestra
    orchestra = LLMForestOrchestra()
    
    # Create interface
    interface = GradioInterface(orchestra)
    demo = interface.create_interface()
    
    # Launch
    demo.launch()


if __name__ == "__main__":
    main()