"""Live speech transcription Gradio app.

Streams microphone audio into an AudioProcessor (Whisper-based), displays the
rolling transcription, and forwards it to an LLM for a response shown in the UI.
"""

import gradio as gr
import numpy as np
import threading
import time

from components.transcriber import AudioProcessor
from components.gpt import gen_llm_response
from components.streaming import StreamingManager, create_streaming_interface
from config import config

# Create processor instance with configuration-based device settings
processor = AudioProcessor(model_size="base.en")

# Adjust some settings for better quality
processor.min_process_length = 1 * processor.sample_rate  # need at least 1 second of audio before processing
processor.process_interval = 1  # process at most every 1 second

# Create streaming manager
streaming_manager = StreamingManager(processor)


def process_mic_audio(audio):
    """Process one streamed microphone chunk and refresh the UI outputs.

    Args:
        audio: Gradio streaming payload, a ``(sample_rate, samples)`` tuple,
            or ``None`` when no audio was captured.

    Returns:
        Tuple of (buffer-status string, transcription text, LLM response text),
        or three ``gr.update()`` no-ops when ``audio`` is ``None``.
    """
    if audio is None:
        return gr.update(), gr.update(), gr.update()

    sr, y = audio

    # Add to processor and possibly trigger transcription
    buffer_size = processor.add_audio(y, sr)

    # Wait (up to 1s) for pending processing to finish so we don't read a
    # stale partial transcription.
    processor.wait_for_processing_complete(1.0)

    # Get current transcription
    transcription = processor.get_transcription()

    # Only query the LLM when there is actual text to respond to.
    llm_response = gen_llm_response(transcription) if transcription else ""

    # Return status update, original transcription, and LLM response
    buffer_seconds = buffer_size / processor.sample_rate
    return (
        f"Buffer: {buffer_seconds:.1f}s | Processed: {processor.processed_length/processor.sample_rate:.1f}s",
        transcription,
        llm_response,
    )


def clear_audio_buffer():
    """Clear the audio buffer and blank out the UI outputs."""
    return processor.clear_buffer(), gr.update(), "", ""


def get_current_buffer():
    """Return the current audio buffer for playback in the UI."""
    return processor.get_playback_audio()


def force_transcribe():
    """Force transcription of all audio remaining in the buffer.

    Returns:
        Tuple of (transcription text, LLM response text).
    """
    # Force complete processing of all remaining audio
    transcription = processor.force_complete_processing()

    # Send to LLM only when there is text to respond to.
    llm_response = gen_llm_response(transcription) if transcription else ""
    return transcription, llm_response


# Create Gradio interface
with gr.Blocks(title="Live Speech Transcription") as demo:
    device_info = config.get_device_info()
    device_status = f"🖥️ **Device:** {device_info['device'].upper()}"
    if device_info['cuda_available'] and device_info['device'] == 'cuda':
        device_status += f" | **GPU:** {device_info.get('cuda_device_name', 'Unknown')}"

    gr.Markdown(f"# Live Speech Recognition with LLM Response\n{device_status}")

    with gr.Row():
        audio_input = gr.Audio(sources=["microphone"], streaming=True, label="Microphone Input")

    with gr.Row():
        status_output = gr.Textbox(label="Buffer Status", interactive=False)
        buffer_audio = gr.Audio(label="Current Buffer (Click to Play)", interactive=False)

    with gr.Row():
        clear_btn = gr.Button("Clear Buffer")
        play_btn = gr.Button("Get Buffer for Playback")
        force_btn = gr.Button("Force Transcribe")

    # NOTE(review): these two textboxes are never wired to any event handler —
    # all outputs below target streaming_components instead. Confirm whether
    # they are intentionally unused or should receive the handler outputs.
    with gr.Row():
        with gr.Column():
            transcription_display = gr.Textbox(label="Live Transcription", lines=5, interactive=False)
        with gr.Column():
            llm_response_display = gr.Textbox(label="LLM Response", lines=5, interactive=False)

    # Create streaming interface
    streaming_components = create_streaming_interface(streaming_manager)

    # Connect main interface components
    audio_input.stream(
        process_mic_audio,
        audio_input,
        [status_output, streaming_components['transcription_output'], streaming_components['llm_output']],
    )
    clear_btn.click(
        clear_audio_buffer,
        None,
        [status_output, buffer_audio, streaming_components['transcription_output'], streaming_components['llm_output']],
    )
    play_btn.click(get_current_buffer, None, buffer_audio)
    force_btn.click(
        force_transcribe,
        None,
        [streaming_components['transcription_output'], streaming_components['llm_output']],
    )


if __name__ == "__main__":
    print("🎤 Live Speech Transcription App with LLM")
    print("=" * 40)

    # Display device configuration
    device_info = config.get_device_info()
    print("🔧 Configuration:")
    print(f" Device: {device_info['device'].upper()}")
    print(f" Compute type: {device_info['compute_type']}")
    print(f" CUDA available: {device_info['cuda_available']}")
    if device_info['cuda_available'] and device_info['device'] == 'cuda':
        print(f" GPU: {device_info.get('cuda_device_name', 'Unknown')}")
        memory_gb = device_info.get('cuda_memory_total', 0) / (1024**3)
        print(f" GPU Memory: {memory_gb:.1f} GB")

    print("\nFeatures:")
    print("• Real-time microphone transcription")
    print("• Audio buffer playback")
    print("• LLM responses displayed in UI")
    print("• RoBERTa+ hybrid question detection")

    # Launch the interface
    demo.launch()