"""Gradio web demo for the Kitten TTS Mini text-to-speech model.

Importing this module loads the TTS model immediately (see ``tts_model``);
running it as a script builds the Gradio UI and serves it on port 7860.
"""

import os
import tempfile

import gradio as gr
import numpy as np  # noqa: F401  (kept: file-level import, not used in this file)
import soundfile as sf
from kittentts import KittenTTS

# Identifier handed to KittenTTS when loading the model.
MODEL_ID = "KittenML/kitten-tts-mini-0.1"

# Maximum number of characters accepted for a single synthesis request.
# Used both for server-side validation and for the textbox limit/labels.
MAX_TEXT_LENGTH = 457

# Sample rate passed to soundfile when writing the WAV file.
# NOTE(review): assumed to match the model's output rate -- confirm.
SAMPLE_RATE = 24000

# Initialize the TTS model (module-level side effect, done once at import).
print("Loading Kitten TTS Mini model...")
tts_model = KittenTTS(MODEL_ID)
print("Model loaded successfully!")

# Available voices from the README
AVAILABLE_VOICES = [
    'expr-voice-2-m',
    'expr-voice-2-f',
    'expr-voice-3-m',
    'expr-voice-3-f',
    'expr-voice-4-m',
    'expr-voice-4-f',
    'expr-voice-5-m',
    'expr-voice-5-f',
]

# Voice pre-selected in the dropdown.
DEFAULT_VOICE = 'expr-voice-2-f'

# Styling for the HTML snippets below.
# NOTE(review): the snippets currently contain no markup, so the
# .main-header / .info-box / .footer-box rules match nothing -- the tags
# appear to have been lost; confirm against the intended page layout.
CUSTOM_CSS = """
.main-header {
    text-align: center;
    margin-bottom: 2rem;
}
.info-box {
    background: var(--background-fill-secondary);
    color: var(--body-text-color);
    padding: 1rem;
    border-radius: 10px;
    border-left: 4px solid #4285f4;
    margin: 1rem 0;
}
.info-box h3, .info-box h4 {
    color: var(--body-text-color) !important;
    margin-top: 0;
}
.info-box ul, .info-box li, .info-box p {
    color: var(--body-text-color) !important;
}
.footer-box {
    background: var(--background-fill-secondary);
    color: var(--body-text-color);
    padding: 1rem;
    border-radius: 10px;
    margin: 2rem 0;
    text-align: center;
}
.footer-box p, .footer-box a {
    color: var(--body-text-color) !important;
}
.footer-box a:hover {
    color: #4285f4 !important;
}
"""

HEADER_HTML = (
    "\n\n🐱 Kitten TTS Mini 0.1\n\n"
    "Open-source realistic text-to-speech with 80M parameters\n\n"
)

# The info emoji was mis-encoded in the original text; restored here.
INFO_HTML = "\n\nℹ️ About Kitten TTS Mini\n\n"

# Plain string on purpose: the braces are literal placeholders, not f-string fields.
VOICE_GUIDE_HTML = (
    "\n\n🎭 Voice Guide\n\n"
    "Format: expr-voice-{number}-{gender}\n\n"
)

# NOTE(review): the footer is blank in the original -- confirm intended content.
FOOTER_HTML = " "

# (text, voice) pairs offered as one-click examples in the UI.
EXAMPLES = [
    ["Hello! This is Kitten TTS Mini, a high quality text-to-speech model.", "expr-voice-2-f"],
    ["Welcome to the world of open-source artificial intelligence and speech synthesis.", "expr-voice-3-m"],
    ["The quick brown fox jumps over the lazy dog. This sentence contains every letter of the alphabet.", "expr-voice-4-f"],
    ["Kitten TTS works without requiring a GPU, making it accessible for everyone to use.", "expr-voice-5-m"],
    ["Science and technology are advancing rapidly, bringing us closer to a better future.", "expr-voice-2-m"],
]


def generate_speech(text, voice):
    """Generate speech from text using Kitten TTS Mini.

    Args:
        text: The text to synthesize. ``None``, empty, or whitespace-only
            input is rejected, as is text longer than ``MAX_TEXT_LENGTH``.
        voice: Voice identifier forwarded to ``tts_model.generate``.

    Returns:
        A ``(audio_path, status_message)`` tuple. ``audio_path`` is the path
        of a newly written temporary ``.wav`` file on success and ``None``
        on any validation or synthesis failure; ``status_message`` is always
        a human-readable string for the status box.
    """
    # `not text` guards against None, where .strip() would raise.
    if not text or not text.strip():
        return None, "Please enter some text to synthesize."

    # Check character limit
    if len(text) > MAX_TEXT_LENGTH:
        return None, (
            f"❌ Text is too long ({len(text)} characters). "
            f"Please limit to {MAX_TEXT_LENGTH} characters or less."
        )

    wav_path = None
    try:
        # Generate audio
        print(f"Generating audio for: '{text[:50]}...' with voice: {voice}")
        audio = tts_model.generate(text, voice=voice)

        # Reserve a temp file name, then close our handle before soundfile
        # writes to the path. delete=False because the caller (Gradio) reads
        # the file after this function has returned.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
            wav_path = tmp_file.name
        sf.write(wav_path, audio, SAMPLE_RATE)
    except Exception as e:
        # UI boundary: report the failure in the status box instead of raising.
        if wav_path is not None:
            # Don't leave an empty/partial temp file behind on failure.
            try:
                os.remove(wav_path)
            except OSError:
                pass
        error_msg = f"❌ Error generating audio: {str(e)}"
        print(error_msg)
        return None, error_msg

    return wav_path, (
        f"✅ Successfully generated audio with {voice} ({len(text)} characters)"
    )


def create_interface():
    """Create the Gradio interface.

    Returns:
        The ``gr.Blocks`` app, with the generate button and the textbox
        submit event both wired to ``generate_speech``.
    """
    with gr.Blocks(
        title="🐱 Kitten TTS Mini",
        theme=gr.themes.Soft(),
        css=CUSTOM_CSS,
    ) as demo:
        # Header
        gr.HTML(HEADER_HTML)

        # Info box
        gr.HTML(INFO_HTML)

        with gr.Row():
            with gr.Column(scale=2):
                # Input text
                text_input = gr.Textbox(
                    label=f"📝 Text to Synthesize (max {MAX_TEXT_LENGTH} characters)",
                    placeholder="Enter the text you want to convert to speech...",
                    lines=3,
                    max_lines=10,
                    max_length=MAX_TEXT_LENGTH,
                    show_label=True,
                    info=f"Character limit: {MAX_TEXT_LENGTH}",
                )

                # Voice selection
                voice_dropdown = gr.Dropdown(
                    choices=AVAILABLE_VOICES,
                    value=DEFAULT_VOICE,
                    label="🎭 Voice Selection",
                    info="Choose from available voices",
                )

                # Generate button
                generate_btn = gr.Button(
                    "🎵 Generate Speech",
                    variant="primary",
                    size="lg",
                )

            with gr.Column(scale=1):
                # Voice descriptions
                gr.HTML(VOICE_GUIDE_HTML)

        # Output section
        with gr.Row():
            with gr.Column():
                status_output = gr.Textbox(
                    label="📊 Status",
                    interactive=False,
                )
                audio_output = gr.Audio(
                    label="🎵 Generated Audio",
                    type="filepath",
                )

        # Example inputs
        gr.Examples(
            examples=EXAMPLES,
            inputs=[text_input, voice_dropdown],
            label="💡 Example Texts",
        )

        # Footer
        gr.HTML(FOOTER_HTML)

        # Event handlers: the button click and the textbox submit event
        # trigger the same generation call with identical wiring.
        # NOTE(review): for a multi-line textbox the submit key may be
        # Shift+Enter rather than Enter -- confirm for the Gradio version used.
        for register in (generate_btn.click, text_input.submit):
            register(
                fn=generate_speech,
                inputs=[text_input, voice_dropdown],
                outputs=[audio_output, status_output],
                show_progress=True,
            )

    return demo


if __name__ == "__main__":
    # Create and launch the interface
    demo = create_interface()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True,
    )