# Spaces status: Running
| import gradio as gr | |
| import numpy as np | |
| import soundfile as sf | |
| import tempfile | |
| import os | |
| from kittentts import KittenTTS | |
# Load the TTS model once, at import time, so every request reuses the
# same instance instead of reloading it per call.
MODEL_REPO_ID = "KittenML/kitten-tts-mini-0.1"

print("Loading Kitten TTS Mini model...")
tts_model = KittenTTS(MODEL_REPO_ID)
print("Model loaded successfully!")
# Voice identifiers offered in the dropdown (taken from the README).
# Names follow "expr-voice-<number>-<m|f>"; numbers 2-5, male listed
# before female for each number.
AVAILABLE_VOICES = [
    f"expr-voice-{number}-{gender}"
    for number in range(2, 6)
    for gender in ("m", "f")
]
# Longest input accepted per request; the text box in the UI advertises
# the same limit.
MAX_TEXT_CHARS = 457

# Sample rate (Hz) passed to soundfile when writing the generated audio.
OUTPUT_SAMPLE_RATE = 24000


def generate_speech(text, voice):
    """Generate speech from text using Kitten TTS Mini.

    Args:
        text: Text to synthesize. ``None`` and whitespace-only values are
            treated as empty input.
        voice: Voice identifier forwarded to ``tts_model.generate``.

    Returns:
        A ``(audio_path, status_message)`` tuple. ``audio_path`` is the path
        of a temporary WAV file on success, or ``None`` when validation or
        synthesis fails. ``status_message`` is the text shown to the user.
        Errors are reported through the message rather than raised.
    """
    # Guard against None as well as blank input: calling .strip() on None
    # would raise AttributeError before any friendly message is produced.
    if text is None or not text.strip():
        return None, "Please enter some text to synthesize."

    # Check character limit
    if len(text) > MAX_TEXT_CHARS:
        return None, f"β Text is too long ({len(text)} characters). Please limit to {MAX_TEXT_CHARS} characters or less."

    try:
        # Generate audio
        print(f"Generating audio for: '{text[:50]}...' with voice: {voice}")
        audio = tts_model.generate(text, voice=voice)

        # Reserve a temp path and close our handle *before* soundfile opens
        # the file by name; writing while the handle is still open fails on
        # platforms that do not allow a second open of the same file.
        fd, wav_path = tempfile.mkstemp(suffix='.wav')
        os.close(fd)
        try:
            sf.write(wav_path, audio, OUTPUT_SAMPLE_RATE)
        except Exception:
            # The file is not auto-deleted, so remove it ourselves rather
            # than leaving an orphan behind for every failed write.
            try:
                os.remove(wav_path)
            except OSError:
                pass
            raise

        return wav_path, f"β Successfully generated audio with {voice} ({len(text)} characters)"
    except Exception as e:
        # UI boundary: surface the failure as a status message instead of
        # letting the exception propagate to the caller.
        error_msg = f"β Error generating audio: {str(e)}"
        print(error_msg)
        return None, error_msg
def create_interface():
    """Assemble the Gradio Blocks UI for the demo and return it.

    Layout, top to bottom: a header and an "about" box, the text and voice
    inputs beside a voice-naming guide, the status and audio outputs, a set
    of example prompts, and a footer. Both the generate button's click event
    and the text box's submit event call ``generate_speech``.

    Returns:
        The ``gr.Blocks`` instance; the caller is responsible for launching it.
    """
    # Static stylesheet and markup are kept in locals so the layout code
    # below reads as structure only.
    page_css = """
        .main-header {
            text-align: center;
            margin-bottom: 2rem;
        }
        .info-box {
            background: var(--background-fill-secondary);
            color: var(--body-text-color);
            padding: 1rem;
            border-radius: 10px;
            border-left: 4px solid #4285f4;
            margin: 1rem 0;
        }
        .info-box h3, .info-box h4 {
            color: var(--body-text-color) !important;
            margin-top: 0;
        }
        .info-box ul, .info-box li, .info-box p {
            color: var(--body-text-color) !important;
        }
        .footer-box {
            background: var(--background-fill-secondary);
            color: var(--body-text-color);
            padding: 1rem;
            border-radius: 10px;
            margin: 2rem 0;
            text-align: center;
        }
        .footer-box p, .footer-box a {
            color: var(--body-text-color) !important;
        }
        .footer-box a:hover {
            color: #4285f4 !important;
        }
    """

    header_html = """
        <div class="main-header">
            <h1>π± Kitten TTS Mini 0.1</h1>
            <p>Open-source realistic text-to-speech with 80M parameters</p>
        </div>
    """

    about_html = """
        <div class="info-box">
            <h3>βΉοΈ About Kitten TTS Mini</h3>
            <ul>
                <li><strong>Parameters:</strong> 80 million</li>
                <li><strong>File size:</strong> ~170MB</li>
                <li><strong>Sample rate:</strong> 24kHz</li>
                <li><strong>Voices:</strong> 8 different voices (male & female)</li>
            </ul>
        </div>
    """

    voice_guide_html = """
        <div class="info-box">
            <h4>π Voice Guide</h4>
            <p><strong>Format:</strong> expr-voice-{number}-{gender}</p>
            <ul>
                <li><strong>Numbers 2-5:</strong> Different voice styles</li>
                <li><strong>m:</strong> Male voices</li>
                <li><strong>f:</strong> Female voices</li>
            </ul>
        </div>
    """

    footer_html = """
        <div class="footer-box">
            <p><strong>π± Kitten TTS Mini</strong> | Built with β€οΈ by the KittenML team</p>
            <p>Based on StyleTTS 2 architecture | Licensed under Apache 2.0</p>
            <p><a href="https://huggingface.co/KittenML/kitten-tts-mini-0.1" target="_blank">Model Card</a>
            <a href="https://github.com/KittenML/KittenTTS" target="_blank">GitHub</a></p>
        </div>
    """

    # Each row is (text, voice), matching the order of the example inputs.
    example_rows = [
        ["Hello! This is Kitten TTS Mini, a high quality text-to-speech model.", "expr-voice-2-f"],
        ["Welcome to the world of open-source artificial intelligence and speech synthesis.", "expr-voice-3-m"],
        ["The quick brown fox jumps over the lazy dog. This sentence contains every letter of the alphabet.", "expr-voice-4-f"],
        ["Kitten TTS works without requiring a GPU, making it accessible for everyone to use.", "expr-voice-5-m"],
        ["Science and technology are advancing rapidly, bringing us closer to a better future.", "expr-voice-2-m"],
    ]

    with gr.Blocks(
        title="π± Kitten TTS Mini",
        theme=gr.themes.Soft(),
        css=page_css,
    ) as demo:
        # Header and "about" box
        gr.HTML(header_html)
        gr.HTML(about_html)

        with gr.Row():
            # Wider left column: the controls the user interacts with.
            with gr.Column(scale=2):
                text_input = gr.Textbox(
                    label="π Text to Synthesize (max 457 characters)",
                    placeholder="Enter the text you want to convert to speech...",
                    lines=3,
                    max_lines=10,
                    max_length=457,
                    show_label=True,
                    info="Character limit: 457",
                )
                voice_dropdown = gr.Dropdown(
                    choices=AVAILABLE_VOICES,
                    value='expr-voice-2-f',
                    label="π Voice Selection",
                    info="Choose from available voices",
                )
                generate_btn = gr.Button(
                    "π΅ Generate Speech",
                    variant="primary",
                    size="lg",
                )

            # Narrower right column: explains the voice naming scheme.
            with gr.Column(scale=1):
                gr.HTML(voice_guide_html)

        # Output section
        with gr.Row():
            with gr.Column():
                status_output = gr.Textbox(
                    label="π Status",
                    interactive=False,
                )
                audio_output = gr.Audio(
                    label="π΅ Generated Audio",
                    type="filepath",
                )

        # Clicking an example fills the text box and the voice dropdown.
        gr.Examples(
            examples=example_rows,
            inputs=[text_input, voice_dropdown],
            label="π‘ Example Texts",
        )

        # Footer
        gr.HTML(footer_html)

        # The button click and the text box's submit event run the same
        # handler with identical wiring; register the click first, then
        # the submit.
        for register_event in (generate_btn.click, text_input.submit):
            register_event(
                fn=generate_speech,
                inputs=[text_input, voice_dropdown],
                outputs=[audio_output, status_output],
                show_progress=True,
            )

    return demo
if __name__ == "__main__":
    # Build the UI, then serve it on all network interfaces at port 7860.
    # No public share link is created, and handler errors are shown in the UI.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "show_error": True,
    }
    demo = create_interface()
    demo.launch(**launch_options)