"""Gradio demo that translates Japanese text to English with Hunyuan-MT (FP8)."""

import os

# Set cache directory for Spaces. This is done before `transformers` is
# imported because the library resolves its default cache path at import time;
# assigning the variable afterwards has no effect on that default.
CACHE_DIR = '/tmp/cache'
os.environ['TRANSFORMERS_CACHE'] = CACHE_DIR

import gradio as gr  # noqa: E402
import torch  # noqa: E402
from transformers import AutoModelForCausalLM, AutoTokenizer  # noqa: E402

# Longest accepted input, in characters (after surrounding whitespace is removed).
MAX_INPUT_CHARS = 1500
# Tokenizer truncation limit for the full prompt.
MAX_PROMPT_TOKENS = 1024
# Upper bound on the number of tokens generated per request.
MAX_NEW_TOKENS = 512

# Prompt sent to the model; `{text}` is replaced with the user's input.
PROMPT_TEMPLATE = (
    "Translate the following Japanese text to English. Provide only the "
    "translation without any additional explanations or notes.\n"
    "\n"
    "Japanese: {text}\n"
    "English:"
)

# Custom CSS for better appearance
CUSTOM_CSS = """
.gradio-container {
    max-width: 900px !important;
    margin: 0 auto;
}
.container {
    max-width: 900px;
    margin: auto;
    padding: 20px;
}
.japanese-text {
    font-family: "Hiragino Sans", "Yu Gothic", "Meiryo", sans-serif;
}
.translation-box {
    border-left: 3px solid #4CAF50;
    padding-left: 15px;
}
"""

HEADER_MARKDOWN = """
# 🇯🇵 → 🇺🇸 Japanese to English Translation

**Model:** `tencent/Hunyuan-MT-7B-fp8` • **Technology:** FP8 Quantization

*Fast, high-quality Japanese to English translation using optimized FP8 model*
"""

TECHNICAL_INFO_MARKDOWN = """
---
### 🛠️ Technical Information

**Model Details:**
- **Base Model**: Hunyuan-MT 7B
- **Quantization**: FP8 (8-bit floating point)
- **Memory Usage**: ~3-4GB
- **Specialization**: Japanese ↔ English translation

**Optimization Features:**
- ✅ FP8 quantization for faster inference
- ✅ GPU acceleration support
- ✅ Efficient memory usage

**Usage Tips:**
- Keep inputs under 1500 characters for best results
- Translation takes 5-15 seconds typically
- Model works best with complete sentences
- Handles technical and casual Japanese well
"""

# Clickable sample inputs shown under the text boxes.
EXAMPLE_SENTENCES = [
    ["こんにちは、元気ですか?"],
    ["今日は良い天気ですね。"],
    ["機械学習と人工知能は現代技術の重要な分野です。"],
    ["このレストランの料理はとても美味しいです。"],
    ["明日の会議は午後二時から始まります。"],
    ["日本の文化は非常に興味深いと思います。"],
    ["新しいプロジェクトの提案書を作成しました。"],
    ["電車の遅延により、到着が30分ほど遅れます。"],
]


class HunyuanTranslator:
    """Wrap the Hunyuan-MT causal LM and expose Japanese-to-English translation.

    The tokenizer and model are loaded eagerly in ``__init__``; construction
    raises if either cannot be loaded.
    """

    def __init__(self, model_name: str = "tencent/Hunyuan-MT-7B-fp8"):
        """Store the model identifier and load tokenizer and model.

        Args:
            model_name: Hub identifier passed to ``from_pretrained``.

        Raises:
            RuntimeError: If the tokenizer or model cannot be loaded.
        """
        self.model_name = model_name
        self.tokenizer = None
        self.model = None
        self._load_model()

    def _load_model(self):
        """Load the pre-quantized FP8 model and its tokenizer.

        Raises:
            RuntimeError: Wraps any loading failure; the original exception is
                chained as ``__cause__`` so the full traceback is kept.
        """
        print("Loading Hunyuan-MT FP8 model...")
        try:
            # Load tokenizer first
            self.tokenizer = AutoTokenizer.from_pretrained(
                self.model_name,
                cache_dir=CACHE_DIR,
                trust_remote_code=True,
            )

            # For Compressed Tensors models, use the standard from_pretrained.
            # The quantization is automatically handled by the model files.
            self.model = AutoModelForCausalLM.from_pretrained(
                self.model_name,
                device_map="auto",
                torch_dtype=torch.float16,  # Use fp16 as base dtype
                trust_remote_code=True,
                cache_dir=CACHE_DIR,
            )

            print("FP8 model loaded successfully!")
            print(f"Model device: {self.model.device}")
            print(f"Model dtype: {next(self.model.parameters()).dtype}")
        except Exception as e:
            print(f"Error loading model: {e}")
            raise RuntimeError(
                f"Could not load the Hunyuan-MT model: {str(e)}"
            ) from e

    @staticmethod
    def _clean_translation(completion: str) -> str:
        """Reduce raw model output to a single clean line of translation.

        Keeps only the first line of the stripped completion and removes one
        pair of double quotes when they wrap the whole line. Quotes that
        appear inside the translation are left untouched.
        """
        stripped = completion.strip()
        if not stripped:
            return ""
        first_line = stripped.split('\n', 1)[0].strip()
        wrapped_in_quotes = (
            len(first_line) >= 2
            and first_line[0] == '"'
            and first_line[-1] == '"'
            and first_line.count('"') == 2
        )
        if wrapped_in_quotes:
            first_line = first_line[1:-1].strip()
        return first_line

    def translate_ja_to_en(self, input_text: str) -> str:
        """Translate Japanese to English using the FP8 model.

        Args:
            input_text: Japanese text entered by the user.

        Returns:
            The translation, or a human-readable message when the input is
            empty, too long, produces no output, or generation fails. This
            method does not raise for generation errors; they are reported in
            the returned string so the UI can display them.
        """
        # Strip before validating so padding whitespace neither counts toward
        # the length limit nor passes the emptiness check.
        text = input_text.strip() if input_text else ""
        if not text:
            return "Please enter some Japanese text to translate."

        # Limit input length for Spaces
        if len(text) > MAX_INPUT_CHARS:
            return (
                f"Input too long. Please keep under {MAX_INPUT_CHARS} "
                "characters for this demo."
            )

        prompt = PROMPT_TEMPLATE.format(text=text)

        try:
            inputs = self.tokenizer(
                prompt,
                return_tensors="pt",
                truncation=True,
                max_length=MAX_PROMPT_TOKENS,
            )

            # Move inputs to the same device as model
            inputs = {k: v.to(self.model.device) for k, v in inputs.items()}

            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=MAX_NEW_TOKENS,
                    temperature=0.7,
                    do_sample=True,
                    top_p=0.9,
                    repetition_penalty=1.1,
                    pad_token_id=self.tokenizer.eos_token_id,
                    eos_token_id=self.tokenizer.eos_token_id,
                    num_return_sequences=1,
                )

            # The generated sequence starts with the prompt tokens. Decode
            # only what follows them instead of decoding everything and
            # removing the prompt by string matching, which fails when the
            # decoded text does not reproduce the prompt exactly.
            prompt_length = inputs["input_ids"].shape[-1]
            completion = self.tokenizer.decode(
                outputs[0][prompt_length:],
                skip_special_tokens=True,
            )
        except Exception as e:
            return f"Error during translation: {str(e)}"

        translation = self._clean_translation(completion)
        return translation if translation else "No translation generated. Please try again."


def _make_translate_function():
    """Return the callback used by the UI to translate text.

    If the translator cannot be initialized, the returned callback reports the
    failure message instead of translating.
    """
    try:
        translator = HunyuanTranslator()
    except Exception as e:
        print(f"Failed to initialize translator: {e}")
        # Python unbinds `e` when the except clause ends, so the message is
        # copied into a regular local for the closure below to use later.
        init_error = str(e)

        def translate_function(input_text):
            return (
                f"Model initialization failed: {init_error}"
                "\n\nPlease check the Space logs for details."
            )

        return translate_function

    def translate_function(input_text):
        return translator.translate_ja_to_en(input_text)

    return translate_function


def create_translation_interface():
    """Create the Gradio interface for Japanese to English translation.

    Returns:
        The ``gr.Blocks`` app with input/output text boxes, translate and
        clear buttons, clickable examples and a technical information section.
    """
    translate_function = _make_translate_function()

    with gr.Blocks(
        title="Japanese to English Translation - Hunyuan-MT FP8",
        theme=gr.themes.Soft(),
        css=CUSTOM_CSS,
    ) as demo:
        gr.Markdown(HEADER_MARKDOWN)

        with gr.Row(equal_height=False):
            with gr.Column(scale=1):
                gr.Markdown("### 📥 Japanese Input")
                input_text = gr.Textbox(
                    label="",
                    placeholder="日本語のテキストを入力してください...\n(Enter Japanese text here)",
                    lines=6,
                    max_lines=8,
                    show_copy_button=True,
                    elem_classes=["japanese-text"],
                )

                with gr.Row():
                    translate_btn = gr.Button(
                        "🚀 Translate",
                        variant="primary",
                        size="lg",
                        scale=2,
                    )
                    clear_btn = gr.Button(
                        "🗑️ Clear",
                        variant="secondary",
                        size="lg",
                        scale=1,
                    )

            with gr.Column(scale=1):
                gr.Markdown("### 📤 English Translation")
                output_text = gr.Textbox(
                    label="",
                    placeholder="Translation will appear here...",
                    lines=6,
                    max_lines=8,
                    show_copy_button=True,
                    elem_classes=["translation-box"],
                )

        # Examples section
        gr.Markdown("### 💡 Example Translations")
        gr.Examples(
            examples=EXAMPLE_SENTENCES,
            inputs=input_text,
            outputs=output_text,
            fn=translate_function,
            cache_examples=False,
            label="Click any example to try:",
        )

        # Connect the buttons
        translate_btn.click(
            fn=translate_function,
            inputs=input_text,
            outputs=output_text,
            api_name="translate",
        )

        clear_btn.click(
            fn=lambda: ("", ""),
            inputs=[],
            outputs=[input_text, output_text],
        )

        # Also allow Enter key to trigger translation
        input_text.submit(
            fn=translate_function,
            inputs=input_text,
            outputs=output_text,
        )

        # Technical details
        gr.Markdown(TECHNICAL_INFO_MARKDOWN)

    return demo


# For Hugging Face Spaces compatibility
def get_space_app():
    """Function that returns the Gradio app for Spaces"""
    return create_translation_interface()


# Launch the app
if __name__ == "__main__":
    demo = create_translation_interface()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True,
    )