MegaTronX committed on
Commit
0759571
·
verified ·
1 Parent(s): 1f87684

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +199 -23
app.py CHANGED
@@ -1,6 +1,10 @@
1
  import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForCausalLM
3
  import torch
 
 
 
 
4
 
5
  class HunyuanTranslator:
6
  def __init__(self, model_name: str = "tencent/Hunyuan-MT-7B-fp8"):
@@ -13,28 +17,42 @@ class HunyuanTranslator:
13
  """Load the pre-quantized FP8 model"""
14
  print("Loading pre-quantized Hunyuan-MT FP8 model...")
15
 
16
- self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
17
-
18
- # Let transformers handle the FP8 quantization automatically
19
- self.model = AutoModelForCausalLM.from_pretrained(
20
- self.model_name,
21
- device_map="auto",
22
- # torch_dtype is handled automatically for quantized models
23
- trust_remote_code=True, # Important for custom models
24
- )
25
-
26
- print("FP8 model loaded successfully!")
27
- print(f"Model device: {self.model.device}")
28
- print(f"Model dtype: {next(self.model.parameters()).dtype}")
 
 
 
 
 
 
 
 
 
 
29
 
30
  def translate_ja_to_en(self, input_text: str) -> str:
31
  """Translate Japanese to English using FP8 model"""
32
- if not input_text.strip():
33
- return "Please enter Japanese text to translate."
 
 
 
 
34
 
35
  try:
36
  # Japanese to English specific prompt
37
- prompt = f"Translate the following Japanese text to English:\n\n{input_text}"
38
 
39
  messages = [{"role": "user", "content": prompt}]
40
 
@@ -54,19 +72,177 @@ class HunyuanTranslator:
54
  temperature=0.7,
55
  do_sample=True,
56
  top_p=0.9,
57
- repetition_penalty=1.1
 
 
58
  )
59
 
60
  # Decode output
61
  output_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
62
 
63
- # Extract translation (remove prompt)
64
- if prompt in output_text:
65
- output_text = output_text.replace(prompt, "").strip()
 
 
 
 
66
 
67
- return output_text
68
 
69
  except Exception as e:
70
- return f"Translation error: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
- # Rest of the Gradio code remains the same...
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForCausalLM
3
  import torch
4
import os

# Cache model downloads under /tmp so Hugging Face Spaces (whose home
# directory may be read-only) can write them.
# HF_HOME is the currently supported cache variable; TRANSFORMERS_CACHE is
# deprecated (slated for removal in transformers v5) but kept for backward
# compatibility with older transformers releases.
os.environ['HF_HOME'] = '/tmp/cache'
os.environ['TRANSFORMERS_CACHE'] = '/tmp/cache'
8
 
9
  class HunyuanTranslator:
10
  def __init__(self, model_name: str = "tencent/Hunyuan-MT-7B-fp8"):
 
17
  """Load the pre-quantized FP8 model"""
18
  print("Loading pre-quantized Hunyuan-MT FP8 model...")
19
 
20
+ try:
21
+ self.tokenizer = AutoTokenizer.from_pretrained(
22
+ self.model_name,
23
+ cache_dir='/tmp/cache',
24
+ trust_remote_code=True
25
+ )
26
+
27
+ # Load the pre-quantized FP8 model - let transformers handle the quantization automatically
28
+ self.model = AutoModelForCausalLM.from_pretrained(
29
+ self.model_name,
30
+ device_map="auto",
31
+ trust_remote_code=True, # Important for custom models
32
+ cache_dir='/tmp/cache',
33
+ torch_dtype=torch.float16, # Use fp16 as fallback, model will use its native fp8 where available
34
+ )
35
+
36
+ print("FP8 model loaded successfully!")
37
+ print(f"Model device: {self.model.device}")
38
+ print(f"Model dtype: {next(self.model.parameters()).dtype}")
39
+
40
+ except Exception as e:
41
+ print(f"Error loading model: {e}")
42
+ raise
43
 
44
  def translate_ja_to_en(self, input_text: str) -> str:
45
  """Translate Japanese to English using FP8 model"""
46
+ if not input_text or input_text.strip() == "":
47
+ return "Please enter some Japanese text to translate."
48
+
49
+ # Limit input length for Spaces
50
+ if len(input_text) > 2000:
51
+ return "Input too long. Please keep under 2000 characters for this demo."
52
 
53
  try:
54
  # Japanese to English specific prompt
55
+ prompt = f"Translate the following Japanese text to English. Provide only the translation without additional explanations:\n\nJapanese: {input_text}\nEnglish:"
56
 
57
  messages = [{"role": "user", "content": prompt}]
58
 
 
72
  temperature=0.7,
73
  do_sample=True,
74
  top_p=0.9,
75
+ repetition_penalty=1.1,
76
+ pad_token_id=self.tokenizer.eos_token_id,
77
+ eos_token_id=self.tokenizer.eos_token_id
78
  )
79
 
80
  # Decode output
81
  output_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
82
 
83
+ # Extract translation (remove prompt and get only the English part)
84
+ if "English:" in output_text:
85
+ output_text = output_text.split("English:")[-1].strip()
86
+
87
+ # Clean up any remaining special tokens or markers
88
+ output_text = output_text.replace("<|endoftext|>", "").strip()
89
+ output_text = output_text.replace("</s>", "").strip()
90
 
91
+ return output_text if output_text else "No translation generated. Please try again."
92
 
93
  except Exception as e:
94
+ return f"Error during translation: {str(e)}"
95
+
96
+ def create_translation_interface():
97
+ """Create the Gradio interface optimized for Spaces"""
98
+
99
+ # Initialize translator
100
+ translator = HunyuanTranslator()
101
+
102
+ def translate_function(input_text):
103
+ """Wrapper function for Gradio"""
104
+ return translator.translate_ja_to_en(input_text)
105
+
106
+ # Custom CSS for better appearance on Spaces
107
+ custom_css = """
108
+ .gradio-container {
109
+ max-width: 900px !important;
110
+ margin: 0 auto;
111
+ }
112
+ .container {
113
+ max-width: 900px;
114
+ margin: auto;
115
+ padding: 20px;
116
+ }
117
+ .example-text {
118
+ font-size: 0.9em;
119
+ color: #666;
120
+ }
121
+ """
122
+
123
+ # Create Gradio interface optimized for Spaces
124
+ with gr.Blocks(
125
+ title="Japanese to English Translation - Hunyuan-MT FP8",
126
+ theme=gr.themes.Soft(),
127
+ css=custom_css
128
+ ) as demo:
129
+
130
+ gr.Markdown(
131
+ """
132
+ # 🇯🇵 → 🇺🇸 Japanese to English Translation
133
+ **Model:** `tencent/Hunyuan-MT-7B-fp8` (7B parameters, pre-quantized FP8)
134
+ **Specialization:** High-quality Japanese → English translation
135
+
136
+ *Enter Japanese text below and click Translate*
137
+ """
138
+ )
139
+
140
+ with gr.Row(equal_height=False):
141
+ with gr.Column(scale=1):
142
+ input_text = gr.Textbox(
143
+ label="Japanese Text Input",
144
+ placeholder="日本語のテキストを入力してください... (Enter Japanese text here)",
145
+ lines=5,
146
+ max_lines=8,
147
+ show_copy_button=True,
148
+ elem_id="input-text"
149
+ )
150
+
151
+ with gr.Row():
152
+ translate_btn = gr.Button(
153
+ "🚀 Translate",
154
+ variant="primary",
155
+ size="lg",
156
+ scale=2
157
+ )
158
+ clear_btn = gr.Button(
159
+ "🗑️ Clear",
160
+ variant="secondary",
161
+ size="lg",
162
+ scale=1
163
+ )
164
+
165
+ with gr.Column(scale=1):
166
+ output_text = gr.Textbox(
167
+ label="English Translation",
168
+ placeholder="Translation will appear here...",
169
+ lines=5,
170
+ max_lines=8,
171
+ show_copy_button=True,
172
+ elem_id="output-text"
173
+ )
174
+
175
+ # Examples section
176
+ gr.Markdown("### 💡 Try these examples:")
177
+ examples = gr.Examples(
178
+ examples=[
179
+ ["こんにちは、元気ですか?"],
180
+ ["今日は良い天気ですね。"],
181
+ ["機械学習と人工知能は現代技術の重要な分野です。"],
182
+ ["このレストランの料理はとても美味しいです。"],
183
+ ["明日の会議は午後二時から始まります。"],
184
+ ["日本の文化は非常に興味深いと思います。"]
185
+ ],
186
+ inputs=input_text,
187
+ outputs=output_text,
188
+ fn=translate_function,
189
+ cache_examples=True,
190
+ label="Click any example to try:"
191
+ )
192
+
193
+ # Connect the buttons
194
+ translate_btn.click(
195
+ fn=translate_function,
196
+ inputs=input_text,
197
+ outputs=output_text,
198
+ api_name="translate"
199
+ )
200
+
201
+ clear_btn.click(
202
+ fn=lambda: ("", ""),
203
+ inputs=[],
204
+ outputs=[input_text, output_text]
205
+ )
206
+
207
+ # Also allow Enter key to trigger translation
208
+ input_text.submit(
209
+ fn=translate_function,
210
+ inputs=input_text,
211
+ outputs=output_text
212
+ )
213
+
214
+ # Additional info
215
+ gr.Markdown(
216
+ """
217
+ ---
218
+ ### ℹ️ Usage Notes:
219
+ - **Model**: tencent/Hunyuan-MT-7B-fp8 (7B parameters, FP8 quantized)
220
+ - **Optimized** specifically for Japanese → English translation
221
+ - **Max input length**: ~2000 characters
222
+ - **Translation time**: Usually 10-30 seconds
223
+ - **Memory efficient**: Uses FP8 quantization for faster inference
224
+
225
+ ### 🛠️ Technical Details:
226
+ - Pre-quantized to FP8 (8-bit floating point)
227
+ - ~3-4GB memory footprint
228
+ - Optimized for GPU inference
229
+ - Supports long-form translation
230
+ """
231
+ )
232
+
233
+ return demo
234
+
235
# For Hugging Face Spaces compatibility
def get_space_app():
    """Build and return the Gradio demo (entry point some Spaces hosts call)."""
    demo = create_translation_interface()
    return demo
239
 
240
# Launch the app when executed as a script (Spaces runs this module directly).
if __name__ == "__main__":
    app = create_translation_interface()
    # Bind to all interfaces on the standard Spaces port.
    app.launch(
        show_error=True,
        share=False,
        server_port=7860,
        server_name="0.0.0.0",
    )