MegaTronX committed on
Commit
0759571
·
verified ·
1 Parent(s): 1f87684

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +199 -23
app.py CHANGED
@@ -1,6 +1,10 @@
1
  import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForCausalLM
3
  import torch
 
 
 
 
4
 
5
  class HunyuanTranslator:
6
  def __init__(self, model_name: str = "tencent/Hunyuan-MT-7B-fp8"):
@@ -13,28 +17,42 @@ class HunyuanTranslator:
13
  """Load the pre-quantized FP8 model"""
14
  print("Loading pre-quantized Hunyuan-MT FP8 model...")
15
 
16
- self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
17
-
18
- # Let transformers handle the FP8 quantization automatically
19
- self.model = AutoModelForCausalLM.from_pretrained(
20
- self.model_name,
21
- device_map="auto",
22
- # torch_dtype is handled automatically for quantized models
23
- trust_remote_code=True, # Important for custom models
24
- )
25
-
26
- print("FP8 model loaded successfully!")
27
- print(f"Model device: {self.model.device}")
28
- print(f"Model dtype: {next(self.model.parameters()).dtype}")
 
 
 
 
 
 
 
 
 
 
29
 
30
  def translate_ja_to_en(self, input_text: str) -> str:
31
  """Translate Japanese to English using FP8 model"""
32
- if not input_text.strip():
33
- return "Please enter Japanese text to translate."
 
 
 
 
34
 
35
  try:
36
  # Japanese to English specific prompt
37
- prompt = f"Translate the following Japanese text to English:\n\n{input_text}"
38
 
39
  messages = [{"role": "user", "content": prompt}]
40
 
@@ -54,19 +72,177 @@ class HunyuanTranslator:
54
  temperature=0.7,
55
  do_sample=True,
56
  top_p=0.9,
57
- repetition_penalty=1.1
 
 
58
  )
59
 
60
  # Decode output
61
  output_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
62
 
63
- # Extract translation (remove prompt)
64
- if prompt in output_text:
65
- output_text = output_text.replace(prompt, "").strip()
 
 
 
 
66
 
67
- return output_text
68
 
69
  except Exception as e:
70
- return f"Translation error: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
- # Rest of the Gradio code remains the same...
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForCausalLM
3
  import torch
4
import os

# Cache model downloads under /tmp so Hugging Face Spaces (whose home
# directory may be read-only) can write them.
# HF_HOME is the currently supported cache variable; TRANSFORMERS_CACHE is
# deprecated (slated for removal in transformers v5) but kept for backward
# compatibility with older transformers releases.
os.environ['HF_HOME'] = '/tmp/cache'
os.environ['TRANSFORMERS_CACHE'] = '/tmp/cache'
8
 
9
  class HunyuanTranslator:
10
  def __init__(self, model_name: str = "tencent/Hunyuan-MT-7B-fp8"):
 
17
  """Load the pre-quantized FP8 model"""
18
  print("Loading pre-quantized Hunyuan-MT FP8 model...")
19
 
20
+ try:
21
+ self.tokenizer = AutoTokenizer.from_pretrained(
22
+ self.model_name,
23
+ cache_dir='/tmp/cache',
24
+ trust_remote_code=True
25
+ )
26
+
27
+ # Load the pre-quantized FP8 model - let transformers handle the quantization automatically
28
+ self.model = AutoModelForCausalLM.from_pretrained(
29
+ self.model_name,
30
+ device_map="auto",
31
+ trust_remote_code=True, # Important for custom models
32
+ cache_dir='/tmp/cache',
33
+ torch_dtype=torch.float16, # Use fp16 as fallback, model will use its native fp8 where available
34
+ )
35
+
36
+ print("FP8 model loaded successfully!")
37
+ print(f"Model device: {self.model.device}")
38
+ print(f"Model dtype: {next(self.model.parameters()).dtype}")
39
+
40
+ except Exception as e:
41
+ print(f"Error loading model: {e}")
42
+ raise
43
 
44
  def translate_ja_to_en(self, input_text: str) -> str:
45
  """Translate Japanese to English using FP8 model"""
46
+ if not input_text or input_text.strip() == "":
47
+ return "Please enter some Japanese text to translate."
48
+
49
+ # Limit input length for Spaces
50
+ if len(input_text) > 2000:
51
+ return "Input too long. Please keep under 2000 characters for this demo."
52
 
53
  try:
54
  # Japanese to English specific prompt
55
+ prompt = f"Translate the following Japanese text to English. Provide only the translation without additional explanations:\n\nJapanese: {input_text}\nEnglish:"
56
 
57
  messages = [{"role": "user", "content": prompt}]
58
 
 
72
  temperature=0.7,
73
  do_sample=True,
74
  top_p=0.9,
75
+ repetition_penalty=1.1,
76
+ pad_token_id=self.tokenizer.eos_token_id,
77
+ eos_token_id=self.tokenizer.eos_token_id
78
  )
79
 
80
  # Decode output
81
  output_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
82
 
83
+ # Extract translation (remove prompt and get only the English part)
84
+ if "English:" in output_text:
85
+ output_text = output_text.split("English:")[-1].strip()
86
+
87
+ # Clean up any remaining special tokens or markers
88
+ output_text = output_text.replace("<|endoftext|>", "").strip()
89
+ output_text = output_text.replace("</s>", "").strip()
90
 
91
+ return output_text if output_text else "No translation generated. Please try again."
92
 
93
  except Exception as e:
94
+ return f"Error during translation: {str(e)}"
95
+
96
+ def create_translation_interface():
97
+ """Create the Gradio interface optimized for Spaces"""
98
+
99
+ # Initialize translator
100
+ translator = HunyuanTranslator()
101
+
102
+ def translate_function(input_text):
103
+ """Wrapper function for Gradio"""
104
+ return translator.translate_ja_to_en(input_text)
105
+
106
+ # Custom CSS for better appearance on Spaces
107
+ custom_css = """
108
+ .gradio-container {
109
+ max-width: 900px !important;
110
+ margin: 0 auto;
111
+ }
112
+ .container {
113
+ max-width: 900px;
114
+ margin: auto;
115
+ padding: 20px;
116
+ }
117
+ .example-text {
118
+ font-size: 0.9em;
119
+ color: #666;
120
+ }
121
+ """
122
+
123
+ # Create Gradio interface optimized for Spaces
124
+ with gr.Blocks(
125
+ title="Japanese to English Translation - Hunyuan-MT FP8",
126
+ theme=gr.themes.Soft(),
127
+ css=custom_css
128
+ ) as demo:
129
+
130
+ gr.Markdown(
131
+ """
132
+ # 🇯🇵 → 🇺🇸 Japanese to English Translation
133
+ **Model:** `tencent/Hunyuan-MT-7B-fp8` (7B parameters, pre-quantized FP8)
134
+ **Specialization:** High-quality Japanese → English translation
135
+
136
+ *Enter Japanese text below and click Translate*
137
+ """
138
+ )
139
+
140
+ with gr.Row(equal_height=False):
141
+ with gr.Column(scale=1):
142
+ input_text = gr.Textbox(
143
+ label="Japanese Text Input",
144
+ placeholder="日本語のテキストを入力してください... (Enter Japanese text here)",
145
+ lines=5,
146
+ max_lines=8,
147
+ show_copy_button=True,
148
+ elem_id="input-text"
149
+ )
150
+
151
+ with gr.Row():
152
+ translate_btn = gr.Button(
153
+ "🚀 Translate",
154
+ variant="primary",
155
+ size="lg",
156
+ scale=2
157
+ )
158
+ clear_btn = gr.Button(
159
+ "🗑️ Clear",
160
+ variant="secondary",
161
+ size="lg",
162
+ scale=1
163
+ )
164
+
165
+ with gr.Column(scale=1):
166
+ output_text = gr.Textbox(
167
+ label="English Translation",
168
+ placeholder="Translation will appear here...",
169
+ lines=5,
170
+ max_lines=8,
171
+ show_copy_button=True,
172
+ elem_id="output-text"
173
+ )
174
+
175
+ # Examples section
176
+ gr.Markdown("### 💡 Try these examples:")
177
+ examples = gr.Examples(
178
+ examples=[
179
+ ["こんにちは、元気ですか?"],
180
+ ["今日は良い天気ですね。"],
181
+ ["機械学習と人工知能は現代技術の重要な分野です。"],
182
+ ["このレストランの料理はとても美味しいです。"],
183
+ ["明日の会議は午後二時から始まります。"],
184
+ ["日本の文化は非常に興味深いと思います。"]
185
+ ],
186
+ inputs=input_text,
187
+ outputs=output_text,
188
+ fn=translate_function,
189
+ cache_examples=True,
190
+ label="Click any example to try:"
191
+ )
192
+
193
+ # Connect the buttons
194
+ translate_btn.click(
195
+ fn=translate_function,
196
+ inputs=input_text,
197
+ outputs=output_text,
198
+ api_name="translate"
199
+ )
200
+
201
+ clear_btn.click(
202
+ fn=lambda: ("", ""),
203
+ inputs=[],
204
+ outputs=[input_text, output_text]
205
+ )
206
+
207
+ # Also allow Enter key to trigger translation
208
+ input_text.submit(
209
+ fn=translate_function,
210
+ inputs=input_text,
211
+ outputs=output_text
212
+ )
213
+
214
+ # Additional info
215
+ gr.Markdown(
216
+ """
217
+ ---
218
+ ### ℹ️ Usage Notes:
219
+ - **Model**: tencent/Hunyuan-MT-7B-fp8 (7B parameters, FP8 quantized)
220
+ - **Optimized** specifically for Japanese → English translation
221
+ - **Max input length**: ~2000 characters
222
+ - **Translation time**: Usually 10-30 seconds
223
+ - **Memory efficient**: Uses FP8 quantization for faster inference
224
+
225
+ ### 🛠️ Technical Details:
226
+ - Pre-quantized to FP8 (8-bit floating point)
227
+ - ~3-4GB memory footprint
228
+ - Optimized for GPU inference
229
+ - Supports long-form translation
230
+ """
231
+ )
232
+
233
+ return demo
234
+
235
# For Hugging Face Spaces compatibility
def get_space_app():
    """Build and return the Gradio demo (entry point some Spaces hosts call)."""
    demo = create_translation_interface()
    return demo
239
 
240
# Launch the app when executed as a script (Spaces runs this module directly).
if __name__ == "__main__":
    app = create_translation_interface()
    # Bind to all interfaces on the standard Spaces port.
    app.launch(
        show_error=True,
        share=False,
        server_port=7860,
        server_name="0.0.0.0",
    )