Lesterchia174 committed on
Commit
f1dffb2
·
verified ·
1 Parent(s): 82c8222

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +351 -0
app.py ADDED
@@ -0,0 +1,351 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import tempfile
4
+ import whisper
5
+ import re
6
+ from groq import Groq
7
+ from gtts import gTTS
8
+
9
# Load the local Whisper model for speech-to-text
# ("base" model; loaded once at import time, so startup pays the load cost).
whisper_model = whisper.load_model("base")

# Instantiate Groq client with API key
# (read from the GROQ_API_KEY environment variable; if unset, api_key is
# None and API calls will fail at request time).
groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))
14
+
15
# Supported languages (separated Malaysian Malay & Indonesian Malay)
# Used both as the UI dropdown/checkbox choices and for language-prefix
# matching in clean_translation_output.
SUPPORTED_LANGUAGES = [
    "English", "Chinese", "Thai",
    "Malaysian Malay", "Indonesian Malay",  # Split into two entries
    "Korean", "Japanese", "Spanish", "German",
    "Hindi", "Urdu", "French", "Russian",
    "Tagalog", "Arabic", "Myanmar", "Vietnamese",
    "Khmer"
]
24
+
25
# Display-name -> language-code map consumed by gTTS in synthesize_speech
# (which falls back to "en" when a language is missing from this map).
LANGUAGE_CODES = {
    "English": "en", "Chinese": "zh", "Thai": "th",
    "Malaysian Malay": "ms",   # Bahasa Malaysia (ms)
    "Indonesian Malay": "id",  # Bahasa Indonesia (id)
    "Korean": "ko", "Japanese": "ja", "Spanish": "es",
    "German": "de", "Hindi": "hi", "Urdu": "ur",
    "French": "fr", "Russian": "ru", "Tagalog": "tl",
    "Arabic": "ar", "Myanmar": "my", "Vietnamese": "vi",
    "Khmer": "km"  # Added Khmer language code (km)
    # NOTE(review): gTTS may not support every code listed here (e.g. "my",
    # "km") — verify against the installed gTTS version's language list.
}
35
+
36
# Available LLM models
# Maps UI display names to Groq model IDs; translate_text falls back to
# "qwen/qwen3-32b" when the selected display name is not found here.
AVAILABLE_MODELS = {
    "Qwen3 32B": "qwen/qwen3-32b",
    "kimi-k2": "moonshotai/kimi-k2-instruct-0905",
    "Llama-3.3 70B": "llama-3.3-70b-versatile",
    "Llama-3.1 instant 8B": "llama-3.1-8b-instant",
    "Llama-4 guard 12B": "meta-llama/llama-guard-4-12b"
}
44
+
45
def transcribe_audio_locally(audio):
    """Transcribe an audio file with the local Whisper model.

    Args:
        audio: filesystem path to the recording, or None.

    Returns:
        The transcript text, "" when no audio was given, or an error-message
        string when transcription fails (callers treat all three as text).
    """
    if audio is None:
        return ""

    try:
        transcription = whisper_model.transcribe(audio)
        return transcription["text"]
    except Exception as e:
        print(f"Error transcribing audio locally: {e}")
        return f"Error transcribing audio: {str(e)}"
57
+
58
def translate_text(input_text, input_lang, output_langs, model_name):
    """Translate `input_text` into every language in `output_langs`.

    Sends one chat-completion request to Groq with the model chosen in the
    UI, then scrubs the reply of leaked chain-of-thought and markdown.

    Returns:
        The cleaned multi-line translation text, "" when there is nothing to
        translate, or an "Error: ..." string on failure.
    """
    if not input_text or not output_langs:
        return ""

    try:
        # Resolve the UI display name to a Groq model ID (default Qwen3 32B).
        model_id = AVAILABLE_MODELS.get(model_name, "qwen/qwen3-32b")

        # Using a more direct instruction to avoid exposing the thinking process
        system_prompt = """You are a translation assistant that provides direct, accurate translations.
        Do NOT include any thinking, reasoning, or explanations in your response.
        Do NOT use phrases like 'In [language]:', 'Translation:' or similar prefixes.
        Do NOT use any special formatting like asterisks (**) or other markdown.
        Always respond with ONLY the exact translation text itself."""

        user_prompt = f"Translate this {input_lang} text: '{input_text}' into the following languages: {', '.join(output_langs)}. Provide each translation on a separate line with the language name as a prefix. Do not use any special formatting or markdown."

        completion = groq_client.chat.completions.create(
            model=model_id,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
        )

        cleaned = completion.choices[0].message.content.strip()

        # Strip <think>...</think> sections and bold markers that leaked through.
        cleaned = re.sub(r'<think>.*?</think>', '', cleaned, flags=re.DOTALL)
        cleaned = cleaned.replace('**', '')

        # Blank out whole lines that read like chain-of-thought.
        cot_line_patterns = (
            r'^\s*Let me think.*$',
            r'^\s*I need to.*$',
            r'^\s*First,.*$',
            r'^\s*Okay, so.*$',
            r'^\s*Hmm,.*$',
            r'^\s*Let\'s break this down.*$',
        )
        for cot_pattern in cot_line_patterns:
            cleaned = re.sub(cot_pattern, '', cleaned, flags=re.MULTILINE)

        return cleaned
    except Exception as e:
        print(f"Error translating text: {e}")
        return f"Error: {str(e)}"
107
+
108
def synthesize_speech(text, lang):
    """Generate an MP3 of `text` spoken in `lang` via gTTS.

    Args:
        text: the translation to speak; falsy input yields None.
        lang: display name looked up in LANGUAGE_CODES (defaults to "en").

    Returns:
        Path to a temporary .mp3 file, or None on empty input / any failure.

    Note: the temp file is intentionally left on disk so the Gradio audio
    component can stream it; nothing here deletes it later.
    """
    if not text:
        return None

    try:
        lang_code = LANGUAGE_CODES.get(lang, "en")
        tts = gTTS(text=text, lang=lang_code)

        # BUG FIX: the original called tts.save(fp.name) while the
        # NamedTemporaryFile handle was still open — that leaks an open file
        # descriptor and fails on Windows, where an open file cannot be
        # reopened for writing. Create the file, close our descriptor, then
        # let gTTS write to the path.
        fd, audio_path = tempfile.mkstemp(suffix=".mp3")
        os.close(fd)
        tts.save(audio_path)
        return audio_path
    except Exception as e:
        print(f"Error synthesizing speech: {e}")
        return None
123
+
124
def clear_all():
    """Reset the UI outputs: four empty text fields plus three empty audios."""
    empty_texts = ["", "", "", ""]
    empty_audios = [None, None, None]
    return empty_texts + empty_audios
127
+
128
def process_speech_to_text(audio):
    """Return the transcript for `audio`, or "" when no audio was supplied.

    Thin wrapper around transcribe_audio_locally used as the audio_input
    change callback.
    """
    return transcribe_audio_locally(audio) if audio else ""
135
+
136
def clean_translation_output(text):
    """Scrub thinking/markdown artifacts from raw model output.

    Keeps language-labelled lines ("French: ...") and plain lines that do
    not read like meta commentary; drops reasoning-style lines entirely.
    """
    if not text:
        return ""

    # Drop <think> sections and markdown emphasis markers.
    text = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL)
    text = text.replace('**', '').replace('*', '')

    kept = []
    for line in text.split('\n'):
        # Skip lines that open like chain-of-thought.
        if re.search(r'(^I need to|^Let me|^First|^Okay|^Hmm|^I will|^I am thinking|^I should)', line, re.IGNORECASE):
            continue

        looks_labelled = ':' in line and any(
            lang.lower() in line.lower() for lang in SUPPORTED_LANGUAGES
        )
        if looks_labelled:
            # A "<Language>: translation" line — always keep.
            kept.append(line)
        elif line.strip() and not re.search(r'(thinking|translating|understand|process)', line, re.IGNORECASE):
            # Unlabelled but substantive and not meta commentary.
            kept.append(line)

    return '\n'.join(kept)
163
+
164
def extract_translations(translations_text, output_langs):
    """Extract clean translations from the model output.

    Parses the LLM reply into one translation string per requested language
    and always returns exactly 3 strings (padded with "" / truncated) to
    match the three fixed output textboxes in the UI.

    NOTE(review): when only some languages match the labelled pattern, the
    matches are appended in match order rather than aligned to their
    `output_langs` slots, so a translation can land in the wrong textbox —
    verify against the UI wiring.
    """
    if not translations_text or not output_langs:
        return [""] * 3

    # Clean the translations text first
    clean_text = clean_translation_output(translations_text)

    # Try to match language patterns
    translation_results = []

    # First try to find language-labeled translations ("French: ...").
    # The lookahead stops each capture at the next line beginning with a
    # capital letter (assumed to be the next label) or at end of text.
    for lang in output_langs:
        pattern = rf'{re.escape(lang)}[\s]*:[\s]*(.*?)(?=\n\s*[A-Z]|$)'
        match = re.search(pattern, clean_text, re.IGNORECASE | re.DOTALL)
        if match:
            translation_results.append(match.group(1).strip())

    # If we couldn't find labeled translations, just split by lines
    if not translation_results and '\n' in clean_text:
        lines = [line.strip() for line in clean_text.split('\n') if line.strip()]

        for line in lines:
            # Check if this line has a language prefix
            if ':' in line:
                parts = line.split(':', 1)
                # split(':', 1) on a line containing ':' always yields 2
                # parts, so this guard is effectively always true.
                if len(parts) == 2:
                    translation_results.append(parts[1].strip())
            else:
                # Just add the line as is if it seems like a translation
                translation_results.append(line)
    elif not translation_results:
        # If no newlines, just use the whole text
        translation_results.append(clean_text)

    # Ensure we have exactly 3 results
    while len(translation_results) < 3:
        translation_results.append("")

    return translation_results[:3]
204
+
205
def perform_translation(audio, typed_text, input_lang, output_langs, model_name):
    """Run the full pipeline: transcribe, translate, then synthesize speech.

    Returns a 7-item list matching the UI outputs:
    [input_text, translation1..3, audio_path1..3].
    """
    # Nothing selected to translate into: echo the typed text, blank the rest.
    if not output_langs:
        return [typed_text] + [""] * 3 + [None] * 3

    # The UI exposes three output slots, so cap the selection at 3.
    selected_langs = output_langs[:3]

    # Prefer typed text; fall back to transcribing the recording.
    source_text = typed_text
    if not source_text and audio:
        source_text = transcribe_audio_locally(audio)
    if not source_text:
        return [""] * 4 + [None] * 3

    # Translate with the chosen model, then split into per-language strings.
    raw_reply = translate_text(source_text, input_lang, selected_langs, model_name)
    per_lang_texts = extract_translations(raw_reply, selected_langs)

    # Synthesize speech for each non-empty translation; pad to 3 slots.
    speech_paths = [
        synthesize_speech(translation, lang) if translation and lang else None
        for translation, lang in zip(per_lang_texts, selected_langs)
    ]
    speech_paths += [None] * (3 - len(speech_paths))

    return [source_text] + per_lang_texts + speech_paths
243
+
244
# Create the Gradio interface
with gr.Blocks(title="Multilingual Translator") as demo:
    gr.Markdown("## 🌍 Multilingual Translator with Speech Support")

    with gr.Row():
        with gr.Column():
            input_lang = gr.Dropdown(
                choices=SUPPORTED_LANGUAGES,
                value="English",
                label="Input Language"
            )
            # NOTE(review): max_choices may not be honored by CheckboxGroup in
            # every Gradio version; perform_translation caps at 3 regardless.
            output_langs = gr.CheckboxGroup(
                choices=SUPPORTED_LANGUAGES,
                label="Output Languages (select up to 3)",
                max_choices=3
            )
            model_selector = gr.Dropdown(
                choices=list(AVAILABLE_MODELS.keys()),
                value="Qwen3 32B",
                label="Translation Model"
            )

    with gr.Row():
        with gr.Column():
            audio_input = gr.Audio(
                sources=["microphone", "upload"],
                type="filepath",
                label="Speak Your Input (upload or record)"
            )
            text_input = gr.Textbox(
                label="Or Type Text",
                placeholder="Enter text to translate here..."
            )

    with gr.Row():
        transcribed_text = gr.Textbox(
            label="Transcribed Text (from audio)",
            interactive=False
        )

    # Create translation outputs in a grid.
    # BUG FIX: the original rebound `translated_outputs`/`audio_outputs` on
    # every loop iteration (keeping only the last component) and later
    # evaluated `translated_outputs + audio_outputs`, which raises TypeError
    # when adding two single components. Collect the components into lists.
    translated_outputs = []
    audio_outputs = []
    with gr.Row():
        for i in range(3):
            with gr.Column():
                translated_outputs.append(gr.Textbox(
                    label=f"Translation {i+1}",
                    interactive=False,
                    visible=False
                ))
                audio_outputs.append(gr.Audio(
                    label=f"Speech Output {i+1}",
                    visible=False
                ))

    # Make outputs visible based on selected languages.
    # BUG FIX: the original returned bare booleans, which Gradio treats as
    # component *values*; visibility must be toggled via gr.update().
    # Update order matches `translated_outputs + audio_outputs`:
    # three textboxes first, then three audio players.
    def update_output_visibility(output_langs):
        text_updates = [gr.update(visible=i < len(output_langs)) for i in range(3)]
        audio_updates = [gr.update(visible=i < len(output_langs)) for i in range(3)]
        return text_updates + audio_updates

    output_langs.change(
        update_output_visibility,
        inputs=[output_langs],
        outputs=translated_outputs + audio_outputs
    )

    with gr.Row():
        translate_btn = gr.Button("Translate", variant="primary")
        clear_btn = gr.Button("Clear All")

    # Transcribe as soon as audio arrives and drop the transcript into the
    # typed-text box so the user can edit it before translating.
    audio_input.change(
        process_speech_to_text,
        inputs=[audio_input],
        outputs=[text_input]
    )

    # Thin wrapper so the button click and the Enter key share one callback.
    def on_translate(audio, text, input_lang, output_langs, model):
        return perform_translation(audio, text, input_lang, output_langs, model)

    translate_btn.click(
        on_translate,
        inputs=[audio_input, text_input, input_lang, output_langs, model_selector],
        outputs=[transcribed_text] + translated_outputs + audio_outputs
    )

    # Handle Enter key in text input
    text_input.submit(
        on_translate,
        inputs=[audio_input, text_input, input_lang, output_langs, model_selector],
        outputs=[transcribed_text] + translated_outputs + audio_outputs
    )

    # Handle clear button (clear_all yields 4 strings + 3 Nones = 7 outputs,
    # matching 1 transcript box + 3 translation boxes + 3 audio players).
    clear_btn.click(
        clear_all,
        inputs=[],
        outputs=[transcribed_text] + translated_outputs + audio_outputs
    )
348
+
349
# Launch the application
# share=True asks Gradio for a public share link in addition to the local
# server. NOTE(review): on Hugging Face Spaces this flag is unnecessary —
# confirm it is wanted for the actual deployment target.
if __name__ == "__main__":
    demo.launch(share=True)