Spaces:

pius-code
/

corderBackend

Sleeping

pius-code committed on May 17, 2025

Commit

22e08be

verified ·

1 Parent(s): 6ee8cf7

Update main.py

Files changed (1) hide show

main.py CHANGED Viewed

@@ -55,16 +55,29 @@ async def summarize_text(input: TextInput):
 @app.post("/translateFrench")
 async def translate(input: TextInput):
-    # Previous implementation (removed by this commit): translates input.text
-    # and returns the result under the "translated_text" key.
-    # Step 1: Prepend the task prefix that tells the model which translation to do
     prefixed_text = "translate English to French: " + input.text
     # Step 2: Tokenize the input
-    input_ids = tokenizer(prefixed_text, return_tensors="pt").input_ids
-    # Step 3: Read the input token count from the second dimension
-    input_length = input_ids.shape[1]  # shape is (batch_size, sequence_length)
-    # Step 4: Cap the output at 120% of the input token count, never below 10 tokens
-    max_length = max(10, int(input_length * 1.2))
-    # Step 5: Generate the translation with 4-beam search
-    output = model.generate(input_ids, max_length=max_length, num_beams=4, early_stopping=True)
-    # Step 6: Decode the first generated sequence, dropping special tokens
-    translated_text = tokenizer.decode(output[0], skip_special_tokens=True)
     return {"translated_text": translated_text}

 @app.post("/translateFrench")
 async def translate(input: TextInput):
+    # Step 1: Prefix the task for the model
     prefixed_text = "translate English to French: " + input.text
     # Step 2: Tokenize the input
+    inputs = tokenizer(prefixed_text, return_tensors="pt", truncation=True)
+    # Step 3: Adjust generation parameters
+    input_length = inputs.input_ids.shape[1]
+    max_length = min(512, input_length * 2)  # 2x input length but not more than 512
+    min_length = int(input_length * 1.1)     # at least 10% longer than input
+    # Step 4: Generate translation
+    outputs = model.generate(
+        **inputs,
+        max_length=max_length,
+        min_length=min_length,
+        num_beams=5,
+        length_penalty=1.2,
+        early_stopping=True,
+        no_repeat_ngram_size=2
+    )
+    # Step 5: Decode result
+    translated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
     return {"translated_text": translated_text}