refactor summarize_text and translate endpoints for improved clarity and output length management
main.py
CHANGED
@@ -49,19 +49,22 @@ async def summarize_text(input: TextInput):
     )
 
     return {
-        "summary": summary[0]["summary_text"],
-        "parameters_used": {
-            "input_word_count": word_count,
-            "max_length": max_length,
-            "min_length": min_length
-        }
+        "summary": summary[0]["summary_text"]
     }
 
 
 @app.post("/translateFrench")
 async def translate(input: TextInput):
-
-
-
+    # Step 1: Add task prefix to guide the T5 model
+    prefixed_text = "translate English to French: " + input.text
+    # Step 2: Tokenize the input
+    input_ids = tokenizer(prefixed_text, return_tensors="pt").input_ids
+    # Step 3: Get the input token length properly
+    input_length = input_ids.shape[1]  # shape is (batch_size, sequence_length)
+    # Step 4: Set a reasonable output length (e.g. 20% longer than input)
+    max_length = max(10, int(input_length * 1.2))
+    # Step 5: Generate translated output
+    output = model.generate(input_ids, max_length=max_length, num_beams=4, early_stopping=True)
+    # Step 6: Decode the generated tokens
     translated_text = tokenizer.decode(output[0], skip_special_tokens=True)
     return {"translated_text": translated_text}
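
For context, the hunk above relies on names defined earlier in main.py that this commit does not touch: the FastAPI app, the TextInput request model, and the tokenizer, model, and summarization pipeline objects. A minimal sketch of that surrounding setup, assuming the t5-small checkpoint (the checkpoint actually loaded by this Space is not shown in the diff):

from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

app = FastAPI()

# Assumed checkpoint; the Space may load a different seq2seq model
MODEL_NAME = "t5-small"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
# summarize_text reads summary[0]["summary_text"], which matches this pipeline's output format
summarizer = pipeline("summarization", model=MODEL_NAME)

class TextInput(BaseModel):
    text: str

Sharing one T5 checkpoint for both endpoints keeps the Space lightweight; T5 is multi-task and tells the tasks apart by the prompt prefix, which is why the translate endpoint prepends "translate English to French: ".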
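
A quick way to exercise the refactored endpoint and its length heuristic is FastAPI's test client; the call below is only illustrative and assumes the app object is importable from main.py:

from fastapi.testclient import TestClient
from main import app

client = TestClient(app)

# max_length is derived from the tokenized input as max(10, int(input_length * 1.2)),
# so even very short inputs leave at least 10 tokens of room for generation.
response = client.post("/translateFrench", json={"text": "How are you today?"})
print(response.status_code)   # 200 if the model loaded correctly
print(response.json())        # {"translated_text": "..."} (output depends on the model)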