Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- app.py +75 -0
- requirements.txt +16 -0
- utils.py +93 -0
app.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Gradio front-end: translate Thai text, then generate an AI image and speech."""
import gradio as gr
import os
from utils import process_pipeline

# Example inputs shown under the form: (Thai prompt, image style).
EXAMPLE_PROMPTS = [
    ["เด็กผู้หญิงใส่ชุดไทยยืนอยู่หน้าวัดอรุณ", "Cinematic"],
    ["หุ่นยนต์แห่งโลกอนาคตกำลังทำอาหารในครัว", "3D Model"],
    ["ป่ามหัศจรรย์ที่มีต้นไม้เรืองแสง", "Oil Painting"],
]

with gr.Blocks(theme=gr.themes.Soft()) as demo:

    # Page header.
    with gr.Row():
        gr.Markdown(
            """
            # 🇹🇭 Thai to AI Media Generator
            ### แปลงข้อความไทยเป็นภาพและเสียงด้วย AI (Translate Thai -> Image & Audio)
            [Built with anycoder](https://huggingface.co/spaces/akhaliq/anycoder)
            """
        )

    with gr.Row():
        # Left column: user inputs.
        with gr.Column(scale=1):
            thai_textbox = gr.Textbox(
                label="ใส่ข้อความภาษาไทย (Input Thai Text)",
                placeholder="ตัวอย่าง: แมวน่ารักใส่แว่นกันแดดนั่งอยู่บนชายหาด",
                lines=3
            )

            style_selector = gr.Dropdown(
                choices=["None", "Cinematic", "Anime", "3D Model", "Oil Painting", "Pixel Art"],
                value="None",
                label="สไตล์ภาพ (Image Style)"
            )

            generate_button = gr.Button("✨ สร้างผลงาน (Generate)", variant="primary", size="lg")

            gr.Markdown(
                """
                **หมายเหตุ:**
                - ระบบจะใช้โมเดลฟรีบน Hugging Face
                - การประมวลผลอาจใช้เวลา 10-30 วินาทีขึ้นอยู่กับความหนาแน่นของ Server
                """
            )

        # Right column: pipeline outputs (translation, image, audio).
        with gr.Column(scale=1):
            with gr.Group():
                gr.Markdown("### 1. ผลลัพธ์การแปล (Translation)")
                translation_box = gr.Textbox(label="English Translation", interactive=False)

            with gr.Group():
                gr.Markdown("### 2. ภาพที่สร้างได้ (Generated Image)")
                image_output = gr.Image(label="AI Image", type="pil")

            with gr.Group():
                gr.Markdown("### 3. เสียงบรรยาย (Generated Audio)")
                audio_output = gr.Audio(label="AI Speech", type="filepath")

    # Wire the button to the backend pipeline (utils.process_pipeline).
    generate_button.click(
        fn=process_pipeline,
        inputs=[thai_textbox, style_selector],
        outputs=[translation_box, image_output, audio_output]
    )

    gr.Examples(
        examples=EXAMPLE_PROMPTS,
        inputs=[thai_textbox, style_selector]
    )

if __name__ == "__main__":
    demo.launch()
|
requirements.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio
huggingface_hub
requests
Pillow
numpy
pandas
matplotlib
seaborn
scikit-learn
torch
torchvision
torchaudio
openpyxl
python-docx
PyPDF2
tqdm
joblib
|
utils.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
from huggingface_hub import InferenceClient
import tempfile
import uuid

# Shared Inference API client used by all helpers below.
# We rely on the free tier which works for these specific models without a token locally,
# but in production/Spaces, it uses the environment's token automatically.
client = InferenceClient()

# Hugging Face model IDs for each pipeline stage.
TRANSLATION_MODEL = "Helsinki-NLP/opus-mt-th-en"  # Thai -> English translation
IMAGE_MODEL = "stabilityai/stable-diffusion-xl-base-1.0"  # text-to-image
AUDIO_MODEL = "facebook/mms-tts-eng"  # English text-to-speech
|
| 15 |
+
|
| 16 |
+
def translate_text(text):
    """Translate Thai *text* to English via the HF Inference API.

    Returns "" for blank input, and a fallback "Error translating: ..." string
    when the API call or response parsing fails (errors are logged, not raised).
    """
    try:
        if not text.strip():
            return ""

        outcome = client.translation(text, model=TRANSLATION_MODEL)
        # Depending on the client version, the response is either an object
        # exposing .translation_text or a list like [{'translation_text': ...}].
        if hasattr(outcome, 'translation_text'):
            return outcome.translation_text
        return outcome[0]['translation_text']
    except Exception as exc:
        print(f"Translation Error: {exc}")
        return f"Error translating: {text}"
|
| 31 |
+
|
| 32 |
+
def generate_image(prompt, style):
    """Generate a PIL image for *prompt*, optionally themed by *style*.

    Returns None when image generation fails (the error is logged).
    """
    try:
        # Per-style prompt suffixes; "None" or an unknown style adds nothing.
        style_suffixes = {
            "Cinematic": ", cinematic lighting, highly detailed, photorealistic, 8k",
            "Anime": ", anime style, japanese animation, vibrant colors",
            "3D Model": ", 3d render, blender, unreal engine 5, isometric",
            "Oil Painting": ", oil painting, textured, artistic, van gogh style",
            "Pixel Art": ", pixel art, 16-bit, retro game style",
        }
        full_prompt = prompt + style_suffixes.get(style, "")

        return client.text_to_image(full_prompt, model=IMAGE_MODEL)
    except Exception as exc:
        print(f"Image Generation Error: {exc}")
        return None
|
| 56 |
+
|
| 57 |
+
def generate_audio(text):
    """Synthesize English speech for *text* and return the path to a FLAC file.

    Returns None when synthesis or file writing fails (the error is logged,
    never raised — the caller treats None as "no audio").
    """
    try:
        # Raw FLAC bytes from the TTS inference endpoint.
        audio_bytes = client.text_to_speech(
            text,
            model=AUDIO_MODEL
        )

        # NamedTemporaryFile provides a unique name atomically (no hand-rolled
        # uuid naming); delete=False so Gradio can read the file after close.
        with tempfile.NamedTemporaryFile(suffix=".flac", delete=False) as tmp:
            tmp.write(audio_bytes)
            return tmp.name
    except Exception as e:
        print(f"Audio Generation Error: {e}")
        return None
|
| 78 |
+
|
| 79 |
+
def process_pipeline(thai_text, style):
    """Run the full Thai-text -> (translation, image, audio) pipeline.

    Returns a 3-tuple matching the Gradio outputs; the image and audio slots
    may be None when the corresponding generation step fails.
    """
    if not thai_text:
        return "Please enter text.", None, None

    print(f"Processing: {thai_text}")

    # Step 1: translate Thai to English.
    english = translate_text(thai_text)

    # Steps 2 & 3: generate image and audio from the translation
    # (run sequentially for simplicity; could be parallelized).
    picture = generate_image(english, style)
    speech_path = generate_audio(english)

    return english, picture, speech_path
|