Yeu3ui committed
Commit dbb5a8c · verified · 1 Parent(s): 78a15f3

Create app.py

Files changed (1): app.py +397 -0
app.py ADDED
@@ -0,0 +1,397 @@
import gradio as gr
import os
import subprocess
import shutil
import json
import time
from pathlib import Path
import torch

# Setup directories
DATASET_DIR = Path("./datasets")
OUTPUT_DIR = Path("./output")
DATASET_DIR.mkdir(exist_ok=True)
OUTPUT_DIR.mkdir(exist_ok=True)

# Global variable to store dataset path
current_dataset_path = None

def check_gpu():
    """Check if GPU is available"""
    if torch.cuda.is_available():
        gpu_name = torch.cuda.get_device_name(0)
        return f"✅ GPU Available: {gpu_name}"
    return "⚠️ No GPU detected - training will be slow"

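# A possible extension (a sketch, not part of the training flow above): also
# report free VRAM so users can judge whether 1024px training will fit.
# torch.cuda.mem_get_info() returns (free_bytes, total_bytes).
def check_vram():
    """Return free/total VRAM in GiB, or a hint when no GPU is present."""
    if not torch.cuda.is_available():
        return "No CUDA device"
    free_bytes, total_bytes = torch.cuda.mem_get_info()
    return f"{free_bytes / 2**30:.1f} GiB free of {total_bytes / 2**30:.1f} GiB"
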
def upload_and_prepare_dataset(files, dataset_name, trigger_word):
    """Upload images and prepare dataset"""
    global current_dataset_path

    if not files:
        return "❌ Please upload at least one image", None, ""

    if not dataset_name:
        dataset_name = f"dataset_{int(time.time())}"

    # Create dataset directory
    dataset_path = DATASET_DIR / dataset_name
    dataset_path.mkdir(exist_ok=True, parents=True)

    # Save images
    image_count = 0
    for file in files:
        if file.name.lower().endswith(('.png', '.jpg', '.jpeg', '.webp', '.bmp')):
            filename = Path(file.name).name
            destination = dataset_path / filename
            shutil.copy(file.name, destination)

            # Create simple caption file
            caption_file = destination.with_suffix('.txt')
            caption_text = trigger_word if trigger_word else "a photo"
            with open(caption_file, 'w') as f:
                f.write(caption_text)

            image_count += 1

    if image_count == 0:
        return "❌ No valid images found. Upload PNG, JPG, JPEG, or WEBP files.", None, ""

    current_dataset_path = str(dataset_path)

    status = f"✅ Successfully uploaded {image_count} images\n"
    status += f"📁 Dataset: {dataset_name}\n"
    if trigger_word:
        status += f"🏷️ Trigger word: '{trigger_word}'\n"
    status += f"💾 Location: {current_dataset_path}"

    return status, current_dataset_path, f"Dataset ready: {dataset_name}"

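# The folder produced above follows the image/caption-pair layout that the
# sd_trainer process reads (see caption_ext: "txt" in the config below), e.g.:
#
#   datasets/my_dataset/photo_01.jpg
#   datasets/my_dataset/photo_01.txt   <- caption, here just the trigger word
#   datasets/my_dataset/photo_02.png
#   datasets/my_dataset/photo_02.txt
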
def train_lora(
    dataset_path,
    project_name,
    trigger_word,
    steps,
    learning_rate,
    lora_rank,
    resolution,
    progress=gr.Progress()
):
    """Train LoRA model"""

    if not dataset_path or not os.path.exists(dataset_path):
        return "❌ Please upload a dataset first!", None

    if not project_name:
        project_name = f"lora_{int(time.time())}"

    output_path = OUTPUT_DIR / project_name
    output_path.mkdir(exist_ok=True, parents=True)

    # Create training config
    config = {
        "job": "extension",
        "config": {
            "name": project_name,
            "process": [{
                "type": "sd_trainer",
                "training_folder": str(output_path),
                "device": "cuda:0",
                "trigger_word": trigger_word or "",
                "network": {
                    "type": "lora",
                    "linear": int(lora_rank),
                    "linear_alpha": int(lora_rank),
                },
                "save": {
                    "dtype": "float16",
                    "save_every": max(100, int(steps / 4)),
                    "max_step_saves_to_keep": 3,
                },
                "datasets": [{
                    "folder_path": dataset_path,
                    "caption_ext": "txt",
                    "caption_dropout_rate": 0.05,
                    "resolution": [int(resolution), int(resolution)],
                }],
                "train": {
                    "batch_size": 1,
                    "steps": int(steps),
                    "gradient_accumulation_steps": 1,
                    "train_unet": True,
                    "train_text_encoder": False,
                    "gradient_checkpointing": True,
                    "noise_scheduler": "flowmatch",
                    "optimizer": "adamw8bit",
                    "lr": float(learning_rate),
                    "ema_config": {
                        "use_ema": True,
                        "ema_decay": 0.99,
                    },
                    "dtype": "bf16",
                },
                "model": {
                    "name_or_path": "Tongyi-MAI/Z-Image-Base",
                    "is_v_pred": False,
                    "quantize": True,
                },
                "sample": {
                    "sampler": "flowmatch",
                    "sample_every": max(100, int(steps / 4)),
                    "width": int(resolution),
                    "height": int(resolution),
                    "prompts": [
                        f"{trigger_word} high quality photo" if trigger_word else "high quality photo",
                        f"{trigger_word} beautiful scene" if trigger_word else "beautiful scene",
                    ],
                    "neg": "",
                    "seed": 42,
                    "guidance_scale": 0.0,
                    "sample_steps": 9,
                },
            }]
        }
    }

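    # Worked example of the checkpoint cadence above: with steps=1000,
    # save_every = max(100, 1000 / 4) = 250, so checkpoints land at steps
    # 250, 500, 750 and 1000, and max_step_saves_to_keep=3 retains only the
    # most recent three. Sample images are generated on the same cadence.
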
    # Save config
    config_path = output_path / "config.json"
    with open(config_path, 'w') as f:
        json.dump(config, f, indent=2)

    progress(0.1, desc="Installing AI Toolkit...")

    # Install AI Toolkit if not exists. Use cwd= rather than os.chdir so the
    # app's working directory stays stable even if a step raises.
    if not Path("./ai-toolkit").exists():
        try:
            subprocess.run(
                ["git", "clone", "https://github.com/ostris/ai-toolkit.git"],
                check=True,
                capture_output=True
            )
            subprocess.run(
                ["git", "submodule", "update", "--init", "--recursive"],
                check=True,
                capture_output=True,
                cwd="ai-toolkit"
            )
            subprocess.run(
                ["pip", "install", "-q", "-r", "requirements.txt"],
                check=True,
                cwd="ai-toolkit"
            )
        except Exception as e:
            return f"❌ Failed to install AI Toolkit: {str(e)}", None

    progress(0.3, desc="Starting training...")

    # Run training
    try:
        result = subprocess.run(
            ["python", "ai-toolkit/run.py", str(config_path)],
            capture_output=True,
            text=True,
            timeout=3600  # 1 hour timeout
        )

        if result.returncode != 0:
            return f"❌ Training failed:\n{result.stderr}", None

        progress(0.9, desc="Training complete! Finding LoRA file...")

        # Find the trained LoRA file. Search recursively (checkpoints are
        # typically written into a job-named subfolder) and sort by mtime so
        # [-1] really is the latest save; plain glob() order is not guaranteed.
        lora_files = sorted(output_path.rglob("*.safetensors"),
                            key=lambda p: p.stat().st_mtime)
        if lora_files:
            lora_file = lora_files[-1]  # most recent checkpoint
            success_msg = "✅ Training Complete!\n\n"
            success_msg += f"📦 LoRA saved: {lora_file.name}\n"
            success_msg += f"💾 Size: {lora_file.stat().st_size / (1024*1024):.2f} MB\n"
            success_msg += f"🏷️ Use trigger word: '{trigger_word}' in your prompts"
            return success_msg, str(lora_file)
        else:
            return "⚠️ Training completed but no LoRA file found", None

    except subprocess.TimeoutExpired:
        return "❌ Training timeout (> 1 hour). Try reducing steps.", None
    except Exception as e:
        return f"❌ Training error: {str(e)}", None

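# Note: subprocess.run blocks until training finishes, so the progress bar
# can only jump from 0.3 to 0.9. A hypothetical sketch for live progress
# (not wired into the UI here) would stream the log with Popen instead:
#
#   proc = subprocess.Popen(
#       ["python", "ai-toolkit/run.py", str(config_path)],
#       stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True,
#   )
#   for line in proc.stdout:   # parse step counters from the log lines
#       ...                    # and call progress(current_step / steps)
#   proc.wait()
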
# Gradio Interface
with gr.Blocks(title="Z-Image LoRA Trainer", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 🎨 Z-Image LoRA Trainer

    Train custom LoRA models for Z-Image-Base (6B parameter model)

    **Quick Start:**
    1. Upload 10-50 images of your subject
    2. Enter a trigger word (e.g., "mycharacter", "mystyle")
    3. Click Train
    4. Download your LoRA when complete

    ⚠️ **Note:** Training takes 10-30 minutes depending on steps. Don't close this tab!
    """)

    # GPU Status
    gpu_status = gr.Textbox(label="GPU Status", value=check_gpu(), interactive=False)

    with gr.Tab("📤 Upload Dataset"):
        with gr.Row():
            with gr.Column():
                file_input = gr.Files(
                    label="Upload Images (10-50 recommended)",
                    file_types=["image"],
                    file_count="multiple"
                )
                dataset_name_input = gr.Textbox(
                    label="Dataset Name",
                    placeholder="my_dataset",
                    value="my_dataset"
                )
                trigger_word_input = gr.Textbox(
                    label="Trigger Word (optional but recommended)",
                    placeholder="e.g., mycharacter, mystyle",
                    info="A unique word to activate your LoRA"
                )
                upload_btn = gr.Button("📤 Upload Dataset", variant="primary", size="lg")

            with gr.Column():
                upload_status = gr.Textbox(label="Upload Status", lines=8)
                dataset_path_state = gr.Textbox(label="Dataset Path", visible=False)
                dataset_ready = gr.Textbox(label="Ready to Train", interactive=False)

    with gr.Tab("🚀 Train LoRA"):
        with gr.Row():
            with gr.Column():
                project_name_input = gr.Textbox(
                    label="Project Name",
                    placeholder="my_lora",
                    value="my_lora"
                )

                gr.Markdown("### Training Settings")

                steps_input = gr.Slider(
                    label="Training Steps",
                    minimum=100,
                    maximum=3000,
                    value=1000,
                    step=100,
                    info="More steps = better quality but slower. Start with 1000."
                )

                learning_rate_input = gr.Slider(
                    label="Learning Rate",
                    minimum=0.00001,
                    maximum=0.001,
                    value=0.0001,
                    step=0.00001,
                    info="Default 0.0001 works well for most cases"
                )

                lora_rank_input = gr.Slider(
                    label="LoRA Rank",
                    minimum=4,
                    maximum=128,
                    value=16,
                    step=4,
                    info="Higher = more detail but larger file. 16 is balanced."
                )

                resolution_input = gr.Radio(
                    label="Resolution",
                    choices=[512, 768, 1024],
                    value=1024,
                    info="Z-Image native resolution is 1024x1024"
                )

                train_btn = gr.Button("🚀 Start Training", variant="primary", size="lg")

            with gr.Column():
                training_status = gr.Textbox(label="Training Status", lines=15)
                lora_output = gr.File(label="Download Trained LoRA")

    with gr.Tab("ℹ️ Help"):
        gr.Markdown("""
        ## 📚 How to Use

        ### Step 1: Prepare Your Images
        - **10-50 images** of your subject (more is better for complex subjects)
        - **Consistent subject** across images
        - **Good variety** in poses, angles, lighting
        - **High quality** photos (clear, well-lit)

        ### Step 2: Upload Dataset
        - Choose a descriptive **dataset name**
        - Add a **trigger word** (e.g., "sks person", "mystyle")
        - Upload your images

        ### Step 3: Configure Training
        - **Project name**: Name for your LoRA
        - **Steps**:
          - 500-1000 for simple subjects
          - 1000-2000 for complex subjects/styles
        - **Learning rate**: Keep default (0.0001)
        - **LoRA Rank**: 16 is good for most cases

        ### Step 4: Train
        - Click "Start Training"
        - Wait 10-30 minutes (don't close tab)
        - Download your LoRA when complete

        ### Step 5: Use Your LoRA
        - Load in ComfyUI, Automatic1111, or other Z-Image tools
        - Use your trigger word in prompts
        - Example: "a photo of [trigger_word] in a forest"

        ## 🎯 Tips for Best Results

        - **Good dataset** = good results
        - **Consistent subject** across images
        - **Unique trigger word** (not common words)
        - **Start with 1000 steps**, adjust if needed
        - **Don't overtrain** (if quality decreases, reduce steps)

        ## ⚠️ Troubleshooting

        **Training fails with OOM error:**
        - Reduce resolution to 768 or 512
        - Use fewer steps
        - Upload fewer images

        **LoRA doesn't look like subject:**
        - Upload more images (20-30+)
        - Increase steps to 1500-2000
        - Ensure images are consistent

        **LoRA is too strong/weak:**
        - Adjust LoRA weight in your inference tool (0.5-1.5)

        ## 📖 Resources

        - **Z-Image Model**: [Tongyi-MAI/Z-Image-Base](https://huggingface.co/Tongyi-MAI/Z-Image-Base)
        - **AI Toolkit**: [github.com/ostris/ai-toolkit](https://github.com/ostris/ai-toolkit)
        - **Training Adapter**: [ostris/zimage_turbo_training_adapter](https://huggingface.co/ostris/zimage_turbo_training_adapter)
        """)

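    # A quick sanity check on a finished LoRA file (a sketch; the path is
    # hypothetical, assuming the safetensors package is installed):
    #
    #   from safetensors import safe_open
    #   with safe_open("output/my_lora/my_lora.safetensors", framework="pt") as f:
    #       print(len(f.keys()), "tensors")   # LoRA up/down weight pairs
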
    # Event handlers
    upload_btn.click(
        fn=upload_and_prepare_dataset,
        inputs=[file_input, dataset_name_input, trigger_word_input],
        outputs=[upload_status, dataset_path_state, dataset_ready]
    )

    train_btn.click(
        fn=train_lora,
        inputs=[
            dataset_path_state,
            project_name_input,
            trigger_word_input,
            steps_input,
            learning_rate_input,
            lora_rank_input,
            resolution_input
        ],
        outputs=[training_status, lora_output]
    )

if __name__ == "__main__":
    # queue() keeps long-running training requests alive instead of letting
    # the default request handling time out mid-job
    demo.queue().launch()