codemichaeld committed
Commit 9de10a2 · verified · 1 Parent(s): 4f1244e

Update app.py

Files changed (1)
  1. app.py +414 -183
app.py CHANGED
@@ -9,6 +9,7 @@ from huggingface_hub import HfApi, hf_hub_download
9
  from safetensors.torch import load_file, save_file
10
  import torch
11
  import torch.nn.functional as F
 
12
  try:
13
  from modelscope.hub.file_download import model_file_download as ms_file_download
14
  from modelscope.hub.api import HubApi as ModelScopeApi
@@ -17,25 +18,44 @@ except ImportError:
17
  MODELScope_AVAILABLE = False
18
 
19
  def low_rank_decomposition(weight, rank=64):
20
- """Standard LoRA decomposition for 2D tensors."""
21
- if weight.ndim != 2:
22
- return None, None
23
-
24
  try:
25
- weight_f32 = weight.float()
26
- U, S, Vh = torch.linalg.svd(weight_f32, full_matrices=False)
27
-
28
- actual_rank = min(rank, len(S))
29
- if actual_rank < 4:
30
- return None, None
31
-
32
- # Standard LoRA factorization: W = W_B @ W_A
33
- W_A = Vh[:actual_rank, :].contiguous() # [rank, in_features]
34
- W_B = U[:, :actual_rank] @ torch.diag(S[:actual_rank]) # [out_features, rank]
35
-
36
- return W_A.to(torch.float16), W_B.to(torch.float16)
37
  except Exception as e:
38
- print(f"Decomposition error: {e}")
 
39
  return None, None
40
 
41
  def extract_correction_factors(original_weight, fp8_weight):
@@ -72,36 +92,68 @@ def extract_correction_factors(original_weight, fp8_weight):
72
  else:
73
  return error.mean().to(original_weight.dtype)
74
 
75
- def analyze_model_architecture(state_dict):
76
- """Auto-detect model architecture and components."""
77
- keys = " ".join(state_dict.keys()).lower()
78
- components = {
79
- "text_encoder": False,
80
- "unet": False,
81
- "vae": False,
82
- "clip": False,
83
- "transformer": False
84
  }
85
 
86
- # Detect components based on key patterns
87
- if "text" in keys or "emb" in keys or ("encoder" in keys and "vae" not in keys):
88
- components["text_encoder"] = True
89
- if "clip" in keys or "vision" in keys:
90
- components["clip"] = True
91
 
92
- if "unet" in keys or ("down_blocks" in keys and "up_blocks" in keys) or ("input_blocks" in keys and "output_blocks" in keys):
93
- components["unet"] = True
94
- if "transformer" in keys or "attn" in keys:
95
- components["transformer"] = True
96
 
97
- if "vae" in keys or ("encoder" in keys and "decoder" in keys) or "quant_conv" in keys or "post_quant" in keys:
98
- components["vae"] = True
99
 
100
- return components
101
 
102
  def convert_safetensors_to_fp8_with_recovery(safetensors_path, output_dir, fp8_format,
103
- recovery_configs, progress=gr.Progress()):
104
- """Convert model to FP8 with customizable per-element recovery strategies."""
105
  progress(0.1, desc="Starting FP8 conversion with precision recovery...")
106
  try:
107
  def read_safetensors_metadata(path):
@@ -118,10 +170,6 @@ def convert_safetensors_to_fp8_with_recovery(safetensors_path, output_dir, fp8_f
118
  state_dict = load_file(safetensors_path)
119
  progress(0.3, desc="Loaded model weights.")
120
 
121
- # Auto-detect architecture
122
- detected_components = analyze_model_architecture(state_dict)
123
- print(f"Detected components: {detected_components}")
124
-
125
  # Setup FP8 format
126
  fp8_dtype = torch.float8_e5m2 if fp8_format == "e5m2" else torch.float8_e4m3fn
127
 
@@ -132,27 +180,17 @@ def convert_safetensors_to_fp8_with_recovery(safetensors_path, output_dir, fp8_f
132
  "total_layers": len(state_dict),
133
  "processed_layers": 0,
134
  "skipped_layers": [],
135
- "detected_components": detected_components,
136
- "recovery_counts": {"lora": 0, "diff": 0}
137
  }
138
 
139
- # Create a mapping from layer keys to recovery config
140
- layer_recovery_map = {}
141
- for config in recovery_configs:
142
- element_pattern = config["element"].lower()
143
- for key in state_dict:
144
- if element_pattern == "all" or element_pattern in key.lower():
145
- # Only set if not already set (first match wins)
146
- if key not in layer_recovery_map:
147
- layer_recovery_map[key] = config
148
-
149
  # Process each tensor
150
  total = len(state_dict)
151
  for i, key in enumerate(state_dict):
152
  progress(0.3 + 0.5 * (i / total), desc=f"Processing {i+1}/{total}: {key.split('.')[-1]}")
153
  weight = state_dict[key]
 
154
 
155
- # Convert to FP8
156
  if weight.dtype in [torch.float16, torch.float32, torch.bfloat16]:
157
  fp8_weight = weight.to(fp8_dtype)
158
  sd_fp8[key] = fp8_weight
@@ -161,43 +199,53 @@ def convert_safetensors_to_fp8_with_recovery(safetensors_path, output_dir, fp8_f
161
  stats["skipped_layers"].append(f"{key}: non-float dtype")
162
  continue
163
 
164
- # Get recovery config for this layer
165
- recovery_config = layer_recovery_map.get(key)
166
- if not recovery_config or recovery_config["method"] == "none":
167
- stats["skipped_layers"].append(f"{key}: no recovery configured")
168
- continue
169
 
170
- try:
171
- method = recovery_config["method"]
172
- if method == "lora" and weight.ndim == 2:
173
- # LoRA recovery for 2D tensors only
174
- rank = recovery_config.get("rank", 64)
175
- # Adjust rank for smaller matrices
176
- adjusted_rank = min(rank, min(weight.shape) // 2)
177
- if adjusted_rank >= 4:
178
- A, B = low_rank_decomposition(weight, rank=adjusted_rank)
179
- if A is not None and B is not None:
180
- recovery_weights[f"lora_A.{key}"] = A
181
- recovery_weights[f"lora_B.{key}"] = B
182
- stats["processed_layers"] += 1
183
- stats["recovery_counts"]["lora"] += 1
184
- continue
185
-
186
- if method == "diff":
187
- # Difference/correction recovery for any tensor type
188
- corr = extract_correction_factors(weight, fp8_weight)
189
- if corr is not None:
190
- recovery_weights[f"diff.{key}"] = corr
191
- stats["processed_layers"] += 1
192
- stats["recovery_counts"]["diff"] += 1
193
- continue
194
-
195
- # If we get here, recovery was configured but couldn't be applied
196
- reason = "2D tensor required" if method == "lora" and weight.ndim != 2 else "decomposition failed"
197
- stats["skipped_layers"].append(f"{key}: {method} recovery failed ({reason})")
198
 
199
- except Exception as e:
200
- stats["skipped_layers"].append(f"{key}: error - {str(e)}")
 
201
 
202
  # Save FP8 model
203
  base_name = os.path.splitext(os.path.basename(safetensors_path))[0]
@@ -211,7 +259,7 @@ def convert_safetensors_to_fp8_with_recovery(safetensors_path, output_dir, fp8_f
211
  recovery_metadata = {
212
  "format": "pt",
213
  "fp8_format": fp8_format,
214
- "recovery_config": json.dumps(recovery_configs),
215
  "stats": json.dumps(stats)
216
  }
217
  save_file(recovery_weights, recovery_path, metadata=recovery_metadata)
@@ -225,6 +273,16 @@ def convert_safetensors_to_fp8_with_recovery(safetensors_path, output_dir, fp8_f
225
  stats_msg += f" - LoRA recovery: {stats['recovery_counts']['lora']}\n"
226
  stats_msg += f" - Difference recovery: {stats['recovery_counts']['diff']}\n"
227
 
228
  if not recovery_weights:
229
  stats_msg += "\n⚠️ No recovery weights were generated. All layers use pure FP8."
230
 
@@ -232,9 +290,8 @@ def convert_safetensors_to_fp8_with_recovery(safetensors_path, output_dir, fp8_f
232
  return True, stats_msg, stats, fp8_path, recovery_path
233
 
234
  except Exception as e:
235
- import traceback
236
- error_msg = f"Error: {str(e)}\n{traceback.format_exc()}"
237
- return False, error_msg, None, None, None
238
 
239
  def parse_hf_url(url):
240
  url = url.strip().rstrip("/")
@@ -292,12 +349,166 @@ def upload_to_target(target_type, new_repo_id, output_dir, fp8_format, hf_token=
292
  else:
293
  raise ValueError("Unknown target")
294
 
295
  def process_and_upload_fp8(
296
  source_type,
297
  repo_url,
298
  safetensors_filename,
299
  fp8_format,
300
- recovery_configs_json,
301
  target_type,
302
  new_repo_id,
303
  hf_token,
@@ -312,20 +523,18 @@ def process_and_upload_fp8(
312
  if target_type == "huggingface" and not hf_token:
313
  return None, "❌ Hugging Face token required for target.", "", ""
314
 
315
- # Parse recovery configs
316
  try:
317
- recovery_configs = json.loads(recovery_configs_json)
318
  except json.JSONDecodeError:
319
- return None, "❌ Invalid recovery configuration JSON.", "", ""
320
 
321
- # Validate config format
322
  valid_methods = ["none", "lora", "diff"]
323
- for config in recovery_configs:
324
- if "element" not in config or "method" not in config:
325
- return None, "❌ Invalid config format: each config needs 'element' and 'method'", "", ""
326
- if config["method"] not in valid_methods:
327
- return None, f"❌ Invalid method: {config['method']}. Use 'none', 'lora', or 'diff'", "", ""
328
- if config["method"] == "lora" and "rank" not in config:
329
  return None, "❌ LoRA method requires 'rank' parameter", "", ""
330
 
331
  temp_dir = None
@@ -338,7 +547,7 @@ def process_and_upload_fp8(
338
 
339
  progress(0.2, desc="Converting to FP8 with precision recovery...")
340
  success, msg, stats, fp8_path, recovery_path = convert_safetensors_to_fp8_with_recovery(
341
- safetensors_path, output_dir, fp8_format, recovery_configs, progress
342
  )
343
 
344
  if not success:
@@ -363,16 +572,16 @@ tags:
363
  - mixed-method
364
  - converted-by-gradio
365
  ---
366
- # FP8 Model with Mixed Precision Recovery
367
  - **Source**: `{repo_url}`
368
  - **Original File**: `{safetensors_filename}`
369
  - **FP8 Format**: `{fp8_format.upper()}`
370
  - **FP8 File**: `{fp8_filename}`
371
  - **Recovery File**: `{recovery_filename if recovery_filename else "None"}`
372
 
373
- ## Recovery Configuration
374
  ```json
375
- {json.dumps(recovery_configs, indent=2)}
376
  ```
377
 
378
  ## Usage (Inference)
@@ -392,16 +601,19 @@ for key in fp8_state:
392
  fp8_weight = fp8_state[key].to(torch.float32) # Convert to float32 for computation
393
 
394
  # Apply LoRA recovery if available
395
- if f"lora_A.{{key}}" in recovery_state and f"lora_B.{{key}}" in recovery_state:
396
- A = recovery_state[f"lora_A.{{key}}"].to(torch.float32)
397
- B = recovery_state[f"lora_B.{{key}}"].to(torch.float32)
 
 
398
  # Reconstruct the low-rank approximation
399
  lora_weight = B @ A
400
  fp8_weight = fp8_weight + lora_weight
401
 
402
  # Apply difference recovery if available
403
- if f"diff.{{key}}" in recovery_state:
404
- diff = recovery_state[f"diff.{{key}}"].to(torch.float32)
 
405
  fp8_weight = fp8_weight + diff
406
 
407
  reconstructed[key] = fp8_weight
@@ -454,18 +666,17 @@ Includes:
454
  recovery_details)
455
 
456
  except Exception as e:
457
- import traceback
458
- error_details = f"❌ Error: {str(e)}\n{traceback.format_exc()}"
459
- return None, error_details, "", ""
460
 
461
  finally:
462
  if temp_dir:
463
  shutil.rmtree(temp_dir, ignore_errors=True)
464
  shutil.rmtree(output_dir, ignore_errors=True)
465
 
466
- with gr.Blocks(title="Advanced FP8 Quantizer with Mixed Precision Recovery") as demo:
467
- gr.Markdown("# 🔄 Advanced FP8 Quantizer with Per-Layer Precision Recovery")
468
- gr.Markdown("Convert `.safetensors` → **FP8** + **customizable precision recovery**. Full control over LoRA and difference methods per layer.")
469
 
470
  with gr.Row():
471
  with gr.Column():
@@ -476,40 +687,69 @@ with gr.Blocks(title="Advanced FP8 Quantizer with Mixed Precision Recovery") as
476
  with gr.Accordion("FP8 Settings", open=True):
477
  fp8_format = gr.Radio(["e4m3fn", "e5m2"], value="e5m2", label="FP8 Format")
478
 
479
- with gr.Accordion("Per-Layer Recovery Configuration", open=True):
480
  gr.Markdown("""
481
- ### Configure recovery strategy for each layer type
482
 
483
- Format: JSON array of configuration objects:
484
  ```json
485
  [
486
- {"element": "pattern1", "method": "lora", "rank": 64},
487
- {"element": "pattern2", "method": "diff"},
488
- {"element": "all", "method": "none"}
489
  ]
490
  ```
491
 
492
- - `element`: Substring to match in weight keys (case-insensitive). Use "all" for default.
493
  - `method`: "none" (pure FP8), "lora" (low-rank adaptation), or "diff" (difference/correction)
494
- - `rank`: Required for "lora" method. Higher = better quality but larger file.
495
 
496
- **Rules are applied in order** - first match wins. Always end with an "all" rule.
497
  """)
498
 
499
- recovery_configs_json = gr.Textbox(
500
- value="""[
501
- {"element": "vae", "method": "diff"},
502
- {"element": "encoder", "method": "diff"},
503
- {"element": "decoder", "method": "diff"},
504
- {"element": "text", "method": "lora", "rank": 64},
505
- {"element": "emb", "method": "lora", "rank": 64},
506
- {"element": "attn", "method": "lora", "rank": 128},
507
- {"element": "all", "method": "none"}
508
- ]""",
509
- lines=10,
510
- label="Recovery Configuration (JSON)",
511
  interactive=True
512
  )
513
 
514
  with gr.Accordion("Authentication", open=False):
515
  hf_token = gr.Textbox(label="Hugging Face Token", type="password")
@@ -534,7 +774,7 @@ with gr.Blocks(title="Advanced FP8 Quantizer with Mixed Precision Recovery") as
534
  repo_url,
535
  safetensors_filename,
536
  fp8_format,
537
- recovery_configs_json,
538
  target_type,
539
  new_repo_id,
540
  hf_token,
@@ -552,12 +792,7 @@ with gr.Blocks(title="Advanced FP8 Quantizer with Mixed Precision Recovery") as
552
  "https://huggingface.co/stabilityai/sdxl-vae",
553
  "diffusion_pytorch_model.safetensors",
554
  "e4m3fn",
555
- """[
556
- {"element": "vae", "method": "diff"},
557
- {"element": "encoder", "method": "diff"},
558
- {"element": "decoder", "method": "diff"},
559
- {"element": "all", "method": "none"}
560
- ]""",
561
  "huggingface"
562
  ],
563
  [
@@ -565,11 +800,7 @@ with gr.Blocks(title="Advanced FP8 Quantizer with Mixed Precision Recovery") as
565
  "https://huggingface.co/runwayml/stable-diffusion-v1-5/tree/main/text_encoder",
566
  "model.safetensors",
567
  "e5m2",
568
- """[
569
- {"element": "text", "method": "lora", "rank": 64},
570
- {"element": "emb", "method": "lora", "rank": 64},
571
- {"element": "all", "method": "none"}
572
- ]""",
573
  "huggingface"
574
  ],
575
  [
@@ -577,44 +808,44 @@ with gr.Blocks(title="Advanced FP8 Quantizer with Mixed Precision Recovery") as
577
  "https://huggingface.co/Yabo/FramePainter/tree/main",
578
  "unet_diffusion_pytorch_model.safetensors",
579
  "e5m2",
580
- """[
581
- {"element": "attn", "method": "lora", "rank": 128},
582
- {"element": "transformer", "method": "lora", "rank": 96},
583
- {"element": "conv", "method": "diff"},
584
- {"element": "resnet", "method": "diff"},
585
- {"element": "all", "method": "none"}
586
- ]""",
587
  "huggingface"
588
  ]
589
  ],
590
- inputs=[source_type, repo_url, safetensors_filename, fp8_format, recovery_configs_json, target_type],
591
- label="Example Conversions"
 
592
  )
593
 
594
  gr.Markdown("""
595
- ## 💡 Precision Recovery Strategy Guide
596
 
597
- ### **LoRA Method** (best for attention/linear layers)
598
- - **Use for**: `text`, `attn`, `transformer`, `emb`, `mlp` layers
599
- - **Rank selection**:
600
- - Text encoders: 64-128
601
- - Attention blocks: 64-128
602
- - Other linear layers: 32-64
603
- - **Benefits**: Captures weight matrix structure, better for semantic understanding
604
- - **Limitations**: Only works on 2D tensors, not suitable for convolutions
605
 
606
- ### **Difference Method** (best for convolutional layers)
607
- - **Use for**: `vae`, `encoder`, `decoder`, `conv`, `resnet` layers
608
- - **How it works**: Stores the exact difference between FP8 and original weights
609
- - **Benefits**: Works with any tensor shape, more accurate for spatial features
610
- - **Limitations**: Larger file size than LoRA for equivalent quality
611
 
612
- ### **Rule Ordering Tips**
613
- - Put specific patterns first (`vae.encoder`), general patterns last (`all`)
614
- - Always end with an `{"element": "all", "method": "none"}` rule as fallback
615
- - Layer names are **case-insensitive** - use lowercase patterns for matching
616
 
617
- > **Pro Tip**: For diffusion models, use Difference for VAE/convolutional components and LoRA for text/attention components for optimal quality/size tradeoff.
618
  """)
619
 
620
  demo.launch()
 
9
  from safetensors.torch import load_file, save_file
10
  import torch
11
  import torch.nn.functional as F
12
+ import traceback
13
  try:
14
  from modelscope.hub.file_download import model_file_download as ms_file_download
15
  from modelscope.hub.api import HubApi as ModelScopeApi
 
18
  MODELScope_AVAILABLE = False
19
 
20
  def low_rank_decomposition(weight, rank=64):
21
+ """
22
+ Correct LoRA decomposition supporting 2D and 4D tensors.
23
+ Returns (lora_A, lora_B) such that weight ≈ lora_B @ lora_A for 2D,
24
+ or appropriate conv form for 4D.
25
+ """
26
+ original_shape = weight.shape
27
+ original_dtype = weight.dtype
28
  try:
29
+ if weight.ndim == 2:
30
+ actual_rank = min(rank, min(weight.shape) // 2)
31
+ if actual_rank < 4:
32
+ return None, None
33
+ U, S, Vh = torch.linalg.svd(weight.float(), full_matrices=False)
34
+ S_sqrt = torch.sqrt(S[:actual_rank])
35
+ # Standard LoRA factorization: W ≈ W_B @ W_A
36
+ W_A = (Vh[:actual_rank, :] * S_sqrt.unsqueeze(1)).contiguous() # [rank, in_features]
37
+ W_B = (U[:, :actual_rank] * S_sqrt.unsqueeze(0)).contiguous() # [out_features, rank]
38
+ return W_A.to(original_dtype), W_B.to(original_dtype)
39
+ elif weight.ndim == 4:
40
+ out_ch, in_ch, k_h, k_w = weight.shape
41
+ if k_h * k_w <= 9: # small conv kernels (e.g., 3x3)
42
+ # Reshape to 2D: [out_ch, in_ch * k_h * k_w]
43
+ weight_2d = weight.view(out_ch, -1)
44
+ actual_rank = min(rank, min(weight_2d.shape) // 2)
45
+ if actual_rank < 4:
46
+ return None, None
47
+ U, S, Vh = torch.linalg.svd(weight_2d.float(), full_matrices=False)
48
+ S_sqrt = torch.sqrt(S[:actual_rank])
49
+ W_A_2d = (Vh[:actual_rank, :] * S_sqrt.unsqueeze(1)).contiguous()
50
+ W_B_2d = (U[:, :actual_rank] * S_sqrt.unsqueeze(0)).contiguous()
51
+ # Reshape back to conv format
52
+ W_A = W_A_2d.view(actual_rank, in_ch, k_h, k_w).contiguous()
53
+ W_B = W_B_2d.view(out_ch, actual_rank, 1, 1).contiguous()
54
+ return W_A.to(original_dtype), W_B.to(original_dtype)
55
+ return None, None
56
  except Exception as e:
57
+ print(f"Decomposition error for {original_shape}: {e}")
58
+ traceback.print_exc()
59
  return None, None
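The factors returned above split the singular values as square roots between W_A and W_B, so for a 2D weight the product W_B @ W_A is the best rank-r approximation of the original matrix. Below is a minimal sketch of how that approximation can be sanity-checked on a standalone tensor; the helper reimplements the same math, and the tensor size and rank values are illustrative rather than taken from app.py:

```python
import torch

def lora_reconstruction_error(weight: torch.Tensor, rank: int) -> float:
    """Relative Frobenius error of the rank-`rank` SVD factorization used above."""
    U, S, Vh = torch.linalg.svd(weight.float(), full_matrices=False)
    r = min(rank, S.numel())
    s_sqrt = torch.sqrt(S[:r])
    W_A = Vh[:r, :] * s_sqrt.unsqueeze(1)   # [rank, in_features]
    W_B = U[:, :r] * s_sqrt.unsqueeze(0)    # [out_features, rank]
    approx = W_B @ W_A
    return (torch.linalg.norm(weight.float() - approx) /
            torch.linalg.norm(weight.float())).item()

# Illustrative check on a random 768x768 matrix at two ranks.
w = torch.randn(768, 768)
print(lora_reconstruction_error(w, rank=64))    # coarse approximation, larger error
print(lora_reconstruction_error(w, rank=512))   # finer approximation, smaller error
```

Higher ranks push the relative error toward zero at the cost of a larger recovery file, which is the trade-off the `rank` field in the recovery rules controls.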
60
 
61
  def extract_correction_factors(original_weight, fp8_weight):
 
92
  else:
93
  return error.mean().to(original_weight.dtype)
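Only the tail of `extract_correction_factors` is visible in this hunk, but it shows that the stored correction is either a per-element error tensor or a single mean error cast back to the original dtype. A small sketch, assuming the full-difference case, of how such a correction is added back onto a dequantized FP8 weight; the names are illustrative and the cast requires a PyTorch build that exposes float8 dtypes:

```python
import torch

def apply_diff_correction(fp8_weight: torch.Tensor, correction: torch.Tensor) -> torch.Tensor:
    """Add a stored correction back onto a dequantized FP8 weight.

    `correction` may be a full-shape difference tensor or a 0-dim mean error;
    broadcasting handles both cases.
    """
    return fp8_weight.to(torch.float32) + correction.to(torch.float32)

# Illustrative round trip (requires torch.float8_e4m3fn support in this build).
orig = torch.randn(16, 16, dtype=torch.float32)
fp8 = orig.to(torch.float8_e4m3fn)
diff = orig - fp8.to(torch.float32)          # full-shape correction
restored = apply_diff_correction(fp8, diff)
print(torch.allclose(restored, orig))        # True up to float32 rounding
```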
94
 
95
+ def get_tensor_info(tensor):
96
+ """Get detailed tensor information for pattern matching."""
97
+ shape = list(tensor.shape)
98
+ dim = tensor.dim()
99
+ numel = tensor.numel()
100
+ dtype = str(tensor.dtype)
101
+
102
+ # Determine tensor type based on shape
103
+ tensor_type = "other"
104
+ if dim == 4 and shape[2] == shape[3]: # Convolutional layer with square kernel
105
+ tensor_type = "conv"
106
+ elif dim == 2:
107
+ if shape[0] > shape[1] * 4: # More likely to be output projection
108
+ tensor_type = "output_proj"
109
+ elif shape[1] > shape[0] * 4: # More likely to be input projection
110
+ tensor_type = "input_proj"
111
+ else:
112
+ tensor_type = "linear"
113
+ elif dim == 1:
114
+ tensor_type = "bias"
115
+
116
+ return {
117
+ "shape": shape,
118
+ "dim": dim,
119
+ "numel": numel,
120
+ "type": tensor_type,
121
+ "dtype": dtype
122
  }
123
+
124
+ def matches_pattern(key, tensor_info, pattern):
125
+ """Check if a tensor matches a pattern definition."""
126
+ key_lower = key.lower()
127
+
128
+ # Match by key name pattern
129
+ if "key_pattern" in pattern:
130
+ key_pattern = pattern["key_pattern"].lower()
131
+ if key_pattern != "all" and key_pattern not in key_lower:
132
+ return False
133
+
134
+ # Match by tensor dimension
135
+ if "dim" in pattern and tensor_info["dim"] != pattern["dim"]:
136
+ return False
137
 
138
+ # Match by tensor type
139
+ if "type" in pattern and tensor_info["type"] != pattern["type"]:
140
+ return False
 
 
141
 
142
+ # Match by minimum tensor size
143
+ if "min_size" in pattern and tensor_info["numel"] < pattern["min_size"]:
144
+ return False
 
145
 
146
+ # Match by shape constraints
147
+ if "shape_contains" in pattern:
148
+ shape_contains = pattern["shape_contains"]
149
+ if not any(shape_contains == dim for dim in tensor_info["shape"]):
150
+ return False
151
 
152
+ return True
153
 
154
  def convert_safetensors_to_fp8_with_recovery(safetensors_path, output_dir, fp8_format,
155
+ recovery_rules, progress=gr.Progress()):
156
+ """Convert model to FP8 with customizable per-tensor recovery strategies."""
157
  progress(0.1, desc="Starting FP8 conversion with precision recovery...")
158
  try:
159
  def read_safetensors_metadata(path):
 
170
  state_dict = load_file(safetensors_path)
171
  progress(0.3, desc="Loaded model weights.")
172
 
 
173
  # Setup FP8 format
174
  fp8_dtype = torch.float8_e5m2 if fp8_format == "e5m2" else torch.float8_e4m3fn
175
 
 
180
  "total_layers": len(state_dict),
181
  "processed_layers": 0,
182
  "skipped_layers": [],
183
+ "recovery_counts": {"lora": 0, "diff": 0},
184
+ "rule_matches": {i: 0 for i in range(len(recovery_rules))}
185
  }
186
 
187
  # Process each tensor
188
  total = len(state_dict)
189
  for i, key in enumerate(state_dict):
190
  progress(0.3 + 0.5 * (i / total), desc=f"Processing {i+1}/{total}: {key.split('.')[-1]}")
191
  weight = state_dict[key]
192
+ tensor_info = get_tensor_info(weight)
193
 
 
194
  if weight.dtype in [torch.float16, torch.float32, torch.bfloat16]:
195
  fp8_weight = weight.to(fp8_dtype)
196
  sd_fp8[key] = fp8_weight
 
199
  stats["skipped_layers"].append(f"{key}: non-float dtype")
200
  continue
201
 
202
+ # Find matching rule for this tensor
203
+ recovery_applied = False
204
+ matched_rule_index = -1
 
 
205
 
206
+ for rule_idx, rule in enumerate(recovery_rules):
207
+ if matches_pattern(key, tensor_info, rule):
208
+ matched_rule_index = rule_idx
209
+ recovery_method = rule["method"]
210
+
211
+ try:
212
+ if recovery_method == "lora" and weight.ndim == 2:
213
+ # LoRA recovery for 2D tensors only
214
+ rank = rule.get("rank", 64)
215
+ # Adjust rank for smaller matrices
216
+ adjusted_rank = min(rank, min(weight.shape) // 2)
217
+ if adjusted_rank >= 4:
218
+ A, B = low_rank_decomposition(weight, rank=adjusted_rank)
219
+ if A is not None and B is not None:
220
+ recovery_weights[f"lora_A.{key}"] = A
221
+ recovery_weights[f"lora_B.{key}"] = B
222
+ stats["processed_layers"] += 1
223
+ stats["recovery_counts"]["lora"] += 1
224
+ stats["rule_matches"][rule_idx] += 1
225
+ recovery_applied = True
226
+ break
227
+
228
+ elif recovery_method == "diff":
229
+ # Difference/correction recovery for any tensor type
230
+ corr = extract_correction_factors(weight, fp8_weight)
231
+ if corr is not None:
232
+ recovery_weights[f"diff.{key}"] = corr
233
+ stats["processed_layers"] += 1
234
+ stats["recovery_counts"]["diff"] += 1
235
+ stats["rule_matches"][rule_idx] += 1
236
+ recovery_applied = True
237
+ break
238
+
239
+ # If method is "none" or recovery failed, continue to next rule
240
+ if recovery_method == "none":
241
+ break
242
+
243
+ except Exception as e:
244
+ stats["skipped_layers"].append(f"{key}: error with rule {rule_idx} - {str(e)}")
245
 
246
+ if not recovery_applied:
247
+ reason = "no matching rule" if matched_rule_index == -1 else f"recovery failed with rule {matched_rule_index}"
248
+ stats["skipped_layers"].append(f"{key}: {reason}")
249
 
250
  # Save FP8 model
251
  base_name = os.path.splitext(os.path.basename(safetensors_path))[0]
 
259
  recovery_metadata = {
260
  "format": "pt",
261
  "fp8_format": fp8_format,
262
+ "recovery_rules": json.dumps(recovery_rules),
263
  "stats": json.dumps(stats)
264
  }
265
  save_file(recovery_weights, recovery_path, metadata=recovery_metadata)
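The recovery file carries its rule set and conversion stats as JSON strings in the safetensors header, so they can be inspected without loading any tensors. A sketch of reading them back with `safetensors.safe_open`, assuming `recovery_path` points at a recovery file written as above:

```python
import json
from safetensors import safe_open

# recovery_path is assumed to point at a recovery .safetensors file written above.
with safe_open(recovery_path, framework="pt") as f:
    meta = f.metadata() or {}
    rules = json.loads(meta.get("recovery_rules", "[]"))
    stats = json.loads(meta.get("stats", "{}"))

print("FP8 format:", meta.get("fp8_format"))
print("Rule methods:", [r["method"] for r in rules])
print("Layers with recovery:", stats.get("processed_layers"))
```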
 
273
  stats_msg += f" - LoRA recovery: {stats['recovery_counts']['lora']}\n"
274
  stats_msg += f" - Difference recovery: {stats['recovery_counts']['diff']}\n"
275
 
276
+ # Show rule effectiveness
277
+ stats_msg += "\nRule effectiveness:\n"
278
+ for rule_idx, rule in enumerate(recovery_rules):
279
+ matches = stats["rule_matches"][rule_idx]
280
+ if matches > 0:
281
+ method = rule["method"]
282
+ pattern = rule.get("key_pattern", "no pattern")
283
+ rank_info = f" (rank {rule.get('rank', 'N/A')})" if method == "lora" else ""
284
+ stats_msg += f"- Rule {rule_idx}: {matches} layers matched pattern '{pattern}' with {method}{rank_info}\n"
285
+
286
  if not recovery_weights:
287
  stats_msg += "\n⚠️ No recovery weights were generated. All layers use pure FP8."
288
 
 
290
  return True, stats_msg, stats, fp8_path, recovery_path
291
 
292
  except Exception as e:
293
+ traceback.print_exc()
294
+ return False, str(e), None, None, None
 
295
 
296
  def parse_hf_url(url):
297
  url = url.strip().rstrip("/")
 
349
  else:
350
  raise ValueError("Unknown target")
351
 
352
+ def generate_default_rules(architecture="auto"):
353
+ """Generate default recovery rules based on architecture."""
354
+ if architecture == "vae":
355
+ return """[
356
+ {
357
+ "key_pattern": "vae",
358
+ "dim": 4,
359
+ "method": "diff"
360
+ },
361
+ {
362
+ "key_pattern": "encoder",
363
+ "dim": 4,
364
+ "method": "diff"
365
+ },
366
+ {
367
+ "key_pattern": "decoder",
368
+ "dim": 4,
369
+ "method": "diff"
370
+ },
371
+ {
372
+ "key_pattern": "all",
373
+ "method": "none"
374
+ }
375
+ ]"""
376
+ elif architecture == "text_encoder":
377
+ return """[
378
+ {
379
+ "key_pattern": "text",
380
+ "dim": 2,
381
+ "min_size": 10000,
382
+ "method": "lora",
383
+ "rank": 64
384
+ },
385
+ {
386
+ "key_pattern": "emb",
387
+ "dim": 2,
388
+ "min_size": 10000,
389
+ "method": "lora",
390
+ "rank": 64
391
+ },
392
+ {
393
+ "key_pattern": "attn",
394
+ "dim": 2,
395
+ "min_size": 10000,
396
+ "method": "lora",
397
+ "rank": 128
398
+ },
399
+ {
400
+ "key_pattern": "all",
401
+ "method": "none"
402
+ }
403
+ ]"""
404
+ elif architecture == "unet_transformer":
405
+ return """[
406
+ {
407
+ "key_pattern": "attn",
408
+ "dim": 2,
409
+ "min_size": 10000,
410
+ "method": "lora",
411
+ "rank": 128
412
+ },
413
+ {
414
+ "key_pattern": "transformer",
415
+ "dim": 2,
416
+ "min_size": 10000,
417
+ "method": "lora",
418
+ "rank": 96
419
+ },
420
+ {
421
+ "key_pattern": "all",
422
+ "method": "none"
423
+ }
424
+ ]"""
425
+ elif architecture == "unet_conv":
426
+ return """[
427
+ {
428
+ "key_pattern": "conv",
429
+ "dim": 4,
430
+ "method": "diff"
431
+ },
432
+ {
433
+ "key_pattern": "resnet",
434
+ "dim": 4,
435
+ "method": "diff"
436
+ },
437
+ {
438
+ "key_pattern": "down",
439
+ "dim": 4,
440
+ "method": "diff"
441
+ },
442
+ {
443
+ "key_pattern": "up",
444
+ "dim": 4,
445
+ "method": "diff"
446
+ },
447
+ {
448
+ "key_pattern": "all",
449
+ "method": "none"
450
+ }
451
+ ]"""
452
+ else: # "all" or "auto"
453
+ return """[
454
+ {
455
+ "key_pattern": "vae",
456
+ "dim": 4,
457
+ "method": "diff"
458
+ },
459
+ {
460
+ "key_pattern": "encoder",
461
+ "dim": 4,
462
+ "method": "diff"
463
+ },
464
+ {
465
+ "key_pattern": "decoder",
466
+ "dim": 4,
467
+ "method": "diff"
468
+ },
469
+ {
470
+ "key_pattern": "text",
471
+ "dim": 2,
472
+ "min_size": 10000,
473
+ "method": "lora",
474
+ "rank": 64
475
+ },
476
+ {
477
+ "key_pattern": "emb",
478
+ "dim": 2,
479
+ "min_size": 10000,
480
+ "method": "lora",
481
+ "rank": 64
482
+ },
483
+ {
484
+ "key_pattern": "attn",
485
+ "dim": 2,
486
+ "min_size": 10000,
487
+ "method": "lora",
488
+ "rank": 128
489
+ },
490
+ {
491
+ "key_pattern": "conv",
492
+ "dim": 4,
493
+ "method": "diff"
494
+ },
495
+ {
496
+ "key_pattern": "resnet",
497
+ "dim": 4,
498
+ "method": "diff"
499
+ },
500
+ {
501
+ "key_pattern": "all",
502
+ "method": "none"
503
+ }
504
+ ]"""
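`generate_default_rules` returns each preset as a JSON string so it can populate the Gradio textbox directly, which means callers must `json.loads` it before use. A quick sketch of that round trip using the function defined above:

```python
import json

rules_json = generate_default_rules("text_encoder")  # defined above; returns a JSON string
rules = json.loads(rules_json)

assert isinstance(rules, list)
assert rules[-1] == {"key_pattern": "all", "method": "none"}   # catch-all terminator
for rule in rules:
    if rule["method"] == "lora":
        assert "rank" in rule, "LoRA rules must carry a rank"
print(len(rules), "rules:", [r["method"] for r in rules])
```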
505
+
506
  def process_and_upload_fp8(
507
  source_type,
508
  repo_url,
509
  safetensors_filename,
510
  fp8_format,
511
+ recovery_rules_json,
512
  target_type,
513
  new_repo_id,
514
  hf_token,
 
523
  if target_type == "huggingface" and not hf_token:
524
  return None, "❌ Hugging Face token required for target.", "", ""
525
 
526
+ # Parse recovery rules
527
  try:
528
+ recovery_rules = json.loads(recovery_rules_json)
529
  except json.JSONDecodeError:
530
+ return None, "❌ Invalid recovery rules JSON.", "", ""
531
 
532
+ # Validate rules
533
  valid_methods = ["none", "lora", "diff"]
534
+ for rule in recovery_rules:
535
+ if "method" not in rule or rule["method"] not in valid_methods:
536
+ return None, f"❌ Invalid method: {rule.get('method')}. Use 'none', 'lora', or 'diff'", "", ""
537
+ if rule["method"] == "lora" and "rank" not in rule:
 
 
538
  return None, "❌ LoRA method requires 'rank' parameter", "", ""
539
 
540
  temp_dir = None
 
547
 
548
  progress(0.2, desc="Converting to FP8 with precision recovery...")
549
  success, msg, stats, fp8_path, recovery_path = convert_safetensors_to_fp8_with_recovery(
550
+ safetensors_path, output_dir, fp8_format, recovery_rules, progress
551
  )
552
 
553
  if not success:
 
572
  - mixed-method
573
  - converted-by-gradio
574
  ---
575
+ # FP8 Model with Per-Tensor Precision Recovery
576
  - **Source**: `{repo_url}`
577
  - **Original File**: `{safetensors_filename}`
578
  - **FP8 Format**: `{fp8_format.upper()}`
579
  - **FP8 File**: `{fp8_filename}`
580
  - **Recovery File**: `{recovery_filename if recovery_filename else "None"}`
581
 
582
+ ## Recovery Rules Used
583
  ```json
584
+ {json.dumps(recovery_rules, indent=2)}
585
  ```
586
 
587
  ## Usage (Inference)
 
601
  fp8_weight = fp8_state[key].to(torch.float32) # Convert to float32 for computation
602
 
603
  # Apply LoRA recovery if available
604
+ lora_a_key = f"lora_A.{{key}}"
605
+ lora_b_key = f"lora_B.{{key}}"
606
+ if lora_a_key in recovery_state and lora_b_key in recovery_state:
607
+ A = recovery_state[lora_a_key].to(torch.float32)
608
+ B = recovery_state[lora_b_key].to(torch.float32)
609
  # Reconstruct the low-rank approximation
610
  lora_weight = B @ A
611
  fp8_weight = fp8_weight + lora_weight
612
 
613
  # Apply difference recovery if available
614
+ diff_key = f"diff.{{key}}"
615
+ if diff_key in recovery_state:
616
+ diff = recovery_state[diff_key].to(torch.float32)
617
  fp8_weight = fp8_weight + diff
618
 
619
  reconstructed[key] = fp8_weight
 
666
  recovery_details)
667
 
668
  except Exception as e:
669
+ traceback.print_exc()
670
+ return None, f"❌ Error: {str(e)}", "", ""
 
671
 
672
  finally:
673
  if temp_dir:
674
  shutil.rmtree(temp_dir, ignore_errors=True)
675
  shutil.rmtree(output_dir, ignore_errors=True)
676
 
677
+ with gr.Blocks(title="Advanced FP8 Quantizer with Per-Tensor Precision Recovery") as demo:
678
+ gr.Markdown("# 🔄 Advanced FP8 Quantizer with Per-Tensor Precision Recovery")
679
+ gr.Markdown("Convert `.safetensors` → **FP8** + **customizable precision recovery**. Full control over LoRA and difference methods per tensor pattern.")
680
 
681
  with gr.Row():
682
  with gr.Column():
 
687
  with gr.Accordion("FP8 Settings", open=True):
688
  fp8_format = gr.Radio(["e4m3fn", "e5m2"], value="e5m2", label="FP8 Format")
689
 
690
+ with gr.Accordion("Per-Tensor Recovery Rules", open=True):
691
  gr.Markdown("""
692
+ ### Configure recovery strategy for each tensor pattern
693
 
694
+ Format: JSON array of rule objects:
695
  ```json
696
  [
697
+ {
698
+ "key_pattern": "vae",
699
+ "dim": 4,
700
+ "method": "diff"
701
+ },
702
+ {
703
+ "key_pattern": "attn",
704
+ "dim": 2,
705
+ "min_size": 10000,
706
+ "method": "lora",
707
+ "rank": 64
708
+ },
709
+ {
710
+ "key_pattern": "all",
711
+ "method": "none"
712
+ }
713
  ]
714
  ```
715
 
716
+ ### Rule Fields (all optional except "method"):
717
+ - `key_pattern`: Substring to match in weight keys (case-insensitive). Use "all" to match everything.
718
+ - `dim`: Tensor dimension (e.g., 2 for linear layers, 4 for convolutions)
719
+ - `type`: Tensor type ("conv", "linear", "bias", "input_proj", "output_proj")
720
+ - `min_size`: Minimum number of elements in tensor
721
+ - `shape_contains`: Specific dimension size that must be present in shape
722
  - `method`: "none" (pure FP8), "lora" (low-rank adaptation), or "diff" (difference/correction)
723
+ - `rank`: Required for "lora" method (higher = better quality but larger file)
724
 
725
+ **Rules are applied in order** - first match wins. Always end with a catch-all rule.
726
  """)
727
 
728
+ recovery_rules_json = gr.Textbox(
729
+ value=generate_default_rules("all"),
730
+ lines=15,
731
+ label="Recovery Rules (JSON)",
732
  interactive=True
733
  )
734
+
735
+ architecture_preset = gr.Dropdown(
736
+ choices=[
737
+ ("Auto-detect architecture", "auto"),
738
+ ("VAE (Difference method)", "vae"),
739
+ ("Text Encoder (LoRA)", "text_encoder"),
740
+ ("UNet Transformers (LoRA)", "unet_transformer"),
741
+ ("UNet Convolutions (Difference)", "unet_conv"),
742
+ ("All Components (Mixed)", "all")
743
+ ],
744
+ value="auto",
745
+ label="Architecture Preset"
746
+ )
747
+
748
+ architecture_preset.change(
749
+ fn=generate_default_rules,
750
+ inputs=architecture_preset,
751
+ outputs=recovery_rules_json
752
+ )
753
 
754
  with gr.Accordion("Authentication", open=False):
755
  hf_token = gr.Textbox(label="Hugging Face Token", type="password")
 
774
  repo_url,
775
  safetensors_filename,
776
  fp8_format,
777
+ recovery_rules_json,
778
  target_type,
779
  new_repo_id,
780
  hf_token,
 
792
  "https://huggingface.co/stabilityai/sdxl-vae",
793
  "diffusion_pytorch_model.safetensors",
794
  "e4m3fn",
795
+ generate_default_rules("vae"),
796
  "huggingface"
797
  ],
798
  [
 
800
  "https://huggingface.co/runwayml/stable-diffusion-v1-5/tree/main/text_encoder",
801
  "model.safetensors",
802
  "e5m2",
803
+ generate_default_rules("text_encoder"),
804
  "huggingface"
805
  ],
806
  [
 
808
  "https://huggingface.co/Yabo/FramePainter/tree/main",
809
  "unet_diffusion_pytorch_model.safetensors",
810
  "e5m2",
811
+ generate_default_rules("unet_transformer"),
812
  "huggingface"
813
  ]
814
  ],
815
+ inputs=[source_type, repo_url, safetensors_filename, fp8_format, recovery_rules_json, target_type],
816
+ label="Example Conversions",
817
+ cache_examples=False
818
  )
819
 
820
  gr.Markdown("""
821
+ ## 💡 Tensor Pattern Matching Guide
822
+
823
+ This tool uses **advanced tensor pattern matching** to determine which recovery method to apply to each layer:
824
+
825
+ ### **Key Patterns**
826
+ - Match by substring in weight key name
827
+ - Case-insensitive matching
828
+ - Special keyword "all" matches everything
829
 
830
+ ### **Tensor Properties**
831
+ - **Dimension (dim)**: Use `dim: 2` for linear layers, `dim: 4` for convolutions
832
+ - **Type**: Automatic classification based on shape:
833
+ - `conv`: 4D tensors with equal spatial dimensions
834
+ - `linear`: 2D tensors without extreme aspect ratio
835
+ - `input_proj`: 2D tensors with much larger second dimension
836
+ - `output_proj`: 2D tensors with much larger first dimension
837
+ - `bias`: 1D tensors
838
 
839
+ ### **Size Constraints**
840
+ - **min_size**: Only apply to tensors with at least N elements
841
+ - **shape_contains**: Match tensors containing a specific dimension size
 
 
842
 
843
+ ### **Rule Processing**
844
+ - Rules are evaluated **in order**
845
+ - First matching rule wins
846
+ - Always include a catch-all rule at the end
847
 
848
+ > **Pro Tip for VAE**: Use `"dim": 4` combined with `"key_pattern": "vae"` to reliably target VAE convolutional layers with difference recovery.
849
  """)
850
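The shape-based type classification described in the guide can be exercised directly on a few representative tensors. This sketch reuses `get_tensor_info` from earlier in the file; the weight names and shapes are illustrative:

```python
import torch

# Representative tensors and the bucket the classifier assigns to each shape.
examples = {
    "vae.encoder.conv_in.weight": torch.randn(128, 3, 3, 3),  # 4D, square kernel -> "conv"
    "mlp.fc1.weight":             torch.randn(4096, 768),     # dim0 > 4 * dim1   -> "output_proj"
    "mlp.fc2.weight":             torch.randn(768, 4096),     # dim1 > 4 * dim0   -> "input_proj"
    "attn.to_q.weight":           torch.randn(768, 768),      # balanced 2D       -> "linear"
    "attn.to_q.bias":             torch.randn(768),           # 1D                -> "bias"
}

for key, tensor in examples.items():
    info = get_tensor_info(tensor)  # defined earlier in app.py
    print(f"{key:28s} {tuple(tensor.shape)} -> {info['type']}")
```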
 
851
  demo.launch()