codemichaeld committed on
Commit 4f1244e · verified · 1 Parent(s): 311ef01

Update app.py

Files changed (1)
  1. app.py +295 -153
app.py CHANGED
@@ -17,48 +17,92 @@ except ImportError:
    MODELScope_AVAILABLE = False

def low_rank_decomposition(weight, rank=64):
-    """Standard LoRA decomposition for 2D tensors only."""
    if weight.ndim != 2:
        return None, None

    try:
-        U, S, Vh = torch.linalg.svd(weight.float(), full_matrices=False)
-        U = U[:, :rank] @ torch.diag(torch.sqrt(S[:rank]))
-        Vh = torch.diag(torch.sqrt(S[:rank])) @ Vh[:rank, :]
-        return U.contiguous(), Vh.contiguous()
-    except Exception:
        return None, None

def extract_correction_factors(original_weight, fp8_weight):
    """Extract per-channel/tensor correction factors (difference method)."""
    with torch.no_grad():
        orig = original_weight.float()
        quant = fp8_weight.float()
        error = orig - quant

        error_norm = torch.norm(error)
        orig_norm = torch.norm(orig)
        if orig_norm > 1e-6 and error_norm / orig_norm < 0.01:
            return None

-        # For 4D tensors (VAE/conv layers)
        if orig.ndim == 4:
            channel_dim = 0
            channel_mean = error.mean(dim=tuple(i for i in range(1, orig.ndim)), keepdim=True)
            return channel_mean.to(original_weight.dtype)

        # For 2D tensors (linear layers)
        elif orig.ndim == 2:
            row_mean = error.mean(dim=1, keepdim=True)
            return row_mean.to(original_weight.dtype)

-        # For 1D tensors (bias, etc.)
        else:
            return error.mean().to(original_weight.dtype)

-def convert_safetensors_to_fp8_with_recovery(safetensors_path, output_dir, fp8_format, recovery_config, progress=gr.Progress()):
-    progress(0.1, desc="Starting FP8 conversion with precision recovery...")

    try:
        def read_safetensors_metadata(path):
            with open(path, 'rb') as f:
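The removed factorization splits the singular values as √S into each factor; the replacement further down keeps `Vh` as `W_A` and folds all of `S` into `W_B`. Both are truncations of the same SVD, as this standalone sketch shows (toy shapes and names, not app.py's code path):

```python
# Minimal sketch: sqrt-split vs. full-S split of a truncated SVD.
import torch

torch.manual_seed(0)
W = torch.randn(256, 128)
rank = 16

U, S, Vh = torch.linalg.svd(W.float(), full_matrices=False)

# sqrt-split (removed code): each factor carries sqrt(S)
A1 = U[:, :rank] @ torch.diag(torch.sqrt(S[:rank]))   # [256, rank]
B1 = torch.diag(torch.sqrt(S[:rank])) @ Vh[:rank, :]  # [rank, 128]

# full-S split (replacement code): one factor carries all of S
B2 = U[:, :rank] @ torch.diag(S[:rank])               # [256, rank]
A2 = Vh[:rank, :]                                     # [rank, 128]

# both reconstruct the same best rank-16 approximation of W
assert torch.allclose(A1 @ B1, B2 @ A2, atol=1e-4)
print("max |W - W_r|:", (W - A1 @ B1).abs().max().item())
```

The √S split balances the two factors' norms, which mainly matters for fp16 storage range; the reconstructed product is identical either way.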
@@ -69,122 +113,128 @@ def convert_safetensors_to_fp8_with_recovery(safetensors_path, output_dir, fp8_f

        metadata = read_safetensors_metadata(safetensors_path)
        progress(0.2, desc="Loaded metadata.")
        state_dict = load_file(safetensors_path)
-        progress(0.4, desc="Loaded weights.")

-        if fp8_format == "e5m2":
-            fp8_dtype = torch.float8_e5m2
-        else:
-            fp8_dtype = torch.float8_e4m3fn

        sd_fp8 = {}
        recovery_weights = {}
        stats = {
            "total_layers": len(state_dict),
            "processed_layers": 0,
            "skipped_layers": [],
-            "recovery_type_counts": {"lora": 0, "diff": 0}
        }

        total = len(state_dict)
        for i, key in enumerate(state_dict):
-            progress(0.4 + 0.4 * (i / total), desc=f"Processing {i+1}/{total}...")
            weight = state_dict[key]
-            lower_key = key.lower()

            if weight.dtype in [torch.float16, torch.float32, torch.bfloat16]:
                fp8_weight = weight.to(fp8_dtype)
                sd_fp8[key] = fp8_weight
-
-                # Match key against recovery config rules
-                recovery_method = "none"
-                lora_rank = 64
-
-                for rule in recovery_config:
-                    element_pattern = rule.get("element", "").lower()
-                    method = rule.get("method", "none")
-
-                    if element_pattern == "all" or element_pattern in lower_key:
-                        recovery_method = method
-                        if method == "lora":
-                            lora_rank = rule.get("rank", 64)
-                        break
-
-                if recovery_method == "lora" and weight.ndim == 2 and min(weight.shape) > lora_rank:
-                    try:
-                        U, V = low_rank_decomposition(weight, rank=lora_rank)
-                        if U is not None and V is not None:
-                            recovery_weights[f"lora_A.{key}"] = U.to(torch.float16)
-                            recovery_weights[f"lora_B.{key}"] = V.to(torch.float16)
-                            stats["processed_layers"] += 1
-                            stats["recovery_type_counts"]["lora"] += 1
-                    except Exception:
-                        stats["skipped_layers"].append(f"{key}: lora failed")
-
-                elif recovery_method == "diff":
-                    try:
-                        corr = extract_correction_factors(weight, fp8_weight)
-                        if corr is not None:
-                            recovery_weights[f"diff.{key}"] = corr
-                            stats["processed_layers"] += 1
-                            stats["recovery_type_counts"]["diff"] += 1
-                    except Exception:
-                        stats["skipped_layers"].append(f"{key}: diff failed")
-
-                else:
-                    stats["skipped_layers"].append(f"{key}: {recovery_method}")
            else:
                sd_fp8[key] = weight
                stats["skipped_layers"].append(f"{key}: non-float dtype")

        base_name = os.path.splitext(os.path.basename(safetensors_path))[0]
        fp8_path = os.path.join(output_dir, f"{base_name}-fp8-{fp8_format}.safetensors")
-        recovery_path = os.path.join(output_dir, f"{base_name}-recovery.safetensors")
-
        save_file(sd_fp8, fp8_path, metadata={"format": "pt", "fp8_format": fp8_format, **metadata})

        if recovery_weights:
-            save_file(recovery_weights, recovery_path, metadata={
                "format": "pt",
                "fp8_format": fp8_format,
-                "recovery_config": json.dumps(recovery_config),
                "stats": json.dumps(stats)
-            })

        progress(0.9, desc="Saved FP8 and recovery files.")
-        progress(1.0, desc="✅ FP8 + recovery extraction complete!")

-        stats_msg = f"FP8 ({fp8_format}) and recovery saved.\n"
        stats_msg += f"- Total layers: {stats['total_layers']}\n"
-        stats_msg += f"- Processed: {stats['processed_layers']} ({stats['recovery_type_counts']['lora']} LoRA + {stats['recovery_type_counts']['diff']} Diff)\n"

-        if stats["processed_layers"] == 0:
-            stats_msg += "\n⚠️ No recovery weights generated. Check your rules and rank settings."

-        return True, stats_msg, stats

    except Exception as e:
-        return False, str(e), None
-
-def generate_config_from_rules(rules_input):
-    """Parse multi-line rule input into config."""
-    config = []
-    for line in rules_input.strip().split('\n'):
-        line = line.strip()
-        if not line or line.startswith('#'):
-            continue
-        parts = [p.strip() for p in line.split(',')]
-        if len(parts) >= 2:
-            element = parts[0]
-            method = parts[1].lower()
-            rank = 64
-            if method == "lora" and len(parts) >= 3:
-                try:
-                    rank = int(parts[2])
-                except ValueError:
-                    pass
-            config.append({"element": element, "method": method, "rank": rank})
-    return config

def parse_hf_url(url):
    url = url.strip().rstrip("/")
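For reference, the rule text format this commit removes parsed one `pattern, method[, rank]` rule per line, with `#` starting a comment. A re-derived standalone sketch of that behavior (`parse_rules` is an illustrative name, not the deleted function, and its rank handling is slightly simplified):

```python
# What the removed text-rule format parsed to (mechanism re-derived):
# "pattern, method[, rank]" per line; '#' starts a comment line.
def parse_rules(rules_input):
    config = []
    for line in rules_input.strip().split('\n'):
        line = line.strip()
        if not line or line.startswith('#'):
            continue
        parts = [p.strip() for p in line.split(',')]
        if len(parts) >= 2:
            rank = int(parts[2]) if parts[1].lower() == "lora" and len(parts) >= 3 and parts[2].isdigit() else 64
            config.append({"element": parts[0], "method": parts[1].lower(), "rank": rank})
    return config

print(parse_rules("vae, diff\nattn, lora, 128\nall, none"))
# [{'element': 'vae', 'method': 'diff', 'rank': 64},
#  {'element': 'attn', 'method': 'lora', 'rank': 128},
#  {'element': 'all', 'method': 'none', 'rank': 64}]
```

As in the deleted code, a default rank of 64 is attached even to rules that never use it.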
@@ -247,7 +297,7 @@ def process_and_upload_fp8(
    repo_url,
    safetensors_filename,
    fp8_format,
-    recovery_rules,
    target_type,
    new_repo_id,
    hf_token,
@@ -256,15 +306,27 @@
    progress=gr.Progress()
):
    if not re.match(r"^[a-zA-Z0-9._-]+/[a-zA-Z0-9._-]+$", new_repo_id):
-        return None, "❌ Invalid repo ID format. Use 'username/model-name'.", ""
    if source_type == "huggingface" and not hf_token:
-        return None, "❌ Hugging Face token required for source.", ""
    if target_type == "huggingface" and not hf_token:
-        return None, "❌ Hugging Face token required for target.", ""

-    recovery_config = generate_config_from_rules(recovery_rules)
-    if not recovery_config:
-        recovery_config = [{"element": "all", "method": "none"}]

    temp_dir = None
    output_dir = tempfile.mkdtemp()
@@ -274,22 +336,23 @@
            source_type, repo_url, safetensors_filename, hf_token, progress
        )

-        progress(0.25, desc="Converting to FP8 with recovery...")
-        success, msg, stats = convert_safetensors_to_fp8_with_recovery(
-            safetensors_path, output_dir, fp8_format, recovery_config, progress
        )

        if not success:
-            return None, f"❌ Conversion failed: {msg}", ""

        progress(0.9, desc="Uploading...")
        repo_url_final = upload_to_target(
            target_type, new_repo_id, output_dir, fp8_format, hf_token, modelscope_token, private_repo
        )

        base_name = os.path.splitext(safetensors_filename)[0]
-        fp8_filename = f"{base_name}-fp8-{fp8_format}.safetensors"
-        recovery_filename = f"{base_name}-recovery.safetensors"

        readme = f"""---
library_name: diffusers
@@ -300,44 +363,61 @@ tags:
 - mixed-method
 - converted-by-gradio
---
-# FP8 Model with Custom Precision Recovery
-
 - **Source**: `{repo_url}`
- - **File**: `{safetensors_filename}`
 - **FP8 Format**: `{fp8_format.upper()}`
- - **Recovery File**: `{recovery_filename}` (contains both LoRA and Difference weights)

-## Recovery Rules Used
-```
-{recovery_rules}
```

-## Usage
```python
from safetensors.torch import load_file
import torch

fp8_state = load_file("{fp8_filename}")
-recovery_state = load_file("{recovery_filename}")

reconstructed = {{}}
for key in fp8_state:
-    fp8_weight = fp8_state[key].to(torch.float32)

-    # Apply LoRA if present
    if f"lora_A.{{key}}" in recovery_state and f"lora_B.{{key}}" in recovery_state:
        A = recovery_state[f"lora_A.{{key}}"].to(torch.float32)
        B = recovery_state[f"lora_B.{{key}}"].to(torch.float32)
        lora_weight = B @ A
        fp8_weight = fp8_weight + lora_weight

-    # Apply Difference if present
    if f"diff.{{key}}" in recovery_state:
        diff = recovery_state[f"diff.{{key}}"].to(torch.float32)
        fp8_weight = fp8_weight + diff

    reconstructed[key] = fp8_weight
```
"""

        with open(os.path.join(output_dir, "README.md"), "w") as f:
@@ -353,24 +433,39 @@ for key in fp8_state:
        )

        progress(1.0, desc="✅ Done!")
        result_html = f"""
        ✅ Success!
        Model uploaded to: <a href="{repo_url_final}" target="_blank">{new_repo_id}</a>
-        Includes FP8 + custom recovery weights.
        """
-        return gr.HTML(result_html), "✅ FP8 + recovery upload successful!", msg

    except Exception as e:
-        return None, f"❌ Error: {str(e)}", ""

    finally:
        if temp_dir:
            shutil.rmtree(temp_dir, ignore_errors=True)
        shutil.rmtree(output_dir, ignore_errors=True)

-with gr.Blocks(title="FP8 + Custom Recovery Extractor") as demo:
-    gr.Markdown("# 🔄 FP8 Quantizer with Per-Layer Recovery Control")
-    gr.Markdown("Specify **exact recovery method per layer/tensor** using pattern matching. Supports LoRA and Difference methods simultaneously.")

    with gr.Row():
        with gr.Column():
@@ -381,21 +476,39 @@ with gr.Blocks(title="FP8 + Custom Recovery Extractor") as demo:
        with gr.Accordion("FP8 Settings", open=True):
            fp8_format = gr.Radio(["e4m3fn", "e5m2"], value="e5m2", label="FP8 Format")

-        with gr.Accordion("Recovery Rules (Layer/Tensor Level)", open=True):
            gr.Markdown("""
-            Define recovery rules **one per line** in format:
-            `layer_pattern, method [, rank]`

-            - `layer_pattern`: substring to match in weight key (case-insensitive)
-            - `method`: `lora` or `diff` or `none`
-            - `rank`: LoRA rank (only for `lora` method)

-            **Rules are applied in order** – first match wins.
            """)
-            recovery_rules = gr.Textbox(
-                value="vae, diff\nencoder, diff\ndecoder, diff\ntext, lora, 64\nattn, lora, 128\nall, none",
-                lines=8,
-                label="Recovery Rules"
            )

        with gr.Accordion("Authentication", open=False):
@@ -409,6 +522,7 @@ with gr.Blocks(title="FP8 + Custom Recovery Extractor") as demo:

    status_output = gr.Markdown()
    detailed_log = gr.Textbox(label="Processing Log", interactive=False, lines=10)

    convert_btn = gr.Button("🚀 Convert & Upload", variant="primary")
    repo_link_output = gr.HTML()
@@ -420,59 +534,87 @@ with gr.Blocks(title="FP8 + Custom Recovery Extractor") as demo:
            repo_url,
            safetensors_filename,
            fp8_format,
-            recovery_rules,
            target_type,
            new_repo_id,
            hf_token,
            modelscope_token,
            private_repo
        ],
-        outputs=[repo_link_output, status_output, detailed_log],
        show_progress=True
    )

    gr.Examples(
        examples=[
            [
-                "huggingface",
-                "https://huggingface.co/stabilityai/sdxl-vae",
-                "diffusion_pytorch_model.safetensors",
                "e5m2",
-                "vae, diff\nencoder, diff\ndecoder, diff\nall, none",
                "huggingface"
            ],
            [
                "huggingface",
-                "https://huggingface.co/runwayml/stable-diffusion-v1-5/tree/main/text_encoder",
-                "model.safetensors",
                "e5m2",
-                "text, lora, 64\nemb, lora, 64\nall, none",
                "huggingface"
            ]
        ],
-        inputs=[source_type, repo_url, safetensors_filename, fp8_format, recovery_rules, target_type],
        label="Example Conversions"
    )

    gr.Markdown("""
-    ## 💡 Recovery Strategy Guide

-    ### **Difference Method (Recommended for VAE/Convs)**
-    - Use for: `vae`, `encoder`, `decoder`, `conv` layers
-    - Captures exact quantization error
-    - Works with 4D tensors that LoRA cannot handle

-    ### **LoRA Method (Recommended for Attention/Linear)**
-    - Use for: `text`, `attn`, `mlp`, `transformer` layers
-    - Use rank 32-128 depending on layer importance
-    - Only works on 2D tensors

    ### **Rule Ordering Tips**
-    - Put specific patterns first (`vae.encoder`) before general ones (`vae`)
-    - End with `all, none` to set default behavior
-    - Layer names are **case-insensitive**

-    > This implementation restores the successful VAE difference method while adding full per-layer control.
    """)

demo.launch()
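The difference method in both versions stores one mean-error value per output channel (or per row, or per tensor), broadcast back at load time. A standalone sketch on a toy 4D conv weight (shapes illustrative; FP8 dtypes need PyTorch 2.1 or newer):

```python
# Sketch: the per-channel "diff" correction on a toy 4D conv weight.
import torch

w = torch.randn(8, 4, 3, 3)                      # [out_ch, in_ch, kH, kW]
w_fp8 = w.to(torch.float8_e4m3fn)                # quantize to FP8
error = w - w_fp8.float()                        # per-element quantization error

# one mean per output channel, keepdim=True so it broadcasts back over w
corr = error.mean(dim=(1, 2, 3), keepdim=True)   # shape [8, 1, 1, 1]
recovered = w_fp8.float() + corr                 # corrected reconstruction

before = (w - w_fp8.float()).norm().item()
after = (w - recovered).norm().item()
rel = w.norm().item()
print(f"relative error: {before / rel:.4f} -> {after / rel:.4f}")
```

The stored correction is tiny on disk but removes only the mean (bias) component of the error per channel, not its per-element detail. The additions side of the diff follows below.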
 
    MODELScope_AVAILABLE = False

def low_rank_decomposition(weight, rank=64):
+    """Standard LoRA decomposition for 2D tensors."""
    if weight.ndim != 2:
        return None, None

    try:
+        weight_f32 = weight.float()
+        U, S, Vh = torch.linalg.svd(weight_f32, full_matrices=False)
+
+        actual_rank = min(rank, len(S))
+        if actual_rank < 4:
+            return None, None
+
+        # Standard LoRA factorization: W = W_B @ W_A
+        W_A = Vh[:actual_rank, :].contiguous()  # [rank, in_features]
+        W_B = U[:, :actual_rank] @ torch.diag(S[:actual_rank])  # [out_features, rank]
+
+        return W_A.to(torch.float16), W_B.to(torch.float16)
+    except Exception as e:
+        print(f"Decomposition error: {e}")
        return None, None
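One thing to note while reading this hunk: the factorization operates on the full weight matrix, while the README generated later reconstructs with `W_fp8 + B @ A`. A hypothetical variant (not part of this commit) that factorizes the quantization residual `W - W_fp8`, which is the quantity an additive reconstruction expects:

```python
# Hypothetical variant (not in this commit): factorize the quantization
# residual rather than the full weight, so that W_fp8 + B @ A ~ W.
import torch

def low_rank_residual(weight: torch.Tensor, fp8_dtype=torch.float8_e4m3fn, rank: int = 64):
    residual = weight.float() - weight.to(fp8_dtype).float()  # what FP8 lost
    U, S, Vh = torch.linalg.svd(residual, full_matrices=False)
    r = min(rank, len(S))
    A = Vh[:r, :].contiguous()           # [r, in_features]
    B = (U[:, :r] * S[:r]).contiguous()  # [out_features, r], columns scaled by S
    return A.to(torch.float16), B.to(torch.float16)

w = torch.randn(512, 512)
A, B = low_rank_residual(w, rank=64)
recon = w.to(torch.float8_e4m3fn).float() + (B.float() @ A.float())
print("relative error:", ((w - recon).norm() / w.norm()).item())
```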
 
def extract_correction_factors(original_weight, fp8_weight):
    """Extract per-channel/tensor correction factors (difference method)."""
    with torch.no_grad():
+        # Convert to float32 for precision
        orig = original_weight.float()
        quant = fp8_weight.float()
+
+        # Compute error (what needs to be added to FP8 to recover original)
        error = orig - quant

+        # Skip if error is negligible
        error_norm = torch.norm(error)
        orig_norm = torch.norm(orig)
        if orig_norm > 1e-6 and error_norm / orig_norm < 0.01:
            return None

+        # For 4D tensors (common in VAE, CNNs)
        if orig.ndim == 4:
+            # Channel dimension is typically dimension 0 (output channels)
            channel_dim = 0
+            # Compute mean error per output channel
            channel_mean = error.mean(dim=tuple(i for i in range(1, orig.ndim)), keepdim=True)
            return channel_mean.to(original_weight.dtype)

        # For 2D tensors (linear layers)
        elif orig.ndim == 2:
+            # Compute mean error per output row
            row_mean = error.mean(dim=1, keepdim=True)
            return row_mean.to(original_weight.dtype)

+        # For 1D tensors (bias, batchnorm)
        else:
            return error.mean().to(original_weight.dtype)

+def analyze_model_architecture(state_dict):
+    """Auto-detect model architecture and components."""
+    keys = " ".join(state_dict.keys()).lower()
+    components = {
+        "text_encoder": False,
+        "unet": False,
+        "vae": False,
+        "clip": False,
+        "transformer": False
+    }

+    # Detect components based on key patterns
+    if "text" in keys or "emb" in keys or ("encoder" in keys and "vae" not in keys):
+        components["text_encoder"] = True
+    if "clip" in keys or "vision" in keys:
+        components["clip"] = True
+
+    if "unet" in keys or ("down_blocks" in keys and "up_blocks" in keys) or ("input_blocks" in keys and "output_blocks" in keys):
+        components["unet"] = True
+    if "transformer" in keys or "attn" in keys:
+        components["transformer"] = True
+
+    if "vae" in keys or ("encoder" in keys and "decoder" in keys) or "quant_conv" in keys or "post_quant" in keys:
+        components["vae"] = True
+
+    return components
+
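Since every key is joined into a single string before matching, detection here is file-level rather than per-layer. A compact re-derivation of three of the checks above, on illustrative VAE-style key names, shows how coarse that heuristic is:

```python
# Sketch of the detector's mechanism: all keys are joined into one string,
# so substring matching is file-level, not per-layer. Keys are illustrative.
keys = " ".join([
    "encoder.down_blocks.0.resnets.0.conv1.weight",
    "decoder.up_blocks.0.resnets.0.conv1.weight",
    "quant_conv.weight",
]).lower()

vae = "vae" in keys or ("encoder" in keys and "decoder" in keys) or "quant_conv" in keys
unet = "unet" in keys or ("down_blocks" in keys and "up_blocks" in keys)
text = "text" in keys or "emb" in keys or ("encoder" in keys and "vae" not in keys)

print(vae, unet, text)  # True True True: a bare VAE also trips the unet/text heuristics
```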
+def convert_safetensors_to_fp8_with_recovery(safetensors_path, output_dir, fp8_format,
+                                             recovery_configs, progress=gr.Progress()):
+    """Convert model to FP8 with customizable per-element recovery strategies."""
+    progress(0.1, desc="Starting FP8 conversion with precision recovery...")
    try:
        def read_safetensors_metadata(path):
            with open(path, 'rb') as f:

        metadata = read_safetensors_metadata(safetensors_path)
        progress(0.2, desc="Loaded metadata.")
+
+        # Load model
        state_dict = load_file(safetensors_path)
+        progress(0.3, desc="Loaded model weights.")

+        # Auto-detect architecture
+        detected_components = analyze_model_architecture(state_dict)
+        print(f"Detected components: {detected_components}")
+
+        # Setup FP8 format
+        fp8_dtype = torch.float8_e5m2 if fp8_format == "e5m2" else torch.float8_e4m3fn

+        # Initialize outputs
        sd_fp8 = {}
        recovery_weights = {}
        stats = {
            "total_layers": len(state_dict),
            "processed_layers": 0,
            "skipped_layers": [],
+            "detected_components": detected_components,
+            "recovery_counts": {"lora": 0, "diff": 0}
        }

+        # Create a mapping from layer keys to recovery config
+        layer_recovery_map = {}
+        for config in recovery_configs:
+            element_pattern = config["element"].lower()
+            for key in state_dict:
+                if element_pattern == "all" or element_pattern in key.lower():
+                    # Only set if not already set (first match wins)
+                    if key not in layer_recovery_map:
+                        layer_recovery_map[key] = config
+
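A condensed, standalone rendering of the mapping built above (toy keys and configs): configs are scanned in their listed order and each key keeps the first config whose pattern matches, so specific patterns must precede the `all` fallback:

```python
# First-match-wins layer -> config mapping, as in the loop above (toy data).
recovery_configs = [
    {"element": "attn", "method": "lora", "rank": 128},
    {"element": "all", "method": "none"},
]
state_dict = {"unet.attn1.to_q.weight": None, "unet.conv_in.weight": None}

layer_recovery_map = {}
for config in recovery_configs:
    pattern = config["element"].lower()
    for key in state_dict:
        if pattern == "all" or pattern in key.lower():
            layer_recovery_map.setdefault(key, config)  # first match wins

print(layer_recovery_map["unet.attn1.to_q.weight"]["method"])  # lora
print(layer_recovery_map["unet.conv_in.weight"]["method"])     # none
```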
+        # Process each tensor
        total = len(state_dict)
        for i, key in enumerate(state_dict):
+            progress(0.3 + 0.5 * (i / total), desc=f"Processing {i+1}/{total}: {key.split('.')[-1]}")
            weight = state_dict[key]

+            # Convert to FP8
            if weight.dtype in [torch.float16, torch.float32, torch.bfloat16]:
                fp8_weight = weight.to(fp8_dtype)
                sd_fp8[key] = fp8_weight
            else:
                sd_fp8[key] = weight
                stats["skipped_layers"].append(f"{key}: non-float dtype")
+                continue
+
+            # Get recovery config for this layer
+            recovery_config = layer_recovery_map.get(key)
+            if not recovery_config or recovery_config["method"] == "none":
+                stats["skipped_layers"].append(f"{key}: no recovery configured")
+                continue
+
+            try:
+                method = recovery_config["method"]
+                if method == "lora" and weight.ndim == 2:
+                    # LoRA recovery for 2D tensors only
+                    rank = recovery_config.get("rank", 64)
+                    # Adjust rank for smaller matrices
+                    adjusted_rank = min(rank, min(weight.shape) // 2)
+                    if adjusted_rank >= 4:
+                        A, B = low_rank_decomposition(weight, rank=adjusted_rank)
+                        if A is not None and B is not None:
+                            recovery_weights[f"lora_A.{key}"] = A
+                            recovery_weights[f"lora_B.{key}"] = B
+                            stats["processed_layers"] += 1
+                            stats["recovery_counts"]["lora"] += 1
+                            continue
+
+                if method == "diff":
+                    # Difference/correction recovery for any tensor type
+                    corr = extract_correction_factors(weight, fp8_weight)
+                    if corr is not None:
+                        recovery_weights[f"diff.{key}"] = corr
+                        stats["processed_layers"] += 1
+                        stats["recovery_counts"]["diff"] += 1
+                        continue
+
+                # If we get here, recovery was configured but couldn't be applied
+                reason = "2D tensor required" if method == "lora" and weight.ndim != 2 else "decomposition failed"
+                stats["skipped_layers"].append(f"{key}: {method} recovery failed ({reason})")
+
+            except Exception as e:
+                stats["skipped_layers"].append(f"{key}: error - {str(e)}")

+        # Save FP8 model
        base_name = os.path.splitext(os.path.basename(safetensors_path))[0]
        fp8_path = os.path.join(output_dir, f"{base_name}-fp8-{fp8_format}.safetensors")
        save_file(sd_fp8, fp8_path, metadata={"format": "pt", "fp8_format": fp8_format, **metadata})

+        # Save recovery weights if any were generated
+        recovery_path = None
        if recovery_weights:
+            recovery_path = os.path.join(output_dir, f"{base_name}-recovery.safetensors")
+            recovery_metadata = {
                "format": "pt",
                "fp8_format": fp8_format,
+                "recovery_config": json.dumps(recovery_configs),
                "stats": json.dumps(stats)
+            }
+            save_file(recovery_weights, recovery_path, metadata=recovery_metadata)

        progress(0.9, desc="Saved FP8 and recovery files.")

+        # Generate stats message
+        stats_msg = f"FP8 ({fp8_format}) conversion complete with precision recovery:\n"
        stats_msg += f"- Total layers: {stats['total_layers']}\n"
+        stats_msg += f"- Layers with recovery: {stats['processed_layers']}\n"
+        stats_msg += f"  - LoRA recovery: {stats['recovery_counts']['lora']}\n"
+        stats_msg += f"  - Difference recovery: {stats['recovery_counts']['diff']}\n"

+        if not recovery_weights:
+            stats_msg += "\n⚠️ No recovery weights were generated. All layers use pure FP8."

+        progress(1.0, desc="✅ FP8 conversion with precision recovery complete!")
+        return True, stats_msg, stats, fp8_path, recovery_path

    except Exception as e:
+        import traceback
+        error_msg = f"Error: {str(e)}\n{traceback.format_exc()}"
+        return False, error_msg, None, None, None
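The body of `read_safetensors_metadata` is elided by this hunk. For reference, a safetensors file begins with an 8-byte little-endian header length followed by a JSON header whose optional `__metadata__` field holds user metadata; a minimal reader along those lines (a sketch of what the elided helper plausibly does, not the commit's code):

```python
# Minimal safetensors metadata reader: 8-byte little-endian header size,
# then a JSON header; user metadata lives under "__metadata__".
# A sketch of what the elided helper plausibly does, not the commit's code.
import json
import struct

def read_safetensors_metadata(path):
    with open(path, 'rb') as f:
        header_size = struct.unpack('<Q', f.read(8))[0]
        header = json.loads(f.read(header_size))
    return header.get("__metadata__", {})
```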
 
def parse_hf_url(url):
    url = url.strip().rstrip("/")

    repo_url,
    safetensors_filename,
    fp8_format,
+    recovery_configs_json,
    target_type,
    new_repo_id,
    hf_token,

    progress=gr.Progress()
):
    if not re.match(r"^[a-zA-Z0-9._-]+/[a-zA-Z0-9._-]+$", new_repo_id):
+        return None, "❌ Invalid repo ID format. Use 'username/model-name'.", "", ""
    if source_type == "huggingface" and not hf_token:
+        return None, "❌ Hugging Face token required for source.", "", ""
    if target_type == "huggingface" and not hf_token:
+        return None, "❌ Hugging Face token required for target.", "", ""

+    # Parse recovery configs
+    try:
+        recovery_configs = json.loads(recovery_configs_json)
+    except json.JSONDecodeError:
+        return None, "❌ Invalid recovery configuration JSON.", "", ""
+
+    # Validate config format
+    valid_methods = ["none", "lora", "diff"]
+    for config in recovery_configs:
+        if "element" not in config or "method" not in config:
+            return None, "❌ Invalid config format: each config needs 'element' and 'method'", "", ""
+        if config["method"] not in valid_methods:
+            return None, f"❌ Invalid method: {config['method']}. Use 'none', 'lora', or 'diff'", "", ""
+        if config["method"] == "lora" and "rank" not in config:
+            return None, "❌ LoRA method requires 'rank' parameter", "", ""
    temp_dir = None
    output_dir = tempfile.mkdtemp()

            source_type, repo_url, safetensors_filename, hf_token, progress
        )

+        progress(0.2, desc="Converting to FP8 with precision recovery...")
+        success, msg, stats, fp8_path, recovery_path = convert_safetensors_to_fp8_with_recovery(
+            safetensors_path, output_dir, fp8_format, recovery_configs, progress
        )

        if not success:
+            return None, f"❌ Conversion failed: {msg}", "", ""

        progress(0.9, desc="Uploading...")
        repo_url_final = upload_to_target(
            target_type, new_repo_id, output_dir, fp8_format, hf_token, modelscope_token, private_repo
        )

+        # Generate README
        base_name = os.path.splitext(safetensors_filename)[0]
+        fp8_filename = os.path.basename(fp8_path)
+        recovery_filename = os.path.basename(recovery_path) if recovery_path else ""

        readme = f"""---
library_name: diffusers

 - mixed-method
 - converted-by-gradio
---
+# FP8 Model with Mixed Precision Recovery
 - **Source**: `{repo_url}`
+- **Original File**: `{safetensors_filename}`
 - **FP8 Format**: `{fp8_format.upper()}`
+- **FP8 File**: `{fp8_filename}`
+- **Recovery File**: `{recovery_filename if recovery_filename else "None"}`

+## Recovery Configuration
+```json
+{json.dumps(recovery_configs, indent=2)}
```

+## Usage (Inference)
```python
from safetensors.torch import load_file
import torch

+# Load FP8 model
fp8_state = load_file("{fp8_filename}")

+# Load recovery weights if available
+recovery_state = load_file("{recovery_filename}") if "{recovery_filename}" and os.path.exists("{recovery_filename}") else {{}}
+
+# Reconstruct high-precision weights
reconstructed = {{}}
for key in fp8_state:
+    fp8_weight = fp8_state[key].to(torch.float32)  # Convert to float32 for computation

+    # Apply LoRA recovery if available
    if f"lora_A.{{key}}" in recovery_state and f"lora_B.{{key}}" in recovery_state:
        A = recovery_state[f"lora_A.{{key}}"].to(torch.float32)
        B = recovery_state[f"lora_B.{{key}}"].to(torch.float32)
+        # Reconstruct the low-rank approximation
        lora_weight = B @ A
        fp8_weight = fp8_weight + lora_weight

+    # Apply difference recovery if available
    if f"diff.{{key}}" in recovery_state:
        diff = recovery_state[f"diff.{{key}}"].to(torch.float32)
        fp8_weight = fp8_weight + diff

    reconstructed[key] = fp8_weight
+
+# Use reconstructed weights in your model
+model.load_state_dict(reconstructed)
```
+
+> **Note**: For best results, use the same recovery configuration during inference as was used during extraction.
+> Requires PyTorch ≥ 2.1 for FP8 support.
+
+## Statistics
+- **Total layers**: {stats['total_layers']}
+- **Layers with recovery**: {stats['processed_layers']}
+  - LoRA recovery: {stats['recovery_counts']['lora']}
+  - Difference recovery: {stats['recovery_counts']['diff']}
"""

        with open(os.path.join(output_dir, "README.md"), "w") as f:
 
        )

        progress(1.0, desc="✅ Done!")
+
+        # Generate result HTML
+        recovery_links = []
+        if recovery_path:
+            recovery_links.append(f"- **Recovery weights**: `{recovery_filename}`")
+
        result_html = f"""
        ✅ Success!
        Model uploaded to: <a href="{repo_url_final}" target="_blank">{new_repo_id}</a>
+        Includes:
+        - FP8 model: `{fp8_filename}`
+        - {chr(10).join(recovery_links)}
        """
+
+        recovery_details = f"Recovery file: {recovery_filename}" if recovery_filename else "No recovery weights generated"
+        return (gr.HTML(result_html),
+                "✅ FP8 conversion with precision recovery successful!",
+                msg,
+                recovery_details)

    except Exception as e:
+        import traceback
+        error_details = f"❌ Error: {str(e)}\n{traceback.format_exc()}"
+        return None, error_details, "", ""

    finally:
        if temp_dir:
            shutil.rmtree(temp_dir, ignore_errors=True)
        shutil.rmtree(output_dir, ignore_errors=True)
466
+ with gr.Blocks(title="Advanced FP8 Quantizer with Mixed Precision Recovery") as demo:
467
+ gr.Markdown("# πŸ”„ Advanced FP8 Quantizer with Per-Layer Precision Recovery")
468
+ gr.Markdown("Convert `.safetensors` β†’ **FP8** + **customizable precision recovery**. Full control over LoRA and difference methods per layer.")
469
 
470
  with gr.Row():
471
  with gr.Column():
 
476
  with gr.Accordion("FP8 Settings", open=True):
477
  fp8_format = gr.Radio(["e4m3fn", "e5m2"], value="e5m2", label="FP8 Format")
478
 
479
+ with gr.Accordion("Per-Layer Recovery Configuration", open=True):
480
  gr.Markdown("""
481
+ ### Configure recovery strategy for each layer type
482
+
483
+ Format: JSON array of configuration objects:
484
+ ```json
485
+ [
486
+ {"element": "pattern1", "method": "lora", "rank": 64},
487
+ {"element": "pattern2", "method": "diff"},
488
+ {"element": "all", "method": "none"}
489
+ ]
490
+ ```
491
 
492
+ - `element`: Substring to match in weight keys (case-insensitive). Use "all" for default.
493
+ - `method`: "none" (pure FP8), "lora" (low-rank adaptation), or "diff" (difference/correction)
494
+ - `rank`: Required for "lora" method. Higher = better quality but larger file.
495
 
496
+ **Rules are applied in order** - first match wins. Always end with an "all" rule.
497
  """)
498
+
499
+ recovery_configs_json = gr.Textbox(
500
+ value="""[
501
+ {"element": "vae", "method": "diff"},
502
+ {"element": "encoder", "method": "diff"},
503
+ {"element": "decoder", "method": "diff"},
504
+ {"element": "text", "method": "lora", "rank": 64},
505
+ {"element": "emb", "method": "lora", "rank": 64},
506
+ {"element": "attn", "method": "lora", "rank": 128},
507
+ {"element": "all", "method": "none"}
508
+ ]""",
509
+ lines=10,
510
+ label="Recovery Configuration (JSON)",
511
+ interactive=True
512
  )
513
 
514
  with gr.Accordion("Authentication", open=False):
 
    status_output = gr.Markdown()
    detailed_log = gr.Textbox(label="Processing Log", interactive=False, lines=10)
+    recovery_summary = gr.Textbox(label="Recovery Files Generated", interactive=False, lines=3)

    convert_btn = gr.Button("🚀 Convert & Upload", variant="primary")
    repo_link_output = gr.HTML()
 
            repo_url,
            safetensors_filename,
            fp8_format,
+            recovery_configs_json,
            target_type,
            new_repo_id,
            hf_token,
            modelscope_token,
            private_repo
        ],
+        outputs=[repo_link_output, status_output, detailed_log, recovery_summary],
        show_progress=True
    )

    gr.Examples(
        examples=[
            [
+                "huggingface",
+                "https://huggingface.co/stabilityai/sdxl-vae",
+                "diffusion_pytorch_model.safetensors",
+                "e4m3fn",
+                """[
+    {"element": "vae", "method": "diff"},
+    {"element": "encoder", "method": "diff"},
+    {"element": "decoder", "method": "diff"},
+    {"element": "all", "method": "none"}
+]""",
+                "huggingface"
+            ],
+            [
+                "huggingface",
+                "https://huggingface.co/runwayml/stable-diffusion-v1-5/tree/main/text_encoder",
+                "model.safetensors",
                "e5m2",
+                """[
+    {"element": "text", "method": "lora", "rank": 64},
+    {"element": "emb", "method": "lora", "rank": 64},
+    {"element": "all", "method": "none"}
+]""",
                "huggingface"
            ],
            [
                "huggingface",
+                "https://huggingface.co/Yabo/FramePainter/tree/main",
+                "unet_diffusion_pytorch_model.safetensors",
                "e5m2",
+                """[
+    {"element": "attn", "method": "lora", "rank": 128},
+    {"element": "transformer", "method": "lora", "rank": 96},
+    {"element": "conv", "method": "diff"},
+    {"element": "resnet", "method": "diff"},
+    {"element": "all", "method": "none"}
+]""",
                "huggingface"
            ]
        ],
+        inputs=[source_type, repo_url, safetensors_filename, fp8_format, recovery_configs_json, target_type],
        label="Example Conversions"
    )

    gr.Markdown("""
+    ## 💡 Precision Recovery Strategy Guide

+    ### **LoRA Method** (best for attention/linear layers)
+    - **Use for**: `text`, `attn`, `transformer`, `emb`, `mlp` layers
+    - **Rank selection**:
+        - Text encoders: 64-128
+        - Attention blocks: 64-128
+        - Other linear layers: 32-64
+    - **Benefits**: Captures weight matrix structure, better for semantic understanding
+    - **Limitations**: Only works on 2D tensors, not suitable for convolutions

+    ### **Difference Method** (best for convolutional layers)
+    - **Use for**: `vae`, `encoder`, `decoder`, `conv`, `resnet` layers
+    - **How it works**: Stores the exact difference between FP8 and original weights
+    - **Benefits**: Works with any tensor shape, more accurate for spatial features
+    - **Limitations**: Larger file size than LoRA for equivalent quality

    ### **Rule Ordering Tips**
+    - Put specific patterns first (`vae.encoder`), general patterns last (`all`)
+    - Always end with an `{"element": "all", "method": "none"}` rule as fallback
+    - Layer names are **case-insensitive** - use lowercase patterns for matching

+    > **Pro Tip**: For diffusion models, use Difference for VAE/convolutional components and LoRA for text/attention components for optimal quality/size tradeoff.
    """)

demo.launch()
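To sanity-check a conversion locally before uploading, the two output files can be compared against the original. A hedged sketch (file names follow the app's `{base}-fp8-{format}.safetensors` and `{base}-recovery.safetensors` convention; the paths are illustrative):

```python
# Local sanity check (illustrative paths): reconstruct each layer from the
# FP8 file plus recovery weights and report the worst relative error.
import torch
from safetensors.torch import load_file

orig = load_file("model.safetensors")
fp8 = load_file("model-fp8-e5m2.safetensors")
rec = load_file("model-recovery.safetensors")

worst = 0.0
for key, w in orig.items():
    w = w.float()
    r = fp8[key].float()
    if f"lora_A.{key}" in rec:  # low-rank term, stored as B @ A
        r = r + rec[f"lora_B.{key}"].float() @ rec[f"lora_A.{key}"].float()
    if f"diff.{key}" in rec:    # broadcastable per-channel correction
        r = r + rec[f"diff.{key}"].float()
    if w.norm() > 0:
        worst = max(worst, ((w - r).norm() / w.norm()).item())
print(f"worst relative error: {worst:.4f}")
```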