Update app.py

app.py CHANGED
@@ -5,11 +5,14 @@ import shutil
 import re
 import json
 from pathlib import Path
-from huggingface_hub import HfApi, hf_hub_download
+from huggingface_hub import HfApi, hf_hub_download, snapshot_download, list_repo_files
 from safetensors.torch import load_file, save_file
 import torch
 import torch.nn.functional as F
 import traceback
+import glob
+import time
+from concurrent.futures import ThreadPoolExecutor, as_completed
 try:
     from modelscope.hub.file_download import model_file_download as ms_file_download
     from modelscope.hub.api import HubApi as ModelScopeApi
@@ -151,24 +154,145 @@ def matches_pattern(key, tensor_info, pattern):
 
     return True
 
-def convert_safetensors_to_fp8_with_recovery(safetensors_path, output_dir, fp8_format,
-
-
-
-
-
-
+def load_model_files(model_paths, model_format="safetensors", progress_callback=None):
+    """
+    Load model weights from one or more files, supporting sharded safetensors and other formats.
+    """
+    state_dict = {}
+
+    if model_format == "safetensors":
+        # Handle sharded safetensors files
+        for i, path in enumerate(model_paths):
+            if progress_callback:
+                progress_callback(f"Loading shard {i+1}/{len(model_paths)}: {os.path.basename(path)}")
+            part_dict = load_file(path)
+            state_dict.update(part_dict)
+    elif model_format in ["pth", "pt"]:
+        # PyTorch checkpoint files
+        for i, path in enumerate(model_paths):
+            if progress_callback:
+                progress_callback(f"Loading checkpoint {i+1}/{len(model_paths)}: {os.path.basename(path)}")
+            checkpoint = torch.load(path, map_location="cpu")
+            if isinstance(checkpoint, dict):
+                # Try to extract state dict from checkpoint
+                if "state_dict" in checkpoint:
+                    state_dict.update(checkpoint["state_dict"])
+                elif "model_state_dict" in checkpoint:
+                    state_dict.update(checkpoint["model_state_dict"])
+                elif "model" in checkpoint and isinstance(checkpoint["model"], dict):
+                    state_dict.update(checkpoint["model"])
+                else:
+                    # Assume the checkpoint itself is the state dict
+                    state_dict.update(checkpoint)
+    elif model_format == "ckpt":
+        # Checkpoint files (similar to pth)
+        for i, path in enumerate(model_paths):
+            if progress_callback:
+                progress_callback(f"Loading checkpoint {i+1}/{len(model_paths)}: {os.path.basename(path)}")
+            checkpoint = torch.load(path, map_location="cpu")
+            if isinstance(checkpoint, dict):
+                if "state_dict" in checkpoint:
+                    state_dict.update(checkpoint["state_dict"])
+                elif "model_state_dict" in checkpoint:
+                    state_dict.update(checkpoint["model_state_dict"])
+                elif "model" in checkpoint and isinstance(checkpoint["model"], dict):
+                    state_dict.update(checkpoint["model"])
+                else:
+                    state_dict.update(checkpoint)
+
+    return state_dict
+
+def read_model_metadata(model_paths, model_format="safetensors"):
+    """Read metadata from model files."""
+    metadata = {}
+
+    if model_format == "safetensors":
+        # Read metadata from the first safetensors file
+        if model_paths:
+            with open(model_paths[0], 'rb') as f:
                 header_size = int.from_bytes(f.read(8), 'little')
                 header_json = f.read(header_size).decode('utf-8')
                 header = json.loads(header_json)
-
+                metadata = header.get('__metadata__', {})
+    elif model_format in ["pth", "pt", "ckpt"]:
+        # Try to extract metadata from checkpoint files
+        if model_paths:
+            checkpoint = torch.load(model_paths[0], map_location="cpu")
+            if isinstance(checkpoint, dict):
+                # Look for common metadata keys
+                for key in ["hyperparameters", "args", "config", "metadata"]:
+                    if key in checkpoint:
+                        metadata[key] = checkpoint[key]
+
+    return metadata
+
+def extract_base_name_from_sharded_files(model_paths):
+    """Extract a common base name from sharded files."""
+    if not model_paths:
+        return "model"
+
+    if len(model_paths) == 1:
+        # Single file case
+        base_name = os.path.splitext(os.path.basename(model_paths[0]))[0]
+        # Remove common suffixes
+        for suffix in ["-fp8", "-fp16", "-bf16", "-32", "-16"]:
+            if base_name.endswith(suffix):
+                base_name = base_name[:-len(suffix)]
+        return base_name
+
+    # Multiple files case - find common prefix
+    base_names = [os.path.splitext(os.path.basename(p))[0] for p in model_paths]
+
+    # Handle Hugging Face pattern: model-00001-of-00002.safetensors
+    # Extract the part before the shard numbering
+    if all("-of-" in name for name in base_names):
+        # All files follow the "model-XXXXX-of-YYYYY" pattern
+        common_parts = []
+        for name in base_names:
+            # Split at the shard numbering
+            parts = name.split("-")
+            if len(parts) >= 3 and parts[-2].isdigit() and parts[-1].startswith("of"):
+                # Remove the last two parts (shard number and total)
+                common_part = "-".join(parts[:-2])
+                common_parts.append(common_part)
+            else:
+                common_parts.append(name)
 
-
-
+        # Use the most common base name
+        from collections import Counter
+        base_name = Counter(common_parts).most_common(1)[0][0]
+        return base_name
+
+    # Fallback: find common prefix
+    common_prefix = ""
+    for chars in zip(*base_names):
+        if len(set(chars)) == 1:
+            common_prefix += chars[0]
+        else:
+            break
+
+    # Clean up the common prefix
+    base_name = re.sub(r'[-_]+$', '', common_prefix)
+    if not base_name:
+        base_name = "model"
+
+    return base_name
+
+def convert_model_to_fp8_with_recovery(model_paths, output_dir, fp8_format, recovery_rules,
+                                       model_format="safetensors", progress=gr.Progress()):
+    """Convert model to FP8 with customizable per-tensor recovery strategies."""
+    progress(0.05, desc=f"Starting FP8 conversion with precision recovery for {model_format}...")
+    try:
+        metadata = read_model_metadata(model_paths, model_format)
+        progress(0.1, desc="Loaded metadata.")
 
-        # Load model
-        state_dict =
-
+        # Load model with progress tracking
+        state_dict = load_model_files(
+            model_paths,
+            model_format,
+            progress_callback=lambda msg: progress(0.15, desc=msg)
+        )
+        progress(0.25, desc=f"Loaded {len(model_paths)} model files with {len(state_dict)} tensors.")
 
         # Setup FP8 format
         fp8_dtype = torch.float8_e5m2 if fp8_format == "e5m2" else torch.float8_e4m3fn
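The header read in `read_model_metadata` above relies on the safetensors container layout: a file begins with an 8-byte little-endian header length, followed by a JSON header whose optional `__metadata__` entry maps strings to strings. A minimal standalone sketch of the same read (the filename is illustrative):

```python
import json

def peek_safetensors_metadata(path):
    """Return the __metadata__ dict of a .safetensors file without loading any tensors."""
    with open(path, 'rb') as f:
        header_size = int.from_bytes(f.read(8), 'little')  # 8-byte little-endian header length
        header = json.loads(f.read(header_size).decode('utf-8'))
    return header.get('__metadata__', {})

# print(peek_safetensors_metadata("model-00001-of-00002.safetensors"))
```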
@@ -234,7 +358,7 @@ def convert_safetensors_to_fp8_with_recovery(safetensors_path, output_dir, fp8_format,
                         stats["recovery_counts"]["diff"] += 1
                         stats["rule_matches"][rule_idx] += 1
                         recovery_applied = True
-
+                        break
 
         # If method is "none" or recovery failed, continue to next rule
         if recovery_method == "none":
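The added `break` makes rule matching first-match-wins: once a rule has applied diff recovery to a tensor, later (typically more generic) rules are no longer consulted. A simplified sketch of that control flow, using a hypothetical two-rule list rather than the app's full rule schema:

```python
key = "vae.encoder.conv_in.weight"
rules = [
    {"key_pattern": "vae", "method": "diff"},  # specific rule
    {"key_pattern": "", "method": "none"},     # catch-all rule
]
for rule_idx, rule in enumerate(rules):
    if rule["key_pattern"] in key:
        print(f"{key} handled by rule {rule_idx} ({rule['method']})")
        break  # without this, the catch-all would also match and re-handle the tensor
```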
@@ -247,17 +371,19 @@ def convert_safetensors_to_fp8_with_recovery(safetensors_path, output_dir, fp8_format,
             reason = "no matching rule" if matched_rule_index == -1 else f"recovery failed with rule {matched_rule_index}"
             stats["skipped_layers"].append(f"{key}: {reason}")
 
+        # Extract base name for output files
+        base_name = extract_base_name_from_sharded_files(model_paths)
+
         # Save FP8 model
-        base_name = os.path.splitext(os.path.basename(safetensors_path))[0]
         fp8_path = os.path.join(output_dir, f"{base_name}-fp8-{fp8_format}.safetensors")
-        save_file(sd_fp8, fp8_path, metadata={"format":
+        save_file(sd_fp8, fp8_path, metadata={"format": model_format, "fp8_format": fp8_format, **metadata})
 
         # Save recovery weights if any were generated
         recovery_path = None
         if recovery_weights:
             recovery_path = os.path.join(output_dir, f"{base_name}-recovery.safetensors")
             recovery_metadata = {
-                "format":
+                "format": model_format,
                 "fp8_format": fp8_format,
                 "recovery_rules": json.dumps(recovery_rules),
                 "stats": json.dumps(stats)
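One caveat on the `**metadata` spread: `safetensors.torch.save_file` only accepts flat `str -> str` metadata, which is safe for values read back from a safetensors `__metadata__` block but not for the nested dicts `read_model_metadata` can pull out of a `.ckpt`. A hedged sketch of a coercion guard (an assumption of this note, not part of the diff itself):

```python
import json

def stringify_metadata(metadata):
    """Flatten arbitrary metadata into the str -> str mapping that save_file requires."""
    return {str(k): v if isinstance(v, str) else json.dumps(v, default=str)
            for k, v in metadata.items()}
```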
@@ -309,27 +435,176 @@ def parse_hf_url(url):
         subfolder = "/".join(parts[2:])
     return repo_id, subfolder
 
-def
+def download_single_file(args):
+    """Helper function for parallel downloads."""
+    repo_id, filename, subfolder, cache_dir, token = args
+    try:
+        path = hf_hub_download(
+            repo_id=repo_id,
+            filename=filename,
+            subfolder=subfolder,
+            cache_dir=cache_dir,
+            token=token,
+            resume_download=True
+        )
+        return path, None
+    except Exception as e:
+        return None, str(e)
+
+def find_sharded_safetensors_files(repo_id, subfolder=None, hf_token=None, max_shards=50):
+    """Find all sharded safetensors files in a repository."""
+    try:
+        # List all files in the repository
+        repo_files = list_repo_files(repo_id, repo_type="model", token=hf_token)
+
+        # Filter for safetensors files in the subfolder
+        if subfolder:
+            pattern = f"{subfolder}/"
+            safetensors_files = [f for f in repo_files if f.endswith('.safetensors') and f.startswith(pattern)]
+            # Remove subfolder prefix
+            safetensors_files = [f[len(pattern):] for f in safetensors_files]
+        else:
+            safetensors_files = [f for f in repo_files if f.endswith('.safetensors')]
+
+        # Check if files follow sharding pattern
+        sharded_files = []
+        single_files = []
+
+        for f in safetensors_files:
+            if "-of-" in f:
+                sharded_files.append(f)
+            else:
+                single_files.append(f)
+
+        # Return sharded files if found, otherwise single files
+        if sharded_files:
+            # Sort by shard number for consistent ordering
+            sharded_files.sort(key=lambda x: int(re.search(r'-(\d+)-of-', x).group(1)))
+            # Limit number of shards to prevent accidental downloads of huge models
+            if len(sharded_files) > max_shards:
+                raise ValueError(f"Too many shards found ({len(sharded_files)}). Maximum allowed is {max_shards}. "
+                                 f"Please specify a more specific pattern.")
+            return sharded_files
+        elif single_files:
+            return single_files
+        else:
+            return []
+
+    except Exception as e:
+        print(f"Error listing repository files: {e}")
+        return []
+
+def download_model_files(source_type, repo_url, filename_pattern, model_format, hf_token=None, progress=gr.Progress()):
     temp_dir = tempfile.mkdtemp()
     try:
         if source_type == "huggingface":
             repo_id, subfolder = parse_hf_url(repo_url)
-
-
-
-
-
-
-
-
+
+            if model_format == "safetensors":
+                # Handle different patterns for safetensors
+                if filename_pattern == "auto" or filename_pattern == "":
+                    # Auto-detect sharded files
+                    progress(0.1, desc="Discovering model files...")
+                    found_files = find_sharded_safetensors_files(repo_id, subfolder, hf_token)
+                    if not found_files:
+                        raise ValueError("No safetensors files found in repository")
+
+                    progress(0.2, desc=f"Found {len(found_files)} shard(s). Downloading...")
+
+                    # Download files in parallel for better performance
+                    model_paths = []
+                    download_args = [
+                        (repo_id, filename, subfolder, temp_dir, hf_token)
+                        for filename in found_files
+                    ]
+
+                    with ThreadPoolExecutor(max_workers=4) as executor:
+                        futures = {executor.submit(download_single_file, args): args[1] for args in download_args}
+
+                        for i, future in enumerate(as_completed(futures)):
+                            filename = futures[future]
+                            try:
+                                path, error = future.result()
+                                if error:
+                                    raise Exception(f"Failed to download {filename}: {error}")
+                                model_paths.append(path)
+                                progress(0.2 + 0.6 * (i + 1) / len(futures),
+                                         desc=f"Downloaded {i+1}/{len(futures)}: {filename}")
+                            except Exception as e:
+                                raise e
+
+                    return model_paths, temp_dir
+
+                elif "*" in filename_pattern:
+                    # For wildcard patterns, download the entire directory and filter
+                    progress(0.1, desc="Downloading repository snapshot...")
+                    local_dir = os.path.join(temp_dir, "download")
+                    snapshot_download(
+                        repo_id=repo_id,
+                        subfolder=subfolder or None,
+                        local_dir=local_dir,
+                        token=hf_token,
+                        resume_download=True
+                    )
+
+                    # Find files matching the pattern
+                    if subfolder:
+                        pattern_dir = os.path.join(local_dir, subfolder)
+                    else:
+                        pattern_dir = local_dir
+
+                    model_files = glob.glob(os.path.join(pattern_dir, filename_pattern))
+                    if not model_files:
+                        raise ValueError(f"No files found matching pattern: {filename_pattern}")
+
+                    # Limit number of files
+                    if len(model_files) > 50:
+                        raise ValueError(f"Too many files found ({len(model_files)}). Please use a more specific pattern.")
+
+                    return model_files, temp_dir
+                else:
+                    # Single file
+                    progress(0.2, desc=f"Downloading {filename_pattern}...")
+                    model_path = hf_hub_download(
+                        repo_id=repo_id,
+                        filename=filename_pattern,
+                        subfolder=subfolder or None,
+                        cache_dir=temp_dir,
+                        token=hf_token,
+                        resume_download=True
+                    )
+                    return [model_path], temp_dir
+            else:
+                # For non-safetensors formats
+                if "*" in filename_pattern:
+                    raise ValueError("Wildcards only supported for safetensors format")
+                progress(0.2, desc=f"Downloading {filename_pattern}...")
+                model_path = hf_hub_download(
+                    repo_id=repo_id,
+                    filename=filename_pattern,
+                    subfolder=subfolder or None,
+                    cache_dir=temp_dir,
+                    token=hf_token,
+                    resume_download=True
+                )
+                return [model_path], temp_dir
+
         elif source_type == "modelscope":
             if not MODELScope_AVAILABLE:
                 raise ImportError("ModelScope not installed")
             repo_id = repo_url.strip()
-
+
+            if model_format == "safetensors" and "*" in filename_pattern:
+                # For ModelScope, we need to handle sharded files differently
+                # This is a simplified approach - in a real implementation, you might need to list files first
+                raise NotImplementedError("Pattern matching for ModelScope sharded files not fully implemented")
+            else:
+                progress(0.2, desc=f"Downloading {filename_pattern}...")
+                model_path = ms_file_download(model_id=repo_id, file_path=filename_pattern)
+                return [model_path], temp_dir
         else:
             raise ValueError("Unknown source")
-
+
     except Exception as e:
         shutil.rmtree(temp_dir, ignore_errors=True)
         raise e
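Shard ordering in `find_sharded_safetensors_files` comes from the numeric index embedded in the conventional `-NNNNN-of-MMMMM` filename; sorting on the captured integer keeps shards in index order even when padding widths differ. The same sort key in isolation:

```python
import re

shards = [
    "model-00010-of-00010.safetensors",
    "model-00002-of-00010.safetensors",
    "model-00001-of-00010.safetensors",
]
# Order by the shard index captured from "-NNNNN-of-", as the function above does.
shards.sort(key=lambda x: int(re.search(r'-(\d+)-of-', x).group(1)))
print(shards)  # index order: 00001, 00002, 00010
```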
@@ -506,7 +781,8 @@ def generate_default_rules(architecture="auto"):
 def process_and_upload_fp8(
     source_type,
     repo_url,
-
+    filename_pattern,
+    model_format,
     fp8_format,
     recovery_rules_json,
     target_type,
@@ -541,13 +817,13 @@
     output_dir = tempfile.mkdtemp()
     try:
         progress(0.05, desc="Downloading model...")
-
-            source_type, repo_url,
+        model_paths, temp_dir = download_model_files(
+            source_type, repo_url, filename_pattern, model_format, hf_token, progress
         )
 
-        progress(0.
-        success, msg, stats, fp8_path, recovery_path =
-
+        progress(0.8, desc="Converting to FP8 with precision recovery...")
+        success, msg, stats, fp8_path, recovery_path = convert_model_to_fp8_with_recovery(
+            model_paths, output_dir, fp8_format, recovery_rules, model_format, progress
         )
 
         if not success:
@@ -559,7 +835,14 @@
         )
 
         # Generate README
-
+        if len(model_paths) == 1:
+            original_filename = os.path.basename(model_paths[0])
+        else:
+            original_filename = f"{len(model_paths)} sharded files"
+            # Add the pattern if not auto
+            if filename_pattern != "auto":
+                original_filename += f" matching '{filename_pattern}'"
+
         fp8_filename = os.path.basename(fp8_path)
         recovery_filename = os.path.basename(recovery_path) if recovery_path else ""
@@ -574,27 +857,23 @@ tags:
 ---
 # FP8 Model with Per-Tensor Precision Recovery
 - **Source**: `{repo_url}`
-- **Original File**: `{
+- **Original File(s)**: `{original_filename}`
+- **Original Format**: `{model_format}`
 - **FP8 Format**: `{fp8_format.upper()}`
 - **FP8 File**: `{fp8_filename}`
 - **Recovery File**: `{recovery_filename if recovery_filename else "None"}`
-
 ## Recovery Rules Used
 ```json
 {json.dumps(recovery_rules, indent=2)}
 ```
-
 ## Usage (Inference)
 ```python
 from safetensors.torch import load_file
 import torch
-
 # Load FP8 model
 fp8_state = load_file("{fp8_filename}")
-
 # Load recovery weights if available
 recovery_state = load_file("{recovery_filename}") if "{recovery_filename}" and os.path.exists("{recovery_filename}") else {{}}
-
 # Reconstruct high-precision weights
 reconstructed = {{}}
 for key in fp8_state:
@@ -617,14 +896,11 @@ for key in fp8_state:
         fp8_weight = fp8_weight + diff
 
     reconstructed[key] = fp8_weight
-
 # Use reconstructed weights in your model
 model.load_state_dict(reconstructed)
 ```
-
 > **Note**: For best results, use the same recovery configuration during inference as was used during extraction.
 > Requires PyTorch ≥ 2.1 for FP8 support.
-
 ## Statistics
 - **Total layers**: {stats['total_layers']}
 - **Layers with recovery**: {stats['processed_layers']}
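The `diff` recovery scheme in the generated README rests on a simple identity: if `diff = w - float(fp8(w))` is stored at higher precision, adding it back reproduces the original weight. A quick round trip (assumes PyTorch ≥ 2.1 for the float8 dtypes):

```python
import torch

w = torch.randn(4, 4)                    # original full-precision weight
w_fp8 = w.to(torch.float8_e4m3fn)        # lossy FP8 cast
w_back = w_fp8.to(torch.float32)         # upcast for compute
diff = w - w_back                        # the tensor a recovery file would store
print(torch.allclose(w, w_back + diff))  # True: the diff restores the original
```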
@@ -676,13 +952,24 @@ Includes:
 
 with gr.Blocks(title="Advanced FP8 Quantizer with Per-Tensor Precision Recovery") as demo:
     gr.Markdown("# 🚀 Advanced FP8 Quantizer with Per-Tensor Precision Recovery")
-    gr.Markdown("Convert
+    gr.Markdown("Convert model files (safetensors, pth, ckpt) → **FP8** + **customizable precision recovery**. Supports any number of sharded files.")
 
     with gr.Row():
         with gr.Column():
             source_type = gr.Radio(["huggingface", "modelscope"], value="huggingface", label="Source")
             repo_url = gr.Textbox(label="Repo URL or ID", placeholder="https://huggingface.co/... or modelscope-id")
-
+
+            with gr.Row():
+                model_format = gr.Dropdown(
+                    choices=["safetensors", "pth", "pt", "ckpt"],
+                    value="safetensors",
+                    label="Model Format"
+                )
+                filename_pattern = gr.Textbox(
+                    label="Filename or Pattern",
+                    placeholder="auto (detects sharded files) or model-*.safetensors",
+                    value="auto"
+                )
 
     with gr.Accordion("FP8 Settings", open=True):
         fp8_format = gr.Radio(["e4m3fn", "e5m2"], value="e5m2", label="FP8 Format")
@@ -772,7 +1059,8 @@ with gr.Blocks(title="Advanced FP8 Quantizer with Per-Tensor Precision Recovery") as demo:
             inputs=[
                 source_type,
                 repo_url,
-
+                filename_pattern,
+                model_format,
                 fp8_format,
                 recovery_rules_json,
                 target_type,
@@ -790,7 +1078,8 @@ with gr.Blocks(title="Advanced FP8 Quantizer with Per-Tensor Precision Recovery") as demo:
             [
                 "huggingface",
                 "https://huggingface.co/stabilityai/sdxl-vae",
-                "
+                "auto",
+                "safetensors",
                 "e4m3fn",
                 generate_default_rules("vae"),
                 "huggingface"
@@ -798,7 +1087,8 @@ with gr.Blocks(title="Advanced FP8 Quantizer with Per-Tensor Precision Recovery") as demo:
             [
                 "huggingface",
                 "https://huggingface.co/runwayml/stable-diffusion-v1-5/tree/main/text_encoder",
-                "
+                "auto",
+                "safetensors",
                 "e5m2",
                 generate_default_rules("text_encoder"),
                 "huggingface"
@@ -806,13 +1096,32 @@ with gr.Blocks(title="Advanced FP8 Quantizer with Per-Tensor Precision Recovery") as demo:
             [
                 "huggingface",
                 "https://huggingface.co/Yabo/FramePainter/tree/main",
-                "
+                "auto",
+                "safetensors",
                 "e5m2",
                 generate_default_rules("unet_transformer"),
                 "huggingface"
+            ],
+            [
+                "huggingface",
+                "https://huggingface.co/stabilityai/stable-diffusion-2-1",
+                "model-*.safetensors",
+                "safetensors",
+                "e5m2",
+                generate_default_rules("all"),
+                "huggingface"
+            ],
+            [
+                "huggingface",
+                "https://huggingface.co/CompVis/stable-diffusion-v1-4",
+                "sd-v1-4.ckpt",
+                "ckpt",
+                "e5m2",
+                generate_default_rules("all"),
+                "huggingface"
             ]
         ],
-        inputs=[source_type, repo_url,
+        inputs=[source_type, repo_url, filename_pattern, model_format, fp8_format, recovery_rules_json, target_type],
         label="Example Conversions",
         cache_examples=False
     )
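The new examples exercise all three pattern modes (`auto`, a wildcard, and an exact filename). Wildcard mode ultimately resolves through `glob.glob`, which uses shell-style matching; `fnmatch` demonstrates the same rule in isolation:

```python
import fnmatch

files = [
    "model-00001-of-00002.safetensors",
    "model-00002-of-00002.safetensors",
    "diffusion_pytorch_model.safetensors",
]
# "model-*.safetensors" matches only the sharded pair, not the third file.
print([f for f in files if fnmatch.fnmatch(f, "model-*.safetensors")])
```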
@@ -846,6 +1155,32 @@ with gr.Blocks(title="Advanced FP8 Quantizer with Per-Tensor Precision Recovery") as demo:
     - Always include a catch-all rule at the end
 
     > **Pro Tip for VAE**: Use `"dim": 4` combined with `"key_pattern": "vae"` to reliably target VAE convolutional layers with difference recovery.
+
+    ## 📁 File Format Support
+
+    This tool supports multiple model formats:
+
+    - **Safetensors**: Modern, secure format for storing tensors. Supports sharded files (e.g., `model-00001-of-00005.safetensors`).
+    - **PTH/PT**: PyTorch checkpoint files. Can contain state dicts or full model objects.
+    - **CKPT**: Checkpoint files, commonly used for stable diffusion models.
+
+    ### Shard Support:
+    - **Unlimited Shards**: Supports any number of sharded files (2, 5, 10, 20+)
+    - **Auto-Detection**: Automatically finds all shards when using "auto" pattern
+    - **Parallel Downloads**: Downloads multiple shards simultaneously for faster processing
+    - **Memory Efficient**: Processes shards one at a time to manage memory usage
+    - **Progress Tracking**: Shows detailed progress for each shard download and processing
+
+    ### Filename Patterns:
+    - **Auto-detection**: Use "auto" to automatically find all sharded safetensors files
+    - **Wildcard patterns**: Use `model-*.safetensors` to match sharded files
+    - **Specific file**: Use exact filename for single files
+
+    For models with many shards (e.g., 5+ files), the tool will:
+    1. Automatically detect all shards
+    2. Download them in parallel (up to 4 simultaneous downloads)
+    3. Load them sequentially to manage memory
+    4. Merge them into a single FP8 model
     """)
 
 demo.launch()