Update app.py
app.py CHANGED
@@ -19,8 +19,9 @@ except ImportError:
 
 def low_rank_decomposition(weight, rank=64):
     """
-
-    Returns (lora_A, lora_B) such that weight ≈ lora_B @ lora_A
+    Correct LoRA decomposition supporting 2D and 4D tensors.
+    Returns (lora_A, lora_B) such that weight ≈ lora_B @ lora_A for 2D,
+    or appropriate conv form for 4D.
     """
     original_shape = weight.shape
     original_dtype = weight.dtype
@@ -34,7 +35,7 @@ def low_rank_decomposition(weight, rank=64):
             U, S, Vh = torch.linalg.svd(weight.float(), full_matrices=False)
             S_sqrt = torch.sqrt(S[:actual_rank])
 
-            # Standard LoRA: W ≈ W_B @ W_A
+            # Standard LoRA factorization: W ≈ W_B @ W_A
             W_A = (Vh[:actual_rank, :] * S_sqrt.unsqueeze(1)).contiguous()  # [rank, in_features]
             W_B = (U[:, :actual_rank] * S_sqrt.unsqueeze(0)).contiguous()  # [out_features, rank]
 
@@ -42,8 +43,9 @@ def low_rank_decomposition(weight, rank=64):
 
         elif weight.ndim == 4:
             out_ch, in_ch, k_h, k_w = weight.shape
-            if k_h * k_w <= 9:  # small
-
+            if k_h * k_w <= 9:  # small conv kernels (e.g., 3x3)
+                # Reshape to 2D: [out_ch, in_ch * k_h * k_w]
+                weight_2d = weight.view(out_ch, -1)
                 actual_rank = min(rank, min(weight_2d.shape) // 2)
                 if actual_rank < 4:
                     return None, None
@@ -54,7 +56,8 @@ def low_rank_decomposition(weight, rank=64):
                 W_A_2d = (Vh[:actual_rank, :] * S_sqrt.unsqueeze(1)).contiguous()
                 W_B_2d = (U[:, :actual_rank] * S_sqrt.unsqueeze(0)).contiguous()
 
-
+                # Reshape back to conv format
+                W_A = W_A_2d.view(actual_rank, in_ch, k_h, k_w).contiguous()
                 W_B = W_B_2d.view(out_ch, actual_rank, 1, 1).contiguous()
 
                 return W_A.to(original_dtype), W_B.to(original_dtype)
@@ -62,25 +65,25 @@ def low_rank_decomposition(weight, rank=64):
         return None, None
 
     except Exception as e:
-        print(f"Decomposition
+        print(f"Decomposition error for {original_shape}: {e}")
        return None, None
 
 def should_apply_lora(key, weight, architecture="auto"):
-    """
+    """Architecture-aware LoRA eligibility."""
     lower_key = key.lower()
 
-    # Skip
+    # Skip bias, norm, and tiny tensors
     if 'bias' in lower_key or 'norm' in lower_key or weight.numel() < 256:
         return False
 
     if architecture == "text_encoder":
-        return any(t in lower_key for t in ['emb', 'embed', 'attn'])
+        return any(t in lower_key for t in ['emb', 'embed', 'attn', 'mlp'])
     elif architecture == "unet_transformer":
-        return any(t in lower_key for t in ['attn', 'transformer'])
+        return any(t in lower_key for t in ['attn', 'transformer', 'to_q', 'to_k', 'to_v', 'to_out'])
     elif architecture == "unet_conv":
-        return any(t in lower_key for t in ['conv', 'resnet', 'down', 'up'])
+        return any(t in lower_key for t in ['conv', 'resnet', 'down', 'up', 'skip'])
     elif architecture == "vae":
-        return any(t in lower_key for t in ['encoder', 'decoder', 'quant'])
+        return any(t in lower_key for t in ['encoder', 'decoder', 'quant', 'post_quant', 'pre_quant'])
     else:  # "auto" or "all"
         return weight.ndim in [2, 4]
 
@@ -112,7 +115,6 @@ def convert_safetensors_to_fp8_with_lora(safetensors_path, output_dir, fp8_forma
 
         lora_stats = {
             'total_layers': total,
-            'layers_analyzed': 0,
             'layers_eligible': 0,
             'layers_processed': 0,
             'layers_skipped': [],
@@ -121,13 +123,11 @@ def convert_safetensors_to_fp8_with_lora(safetensors_path, output_dir, fp8_forma
         for i, key in enumerate(state_dict):
             progress(0.4 + 0.4 * (i / total), desc=f"Processing {i+1}/{total}...")
             weight = state_dict[key]
-            lora_stats['layers_analyzed'] += 1
 
             if weight.dtype in [torch.float16, torch.float32, torch.bfloat16]:
                 fp8_weight = weight.to(fp8_dtype)
                 sd_fp8[key] = fp8_weight
 
-                # Apply LoRA based on architecture selection
                 if should_apply_lora(key, weight, architecture):
                     lora_stats['layers_eligible'] += 1
 
@@ -139,11 +139,11 @@ def convert_safetensors_to_fp8_with_lora(safetensors_path, output_dir, fp8_forma
                             lora_keys.append(key)
                             lora_stats['layers_processed'] += 1
                         else:
-                            lora_stats['layers_skipped'].append(f"{key}: decomposition
+                            lora_stats['layers_skipped'].append(f"{key}: decomposition failed")
                     except Exception as e:
-                        lora_stats['layers_skipped'].append(f"{key}: {
+                        lora_stats['layers_skipped'].append(f"{key}: exception: {e}")
                 else:
-                    reason = "architecture
+                    reason = "filtered by architecture" if architecture != "auto" else "not 2D/4D or too small"
                     lora_stats['layers_skipped'].append(f"{key}: skipped ({reason})")
             else:
                 sd_fp8[key] = weight
@@ -154,8 +154,6 @@ def convert_safetensors_to_fp8_with_lora(safetensors_path, output_dir, fp8_forma
         lora_path = os.path.join(output_dir, f"{base_name}-lora-r{lora_rank}-{architecture}.safetensors")
 
         save_file(sd_fp8, fp8_path, metadata={"format": "pt", "fp8_format": fp8_format, **metadata})
-
-        # Always save LoRA file (even if empty) with stats
         save_file(lora_weights, lora_path, metadata={
             "format": "pt",
             "lora_rank": str(lora_rank),
@@ -166,17 +164,12 @@ def convert_safetensors_to_fp8_with_lora(safetensors_path, output_dir, fp8_forma
         progress(0.9, desc="Saved FP8 and LoRA files.")
         progress(1.0, desc="✅ FP8 + LoRA extraction complete!")
 
-        stats_msg = f"""
-
-        - Total layers: {lora_stats['total_layers']}
-        - Eligible for LoRA: {lora_stats['layers_eligible']}
-        - Successfully processed: {lora_stats['layers_processed']}
-        - Architecture: {architecture}
-        """
+        stats_msg = f"FP8 ({fp8_format}) and rank-{lora_rank} LoRA ({architecture}) saved.\n"
+        stats_msg += f"Processed {lora_stats['layers_processed']}/{lora_stats['layers_eligible']} eligible layers."
         if lora_stats['layers_processed'] == 0:
-            stats_msg += "
+            stats_msg += " ⚠️ No valid LoRA weights generated."
 
-        return True,
+        return True, stats_msg, lora_stats
 
     except Exception as e:
         import traceback
@@ -324,14 +317,15 @@ for key in fp8_state:
         if A.ndim == 2 and B.ndim == 2:
             lora_weight = B @ A
         else:
-            #
-            lora_weight =
+            # Conv LoRA: simplified reconstruction
+            lora_weight = F.conv2d(fp8_state[key].unsqueeze(0).to(torch.float32), A, groups=1)[:, :B.shape[0]]
+            lora_weight = lora_weight.squeeze(0) + F.conv2d(fp8_state[key].unsqueeze(0).to(torch.float32), B, groups=1).squeeze(0)
         reconstructed[key] = fp8_state[key].to(torch.float32) + lora_weight
     else:
         reconstructed[key] = fp8_state[key].to(torch.float32)
 ```
 
-> Requires PyTorch ≥ 2.1 for FP8 support. Use
+> Requires PyTorch ≥ 2.1 for FP8 support. Use matching architecture during inference.
 """
 
 with open(os.path.join(output_dir, "README.md"), "w") as f:
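A quick way to sanity-check the 2D factorization in this change: `low_rank_decomposition` splits `sqrt(S)` across both factors, so `W ≈ W_B @ W_A` with `W_A` of shape `[rank, in_features]` and `W_B` of shape `[out_features, rank]`. A minimal standalone sketch (the 768×3072 weight is made up for illustration, not taken from app.py):

```python
import torch

W = torch.randn(768, 3072)  # stand-in for a linear layer weight [out_features, in_features]
rank = 64

# Same recipe as low_rank_decomposition uses for weight.ndim == 2
U, S, Vh = torch.linalg.svd(W.float(), full_matrices=False)
S_sqrt = torch.sqrt(S[:rank])
W_A = Vh[:rank, :] * S_sqrt.unsqueeze(1)   # [rank, in_features]
W_B = U[:, :rank] * S_sqrt.unsqueeze(0)    # [out_features, rank]

# Relative Frobenius error of the rank-64 approximation
err = torch.linalg.matrix_norm(W - W_B @ W_A) / torch.linalg.matrix_norm(W)
print(err)
```

By the Eckart–Young theorem the truncated SVD is the best rank-`r` approximation in the Frobenius norm, so this error is the floor for any rank-64 LoRA of that layer.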
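For the 4D branch, the factors come back as a k×k convolution into `rank` channels (`W_A`, shape `[rank, in_ch, k_h, k_w]`) followed by a 1×1 convolution back to `out_ch` (`W_B`, shape `[out_ch, rank, 1, 1]`). The generated README above uses its own simplified conv reconstruction; as a cross-check of those shapes only, here is a hedged sketch (illustrative sizes, my assumption rather than the app's code path) of rebuilding the dense conv delta and confirming that the two-conv form is equivalent:

```python
import torch
import torch.nn.functional as F

out_ch, in_ch, k_h, k_w, rank = 128, 64, 3, 3, 32   # illustrative shapes
W_A = torch.randn(rank, in_ch, k_h, k_w)             # k x k conv into `rank` channels
W_B = torch.randn(out_ch, rank, 1, 1)                # 1 x 1 conv back to `out_ch`

# Dense delta: flatten both factors to 2D, multiply, reshape to conv layout
delta = (W_B.view(out_ch, rank) @ W_A.view(rank, -1)).view(out_ch, in_ch, k_h, k_w)

# Applying W_A then W_B to an activation equals one conv with the dense delta,
# because convolution is linear in the kernel
x = torch.randn(1, in_ch, 16, 16)
two_step = F.conv2d(F.conv2d(x, W_A, padding=1), W_B)
one_step = F.conv2d(x, delta, padding=1)
print(torch.allclose(two_step, one_step, rtol=1e-3, atol=1e-3))
```

This is what makes the 1×1 / k×k split a valid low-rank form for conv layers: the factored pair stores `rank · (out_ch + in_ch·k_h·k_w)` values instead of `out_ch·in_ch·k_h·k_w`.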
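On the FP8 side, the conversion is a plain dtype cast, and the "PyTorch ≥ 2.1" note refers to the `torch.float8_e4m3fn` / `torch.float8_e5m2` storage dtypes. A tiny sketch of the round trip (upcast before doing any arithmetic, since FP8 compute support is limited):

```python
import torch

w = torch.randn(256, 256)
w_fp8 = w.to(torch.float8_e4m3fn)   # requires PyTorch >= 2.1
w_back = w_fp8.to(torch.float32)    # upcast before matmul or adding the LoRA delta

print((w - w_back).abs().max())     # worst-case quantization error of the cast
```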