Spaces:
Runtime error
Runtime error
| import torch | |
| from backend import operations, memory_management | |
| from backend.patcher.base import ModelPatcher | |
| from transformers import modeling_utils | |
class DiffusersModelPatcher:
    """Load a pipeline via ``from_pretrained`` and wrap it in a ``ModelPatcher``.

    The loaded pipeline is stored on ``self.pipeline`` and the wrapping
    ``ModelPatcher`` on ``self.patcher``; ``self.dtype`` holds the dtype that
    input tensors are cast to by ``move_tensor_to_current_device``.
    """

    def __init__(self, pipeline_class, dtype=torch.float16, *args, **kwargs):
        """Build the pipeline on the CPU and register it with a ``ModelPatcher``.

        Args:
            pipeline_class: Object exposing ``from_pretrained``; it receives
                ``*args`` and ``**kwargs`` unchanged.
            dtype: Requested dtype. Replaced by ``torch.float32`` when
                ``memory_management.should_use_fp16`` is falsy for the load
                device. Because it precedes ``*args``, the first extra
                positional argument binds to ``dtype``.
            *args: Positional arguments forwarded to ``from_pretrained``.
            **kwargs: Keyword arguments forwarded to ``from_pretrained``.
        """
        load_device = memory_management.get_torch_device()
        offload_device = torch.device("cpu")

        fp16_allowed = memory_management.should_use_fp16(device=load_device)
        self.dtype = dtype if fp16_allowed else torch.float32

        # Both context managers are active only while the pipeline is built.
        # Presumably no_init_weights avoids initialising weights that
        # from_pretrained overwrites anyway -- TODO confirm.
        with operations.using_forge_operations(), modeling_utils.no_init_weights():
            self.pipeline = pipeline_class.from_pretrained(*args, **kwargs)

        # Swap in diffusers' AttnProcessor2_0 when the pipeline exposes a
        # ``unet`` that offers ``set_attn_processor``. A missing (or None)
        # unet simply fails the hasattr check.
        unet = getattr(self.pipeline, 'unet', None)
        if hasattr(unet, 'set_attn_processor'):
            from diffusers.models.attention_processor import AttnProcessor2_0
            unet.set_attn_processor(AttnProcessor2_0())
            print('Attention optimization applied to DiffusersModelPatcher')

        self.pipeline = self.pipeline.to(device=offload_device)

        # NOTE(review): only float16 triggers an explicit cast of the
        # pipeline; for any other dtype the pipeline keeps whatever dtype
        # from_pretrained produced -- confirm this is intended.
        if self.dtype == torch.float16:
            self.pipeline = self.pipeline.half()

        self.pipeline.eval()

        self.patcher = ModelPatcher(
            model=self.pipeline,
            load_device=load_device,
            offload_device=offload_device,
        )

    def prepare_memory_before_sampling(self, batchsize, latent_width, latent_height):
        """Estimate the inference memory need and load the patcher for it.

        The estimate is derived from ``2 * batchsize * latent_width *
        latent_height`` and handed to ``memory_management.load_models_gpu``
        as ``memory_required`` (presumably bytes, given the final
        ``1024 * 1024`` factor -- TODO confirm).
        """
        area = 2 * batchsize * latent_width * latent_height
        scaled_area = area * 0.6 / 0.9
        mebibyte = 1024 * 1024
        inference_memory = (scaled_area + 1024) * mebibyte

        memory_management.load_models_gpu(
            models=[self.patcher],
            memory_required=inference_memory,
        )

    def move_tensor_to_current_device(self, x):
        """Return ``x`` moved to the patcher's current device as ``self.dtype``."""
        target_device = self.patcher.current_device
        return x.to(device=target_device, dtype=self.dtype)