Spaces:
Running
on
Zero
Running
on
Zero
| import os | |
| from pytorch_memlab import LineProfiler,profile | |
| import torch | |
| import torch.nn as nn | |
| import numpy as np | |
| import pytorch_lightning as pl | |
| from torch.optim.lr_scheduler import LambdaLR | |
| from einops import rearrange, repeat | |
| from contextlib import contextmanager | |
| from functools import partial | |
| from tqdm import tqdm | |
| from ldm.modules.diffusionmodules.util import make_ddim_sampling_parameters, make_ddim_timesteps | |
| from torchvision.utils import make_grid | |
| try: | |
| from pytorch_lightning.utilities.distributed import rank_zero_only | |
| except: | |
| from pytorch_lightning.utilities import rank_zero_only # torch2 | |
| from torchdyn.core import NeuralODE | |
| from ldm.models.diffusion.cfm_audio import Wrapper, Wrapper_cfg | |
| from ldm.modules.diffusionmodules.util import make_beta_schedule, extract_into_tensor, noise_like | |
| from omegaconf import ListConfig | |
| from ldm.util import log_txt_as_img, exists, default | |
class CFMSampler(object):
    """Sampler that integrates a model-defined vector field with a torchdyn NeuralODE.

    The model is wrapped by ``Wrapper`` (plain conditioning) or ``Wrapper_cfg``
    (classifier-free guidance) and integrated with a fixed-step Euler solver
    over a time grid spanning ``[0, 1]``.

    The model object is expected to expose ``num_timesteps``, ``sigma_min``,
    ``device``, ``channels``, ``mel_dim`` and ``mel_length``; these are the
    attributes read by the methods below.
    """

    def __init__(self, model, num_timesteps, schedule="linear", **kwargs):
        """Store the model and the sampler configuration.

        Args:
            model: The model whose vector field is integrated.
            num_timesteps: Number of discrete steps used to normalise ``t`` in
                :meth:`stochastic_encode`.
            schedule: Stored as ``self.schedule``; not used by the methods of
                this class.
            **kwargs: Accepted for call-site compatibility and ignored.
        """
        super().__init__()
        self.model = model
        self.ddpm_num_timesteps = model.num_timesteps
        self.num_timesteps = num_timesteps
        self.schedule = schedule

    def register_buffer(self, name, attr):
        """Attach ``attr`` to the sampler as attribute ``name``.

        Tensors are moved to the current CUDA device when CUDA is available.
        On hosts without CUDA the tensor is stored on its existing device
        instead of raising. Non-tensor values are stored unchanged.
        """
        # `.to` is a no-op for a tensor that already lives on the current CUDA
        # device, so no explicit device comparison is required.
        if isinstance(attr, torch.Tensor) and torch.cuda.is_available():
            attr = attr.to(torch.device("cuda"))
        setattr(self, name, attr)

    def stochastic_encode(self, x_start, t, noise=None):
        """Blend ``x_start`` with noise according to the step index ``t``.

        With ``s = 1 - t / self.num_timesteps`` the result is
        ``s * x_start + (1 - (1 - sigma_min) * s) * noise``.

        Args:
            x_start: Tensor to encode; any rank with a leading batch axis.
            t: Tensor of step indices, one per batch element.
            noise: Optional tensor broadcastable to ``x_start``. When ``None``
                standard normal noise shaped like ``x_start`` is drawn.

        Returns:
            Tensor with the same shape as ``x_start``.
        """
        x1 = x_start
        x0 = torch.randn_like(x_start) if noise is None else noise
        frac = 1 - t.float() / self.num_timesteps
        # Append trailing singleton axes until `frac` has the rank of
        # `x_start`, so the per-sample factor broadcasts over every non-batch
        # axis (3-D and 4-D inputs alike).
        frac = frac.reshape(tuple(frac.shape) + (1,) * (x_start.dim() - frac.dim()))
        return frac * x1 + (1. - (1 - self.model.sigma_min) * frac) * x0

    def _default_shape(self, batch_size):
        """Return the sample shape derived from the model's attributes."""
        if self.model.channels > 0:
            return (batch_size, self.model.channels, self.model.mel_dim, self.model.mel_length)
        return (batch_size, self.model.mel_dim, self.model.mel_length)

    def _integrate(self, vector_field, batch_size, timesteps, shape, x_latent, t_start):
        """Integrate ``vector_field`` over the time grid; return ``(final, trajectory)``."""
        if shape is None:
            shape = self._default_shape(batch_size)
        neural_ode = NeuralODE(vector_field, solver='euler', sensitivity="adjoint", atol=1e-4, rtol=1e-4)
        # 25 grid points unless the caller asks for a different resolution.
        t_span = torch.linspace(0, 1, 25 if timesteps is None else timesteps)
        if t_start is not None:
            # Resume from an intermediate grid point (used with `x_latent`).
            t_span = t_span[t_start:]
        x0 = torch.randn(shape, device=self.model.device) if x_latent is None else x_latent
        _, traj = neural_ode(x0, t_span)
        return traj[-1], traj

    def sample(self, cond, batch_size=16, timesteps=None, shape=None, x_latent=None, t_start=None, **kwargs):
        """Draw samples conditioned on ``cond``.

        Args:
            cond: Conditioning passed through to ``Wrapper``.
            batch_size: Batch size used when ``shape`` is not given.
            timesteps: Number of points in the time grid (default 25).
            shape: Explicit shape of the initial noise; derived from the model
                when ``None``.
            x_latent: Optional starting state used instead of fresh noise.
            t_start: Optional index into the time grid to start from.
            **kwargs: Ignored.

        Returns:
            Tuple ``(final_state, trajectory)`` where ``final_state`` is the
            last entry of ``trajectory``.
        """
        return self._integrate(self.ode_wrapper(cond), batch_size, timesteps, shape, x_latent, t_start)

    def ode_wrapper(self, cond):
        """Wrap the model and ``cond`` into the callable handed to NeuralODE."""
        return Wrapper(self.model, cond)

    def sample_cfg(self, cond, unconditional_guidance_scale, unconditional_conditioning, batch_size=16, timesteps=None, shape=None, x_latent=None, t_start=None, **kwargs):
        """Draw samples with classifier-free guidance.

        Takes the same arguments as :meth:`sample`, plus
        ``unconditional_guidance_scale`` and ``unconditional_conditioning``,
        which are forwarded to ``Wrapper_cfg``.

        Returns:
            Tuple ``(final_state, trajectory)`` where ``final_state`` is the
            last entry of ``trajectory``.
        """
        vector_field = self.ode_wrapper_cfg(cond, unconditional_guidance_scale, unconditional_conditioning)
        return self._integrate(vector_field, batch_size, timesteps, shape, x_latent, t_start)

    def ode_wrapper_cfg(self, cond, unconditional_guidance_scale, unconditional_conditioning):
        """Wrap the model with its guidance inputs into the callable handed to NeuralODE."""
        return Wrapper_cfg(self.model, cond, unconditional_guidance_scale, unconditional_conditioning)