basic functionality
- .gitattributes +1 -0
- DeMoE.pt +3 -0
- README.md +1 -1
- app.py +96 -0
- archs/DeMoE.py +133 -0
- archs/__init__.py +59 -0
- archs/arch_model.py +105 -0
- archs/arch_util.py +79 -0
- archs/moeblocks.py +65 -0
- check_file.py +5 -0
- examples/000143.png +3 -0
- examples/0031.png +3 -0
- examples/12_blur.png +3 -0
- examples/1P0A1811.png +3 -0
- examples/blur_4.png +3 -0
- requirements.txt +18 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
DeMoE.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b0aef148ffcb1a5572b4da6cbd33f86ed2e18639db28b46838aae46bcd011a5
+size 80848778
README.md
CHANGED
@@ -1,6 +1,6 @@
 ---
 title: DeMoE
-emoji:
+emoji: 🌪️
 colorFrom: gray
 colorTo: green
 sdk: gradio
app.py
ADDED
@@ -0,0 +1,96 @@
+import gradio as gr
+from PIL import Image
+import torch
+import torchvision.transforms as transforms
+import torch.nn.functional as F
+
+from archs import create_model, resume_model
+
+PATH_MODEL = './DeMoE.pt'
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model_opt = {
+    'name': 'DeMoE',
+    'img_channels': 3,
+    'width': 32,
+    'middle_blk_num': 2,
+    'enc_blk_nums': [2, 2, 2, 2],
+    'dec_blk_nums': [2, 2, 2, 2],
+    'num_experts': 5,
+    'k_used': 1
+}
+
+pil_to_tensor = transforms.ToTensor()
+tensor_to_pil = transforms.ToPILImage()
+
+model = create_model(model_opt, device)
+
+checkpoints = torch.load(PATH_MODEL, map_location=device, weights_only=False)
+model = resume_model(model, PATH_MODEL, device)
+
+def pad_tensor(tensor, multiple=16):
+    '''Pad the tensor so that H and W are multiples of `multiple`.'''
+    _, _, H, W = tensor.shape
+    pad_h = (multiple - H % multiple) % multiple
+    pad_w = (multiple - W % multiple) % multiple
+    tensor = F.pad(tensor, (0, pad_w, 0, pad_h), value=0)
+
+    return tensor
+
+def process_img(image, task='auto'):
+    tensor = pil_to_tensor(image).unsqueeze(0).to(device)
+    _, _, H, W = tensor.shape
+
+    tensor = pad_tensor(tensor)
+
+    with torch.no_grad():
+        output = model(tensor, task)['output']  # the model returns a dict; keep the restored image
+
+    output = torch.clamp(output, 0., 1.)
+    output = output[:, :, :H, :W].squeeze(0)  # crop the padding back off
+    return tensor_to_pil(output)
+
+title = 'DeMoE 🌪️'
+description = ''' >**Abstract**: Image deblurring, removing blurring artifacts from images, is a fundamental task in computational photography and low-level computer vision. Existing approaches focus on specialized solutions tailored to particular blur types; thus, these solutions lack generalization. This limitation in current methods implies requiring multiple models to cover several blur types, which is not practical in many real scenarios. In this paper, we introduce the first all-in-one deblurring method capable of efficiently restoring images affected by diverse blur degradations, including global motion, local motion, blur in low-light conditions, and defocus blur. We propose a mixture-of-experts (MoE) decoding module, which dynamically routes image features based on the recognized blur degradation, enabling precise and efficient restoration in an end-to-end manner. Our unified approach not only achieves performance comparable to dedicated task-specific models, but also shows promising generalization to unseen blur scenarios, particularly when leveraging appropriate expert selection.
+
+[Daniel Feijoo](https://github.com/danifei), Paula Garrido-Mellado, Jaesung Rim, Álvaro García, Marcos V. Conde
+
+[Fundación Cidaut](https://cidaut.ai/)
+
+
+Available code at [github](https://github.com/cidautai/DeMoE). More information in the [arXiv paper](https://arxiv.org/pdf/2508.06228).
+
+> **Disclaimer:** please remember this is not a product; thus, you will notice some limitations.
+**This demo expects an image with some Low-Light degradations.**
+
+<br>
+'''
+
+examples = [['examples/1P0A1811.png'],
+            ['examples/12_blur.png'],
+            ['examples/0031.png'],
+            ['examples/000143.png'],
+            ['examples/blur_4.png']]
+
+css = """
+.image-frame img, .image-container img {
+    width: auto;
+    height: auto;
+    max-width: none;
+}
+"""
+
+demo = gr.Interface(
+    fn=process_img,
+    inputs=[
+        gr.Image(type='pil', label='input')
+    ],
+    outputs=[gr.Image(type='pil', label='output')],
+    title=title,
+    description=description,
+    examples=examples,
+    css=css
+)
+
+if __name__ == '__main__':
+    demo.launch()
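
A minimal usage sketch (editorial, not part of this commit) of the restoration entry point defined above: `process_img` pads the image, runs the network, and crops back. Importing `app` builds the model and loads `./DeMoE.pt`, and the manual `task` keys are the ones defined in `archs/DeMoE.py`:

    from PIL import Image
    from app import process_img  # importing app builds the model and loads ./DeMoE.pt

    img = Image.open('examples/blur_4.png').convert('RGB')
    restored_auto = process_img(img)                    # routing weights predicted by the classifier
    restored_ll = process_img(img, task='low_light')    # one-hot routing to the low-light expert
    restored_auto.save('restored_auto.png')
    restored_ll.save('restored_low_light.png')
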
archs/DeMoE.py
ADDED
@@ -0,0 +1,133 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+try:
+    from .arch_util import CustomSequential
+    from .arch_model import EfficientClassificationHead, NAFBlock
+    from .moeblocks import MoEBlock
+except ImportError:
+    from arch_util import CustomSequential
+    from arch_model import EfficientClassificationHead, NAFBlock
+    from moeblocks import MoEBlock
+
+
+# One-hot routing weights used when the task is selected manually instead of predicted.
+TASKS = {'defocus':             [1.0, 0, 0, 0, 0],
+         'global_motion':       [0, 1.0, 0, 0, 0],
+         'local_motion':        [0, 0, 1.0, 0, 0],
+         'synth_global_motion': [0, 0, 0, 1.0, 0],
+         'low_light':           [0, 0, 0, 0, 1.0]}
+
+class DeMoE(nn.Module):
+
+    def __init__(self, img_channel=3, width=16, middle_blk_num=1, enc_blk_nums=[], dec_blk_nums=[], num_exp=5, k_used=3):
+        super().__init__()
+
+        self.num_experts = num_exp
+        self.intro = nn.Conv2d(in_channels=img_channel, out_channels=width, kernel_size=3, padding=1, stride=1, groups=1,
+                               bias=True)
+        self.ending = nn.Conv2d(in_channels=width, out_channels=img_channel, kernel_size=3, padding=1, stride=1, groups=1,
+                                bias=True)
+
+        self.encoders = nn.ModuleList()
+        self.decoders = nn.ModuleList()
+        self.middle_blks = nn.ModuleList()
+        self.ups = nn.ModuleList()
+        self.downs = nn.ModuleList()
+        self.experts = nn.ModuleList()
+
+        chan = width
+        for num in enc_blk_nums:
+            self.encoders.append(
+                CustomSequential(
+                    *[NAFBlock(chan) for _ in range(num)]
+                )
+            )
+            self.downs.append(
+                nn.Conv2d(chan, 2 * chan, 2, 2)
+            )
+            chan = chan * 2
+
+        self.middle_blks = \
+            CustomSequential(
+                *[NAFBlock(chan) for _ in range(middle_blk_num)]
+            )
+        self.experts.append(MoEBlock(c=chan, n=num_exp, used=k_used))
+
+        for num in dec_blk_nums:
+            self.ups.append(
+                nn.Sequential(
+                    nn.Conv2d(chan, chan * 2, 1, bias=False),
+                    nn.PixelShuffle(2)
+                )
+            )
+            chan = chan // 2
+            self.decoders.append(
+                CustomSequential(
+                    *[NAFBlock(chan) for _ in range(num)]
+                )
+            )
+            self.experts.append(MoEBlock(c=chan, n=num_exp, used=k_used))
+
+        # Classification head that predicts the blur type from the deepest encoder features.
+        self.mlp_branch = EfficientClassificationHead(in_channels=width * 2 ** len(enc_blk_nums), num_classes=num_exp)
+
+        self.padder_size = 2 ** len(self.encoders)
+
+    def forward(self, inp, task='auto'):
+        B, C, H, W = inp.shape
+        inp = self.check_image_size(inp)
+
+        x = self.intro(inp)
+
+        encs = []
+        bins = []
+        weights = []
+        for encoder, down in zip(self.encoders, self.downs):
+            x = encoder(x)
+            encs.append(x)
+            x = down(x)
+        class_weights_0 = self.mlp_branch(x)
+        class_weights = F.softmax(class_weights_0, dim=1)
+        # if the task is selected manually, override the predicted routing weights
+        if task != 'auto':
+            class_weights = torch.tensor(TASKS[task], device=x.device).unsqueeze(0).expand(B, -1)
+        x = self.middle_blks(x)
+        x, expert_bins, weight = self.experts[0].forward(x, class_weights)
+        bins.append(expert_bins)
+        weights.append(weight)
+        for decoder, up, enc_skip, expert in zip(self.decoders, self.ups, encs[::-1], self.experts[1:]):
+            x = up(x)
+            x = x + enc_skip
+            x = decoder(x)
+            x, expert_bins, weight = expert.forward(x, class_weights)
+            bins.append(expert_bins)
+            weights.append(weight)
+        x = self.ending(x)
+        x = x + inp
+
+        return {'output': x[:, :, :H, :W],
+                'bin_counts': torch.stack(bins, dim=0),
+                'pred_labels': class_weights,
+                'weights': weights}
+
+    def check_image_size(self, x):
+        _, _, h, w = x.size()
+        mod_pad_h = (self.padder_size - h % self.padder_size) % self.padder_size
+        mod_pad_w = (self.padder_size - w % self.padder_size) % self.padder_size
+        x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h), value=0)
+        return x
+
+if __name__ == '__main__':
+
+    from ptflops import get_model_complexity_info
+
+    net = DeMoE(img_channel=3, width=32,
+                middle_blk_num=2, enc_blk_nums=[2, 2, 2, 2], dec_blk_nums=[2, 2, 2, 2], k_used=1)
+    print('State dict: ', len(net.state_dict().keys()))
+    macs, params = get_model_complexity_info(net, input_res=(3, 256, 256), print_per_layer_stat=False, verbose=False)
+    print(macs, params)
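
A short sketch (editorial, not part of this commit) of the forward interface above: the network pads internally, returns a dict, and a manual `task` key replaces the predicted routing weights with the one-hot vector from `TASKS`:

    import torch
    from archs.DeMoE import DeMoE

    net = DeMoE(img_channel=3, width=32, middle_blk_num=2,
                enc_blk_nums=[2, 2, 2, 2], dec_blk_nums=[2, 2, 2, 2],
                num_exp=5, k_used=1)
    net.eval()  # BatchNorm layers in the classification head need eval mode for batch size 1

    x = torch.randn(1, 3, 270, 180)       # H and W need not be multiples of 16
    with torch.no_grad():
        out = net(x, task='defocus')      # one-hot routing to the defocus expert
    print(out['output'].shape)            # torch.Size([1, 3, 270, 180]), cropped back to the input size
    print(out['pred_labels'])             # tensor([[1., 0., 0., 0., 0.]])
    print(out['bin_counts'].shape)        # torch.Size([5, 5]): one row per MoE block, one column per expert
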
archs/__init__.py
ADDED
@@ -0,0 +1,59 @@
+import torch
+
+from .DeMoE import DeMoE
+
+def create_model(opt, device):
+    '''
+    Creates the model.
+    opt: a dictionary from the yaml config key `network`
+    '''
+    name = opt['name']
+
+    if name == 'DeMoE':
+        model = DeMoE(img_channel=opt['img_channels'],
+                      width=opt['width'],
+                      middle_blk_num=opt['middle_blk_num'],
+                      enc_blk_nums=opt['enc_blk_nums'],
+                      dec_blk_nums=opt['dec_blk_nums'],
+                      num_exp=opt['num_experts'],
+                      k_used=opt['k_used'])
+
+    else:
+        raise NotImplementedError('This network is not implemented')
+
+    model.to(device)
+
+    return model
+
+def load_weights(model, model_weights):
+    '''
+    Loads the weights of a pretrained model, picking only the weights that are
+    in the new model.
+    '''
+    new_weights = model.state_dict()
+    new_weights.update({k: v for k, v in model_weights.items() if k in new_weights})
+
+    model.load_state_dict(new_weights)
+
+    return model
+
+def resume_model(model,
+                 path_model,
+                 device):
+
+    '''
+    Loads the pretrained weights stored under the 'params' key of the checkpoint
+    at path_model into the given model.
+    '''
+
+    checkpoints = torch.load(path_model, map_location=device, weights_only=False)
+    weights = checkpoints['params']
+    model = load_weights(model, model_weights=weights)
+
+    return model
+
+
+__all__ = ['create_model', 'resume_model', 'load_weights']
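
For reference, a sketch (editorial, not part of this commit) of the loading path app.py uses: `create_model` builds the network from the config dict and `resume_model` copies the checkpoint's `'params'` state dict into it via `load_weights`:

    import torch
    from archs import create_model, resume_model

    opt = {'name': 'DeMoE', 'img_channels': 3, 'width': 32, 'middle_blk_num': 2,
           'enc_blk_nums': [2, 2, 2, 2], 'dec_blk_nums': [2, 2, 2, 2],
           'num_experts': 5, 'k_used': 1}
    device = torch.device('cpu')

    model = create_model(opt, device)
    model = resume_model(model, './DeMoE.pt', device)   # expects a checkpoint shaped like {'params': state_dict}
    model.eval()
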
archs/arch_model.py
ADDED
@@ -0,0 +1,105 @@
+import torch
+import torch.nn as nn
+
+try:
+    from .arch_util import LayerNorm2d
+except ImportError:
+    from arch_util import LayerNorm2d
+
+# ------------------------------------------------------------------------
+# Modified from NAFNet (https://github.com/megvii-research/NAFNet)
+# ------------------------------------------------------------------------
+
+
+class SimpleGate(nn.Module):
+    def forward(self, x):
+        x1, x2 = x.chunk(2, dim=1)
+        return x1 * x2
+
+class NAFBlock(nn.Module):
+    def __init__(self, c, DW_Expand=2, FFN_Expand=2, drop_out_rate=0.):
+        super().__init__()
+        dw_channel = c * DW_Expand
+        self.conv1 = nn.Conv2d(in_channels=c, out_channels=dw_channel, kernel_size=1, padding=0, stride=1, groups=1, bias=True)
+        self.conv2 = nn.Conv2d(in_channels=dw_channel, out_channels=dw_channel, kernel_size=3, padding=1, stride=1, groups=dw_channel,
+                               bias=True)  # the depth-wise conv
+        self.conv3 = nn.Conv2d(in_channels=dw_channel // 2, out_channels=c, kernel_size=1, padding=0, stride=1, groups=1, bias=True)
+
+        # Simplified Channel Attention
+        self.sca = nn.Sequential(
+            nn.AdaptiveAvgPool2d(1),
+            nn.Conv2d(in_channels=dw_channel // 2, out_channels=dw_channel // 2, kernel_size=1, padding=0, stride=1,
+                      groups=1, bias=True),
+        )
+
+        # SimpleGate
+        self.sg = SimpleGate()
+
+        ffn_channel = FFN_Expand * c
+        self.conv4 = nn.Conv2d(in_channels=c, out_channels=ffn_channel, kernel_size=1, padding=0, stride=1, groups=1, bias=True)
+        self.conv5 = nn.Conv2d(in_channels=ffn_channel // 2, out_channels=c, kernel_size=1, padding=0, stride=1, groups=1, bias=True)
+
+        self.norm1 = LayerNorm2d(c)
+        self.norm2 = LayerNorm2d(c)
+
+        self.dropout1 = nn.Dropout(drop_out_rate) if drop_out_rate > 0. else nn.Identity()
+        self.dropout2 = nn.Dropout(drop_out_rate) if drop_out_rate > 0. else nn.Identity()
+
+        self.beta = nn.Parameter(torch.zeros((1, c, 1, 1)), requires_grad=True)
+        self.gamma = nn.Parameter(torch.zeros((1, c, 1, 1)), requires_grad=True)
+
+    def forward(self, inp):
+        x = inp                        # size [B, C, H, W]
+
+        x = self.norm1(x)              # size [B, C, H, W]
+
+        x = self.conv1(x)              # size [B, 2*C, H, W]
+        x = self.conv2(x)              # size [B, 2*C, H, W]
+        x = self.sg(x)                 # size [B, C, H, W]
+        x = x * self.sca(x)            # size [B, C, H, W]
+        x = self.conv3(x)              # size [B, C, H, W]
+
+        x = self.dropout1(x)
+
+        y = inp + x * self.beta        # size [B, C, H, W]
+
+        x = self.conv4(self.norm2(y))  # size [B, 2*C, H, W]
+        x = self.sg(x)                 # size [B, C, H, W]
+        x = self.conv5(x)              # size [B, C, H, W]
+
+        x = self.dropout2(x)
+
+        x = y + x * self.gamma
+
+        return x
+
+class EfficientClassificationHead(nn.Module):
+
+    def __init__(self, in_channels, num_classes=5):
+        super().__init__()
+        self.conv_bottleneck = nn.Sequential(
+            nn.Conv2d(in_channels, 256, kernel_size=1),  # Channel reduction
+            nn.BatchNorm2d(256),
+            nn.ReLU(inplace=True),
+            nn.Dropout2d(0.2))
+
+        self.attention = nn.Sequential(
+            nn.Conv2d(256, 1, kernel_size=1),
+            nn.Sigmoid())
+
+        self.classifier = nn.Sequential(
+            nn.AdaptiveAvgPool2d(1),
+            nn.Flatten(),
+            nn.Linear(256, 128),
+            nn.BatchNorm1d(128),
+            nn.ReLU(inplace=True),
+            nn.Dropout(0.3),
+            nn.Linear(128, num_classes))
+
+    def forward(self, x):
+        x = self.conv_bottleneck(x)
+        attention_mask = self.attention(x)
+        x = x * attention_mask  # Spatial attention
+        return self.classifier(x)
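
A quick shape check (editorial, not part of this commit) for the two modules above: `NAFBlock` preserves the feature map shape, and `EfficientClassificationHead` reduces a feature map to per-class logits that DeMoE later passes through a softmax:

    import torch
    from archs.arch_model import NAFBlock, EfficientClassificationHead

    block = NAFBlock(c=32).eval()
    print(block(torch.randn(2, 32, 64, 64)).shape)   # torch.Size([2, 32, 64, 64])

    head = EfficientClassificationHead(in_channels=512, num_classes=5).eval()
    print(head(torch.randn(2, 512, 16, 16)).shape)   # torch.Size([2, 5]), raw logits
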
archs/arch_util.py
ADDED
@@ -0,0 +1,79 @@
+import torch
+import numpy as np
+from torch import nn as nn
+from torch.nn import init as init
+
+# ------------------------------------------------------------------------
+# Modified from NAFNet (https://github.com/megvii-research/NAFNet)
+# ------------------------------------------------------------------------
+
+class LayerNormFunction(torch.autograd.Function):
+
+    @staticmethod
+    def forward(ctx, x, weight, bias, eps):
+        ctx.eps = eps
+        N, C, H, W = x.size()
+        mu = x.mean(1, keepdim=True)
+        var = (x - mu).pow(2).mean(1, keepdim=True)
+        y = (x - mu) / (var + eps).sqrt()
+        ctx.save_for_backward(y, var, weight)
+        y = weight.view(1, C, 1, 1) * y + bias.view(1, C, 1, 1)
+        return y
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        eps = ctx.eps
+
+        N, C, H, W = grad_output.size()
+        y, var, weight = ctx.saved_tensors
+        g = grad_output * weight.view(1, C, 1, 1)
+        mean_g = g.mean(dim=1, keepdim=True)
+
+        mean_gy = (g * y).mean(dim=1, keepdim=True)
+        gx = 1. / torch.sqrt(var + eps) * (g - y * mean_gy - mean_g)
+        return gx, (grad_output * y).sum(dim=3).sum(dim=2).sum(dim=0), grad_output.sum(dim=3).sum(dim=2).sum(
+            dim=0), None
+
+class LayerNorm2d(nn.Module):
+
+    def __init__(self, channels, eps=1e-6):
+        super(LayerNorm2d, self).__init__()
+        self.register_parameter('weight', nn.Parameter(torch.ones(channels)))
+        self.register_parameter('bias', nn.Parameter(torch.zeros(channels)))
+        self.eps = eps
+
+    def forward(self, x):
+        return LayerNormFunction.apply(x, self.weight, self.bias, self.eps)
+
+def calc_mean_std(feat, eps=1e-5):
+    """
+    Calculate mean and std for the given feature map.
+    feat: Tensor of shape [B, C, H, W]
+    eps: small value to avoid division by zero
+    """
+    B, C, _, _ = feat.size()
+
+    # Compute mean and std of the feature map across the channel dimension.
+    feat_mean = feat.mean(dim=1, keepdim=True)
+    feat_std = feat.var(dim=1, keepdim=True) + eps
+    feat_std = feat_std.sqrt()
+
+    return feat_mean, feat_std
+
+class CustomSequential(nn.Module):
+    '''
+    Similar to nn.Sequential, but it lets us introduce a second argument in the forward method
+    so adaptors can be considered in the inference.
+    '''
+    def __init__(self, *args):
+        super(CustomSequential, self).__init__()
+        self.modules_list = nn.ModuleList(args)
+
+    def forward(self, x):
+        for module in self.modules_list:
+            x = module(x)
+        return x
+
+if __name__ == '__main__':
+
+    pass
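
A small sanity check (editorial, not part of this commit) of what `LayerNorm2d` computes: each spatial position is normalized over the channel dimension, so it should match PyTorch's built-in layer norm applied channel-last with the same eps, weight, and bias:

    import torch
    import torch.nn.functional as F
    from archs.arch_util import LayerNorm2d

    x = torch.randn(2, 32, 8, 8)
    ln = LayerNorm2d(32, eps=1e-6)

    ref = F.layer_norm(x.permute(0, 2, 3, 1), (32,), ln.weight, ln.bias, eps=1e-6).permute(0, 3, 1, 2)
    print(torch.allclose(ln(x), ref, atol=1e-5))   # True: per-pixel normalization over channels
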
archs/moeblocks.py
ADDED
@@ -0,0 +1,65 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+try:
+    from .arch_model import NAFBlock
+except ImportError:
+    from arch_model import NAFBlock
+
+class MoEBlock(nn.Module):
+    def __init__(self, c, n=5, used=3):
+        super().__init__()
+        self.used = int(used)
+        self.num_experts = n
+        self.experts = nn.ModuleList([NAFBlock(c=c) for _ in range(n)])
+
+    # Sparse implementation for large n
+    def forward(self, feat, weights):
+        B, _, _, _ = feat.shape
+        k = self.used
+        # Get top-k weights and indices
+        topk_weights, topk_indices = torch.topk(weights, k, dim=1)  # (B, k)
+        expert_counts = torch.bincount(topk_indices.flatten(), minlength=self.num_experts)
+        # Apply l1 normalization so the kept weights sum to 1 while preserving their relative scale
+        topk_weights = topk_weights / topk_weights.sum(dim=1, keepdim=True)  # (B, k)
+        mask = torch.zeros(B, self.num_experts, dtype=torch.float32, device=feat.device)
+        mask.scatter_(1, topk_indices, 1.0)  # Set 1.0 for used experts
+
+        # Initialize output tensor
+        outputs = torch.zeros_like(feat)
+
+        # Process only used experts
+        for expert_idx in range(self.num_experts):
+            batch_mask = mask[:, expert_idx].bool()  # Convert to boolean mask
+            if batch_mask.any():
+                # Get the routing weights for this expert on the selected batch elements
+                expert_weights = topk_weights[batch_mask, (topk_indices[batch_mask] == expert_idx).nonzero()[:, 1]]
+                expert_out = self.experts[expert_idx](feat[batch_mask])
+                outputs[batch_mask] += expert_out * expert_weights.view(-1, 1, 1, 1)
+
+        return outputs, expert_counts, weights
+
+
+# ----------------------------------------------------------------------------------------------
+if __name__ == '__main__':
+
+    img_channel = 3
+    width = 32
+
+    enc_blks = [1, 2, 3]
+    middle_blk_num = 3
+    dec_blks = [3, 1, 1]
+    dilations = [1, 4, 9]
+    extra_depth_wise = True
+
+    net = MoEBlock(c=img_channel,
+                   n=5,
+                   used=3)
+
+    inp_shape = (3, 256, 256)
+
+    from ptflops import get_model_complexity_info
+
+    macs, params = get_model_complexity_info(net, inp_shape, verbose=False, print_per_layer_stat=False)
+    output = net(torch.randn((4, 3, 256, 256)), F.softmax(torch.randn((4, 5)), dim=1))
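
A routing check (editorial, not part of this commit) for the block above: with one-hot weights and `used=1`, each sample is processed by exactly one expert, `expert_counts` records how many samples each expert received, and the weighted sum collapses to that expert's output:

    import torch
    from archs.moeblocks import MoEBlock

    moe = MoEBlock(c=16, n=5, used=1).eval()
    feat = torch.randn(2, 16, 32, 32)
    weights = torch.tensor([[0., 0., 1., 0., 0.],    # sample 0 -> expert 2
                            [0., 0., 0., 0., 1.]])   # sample 1 -> expert 4

    out, counts, w = moe(feat, weights)
    print(counts)                                                  # tensor([0, 0, 1, 0, 1])
    print(torch.allclose(out[0], moe.experts[2](feat[0:1])[0]))    # True: only the selected expert contributes
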
check_file.py
ADDED
@@ -0,0 +1,5 @@
+import torch
+
+pt_dict = torch.load('DeMoE.pt', map_location='cpu')
+print(pt_dict['params'].keys())
+print(len(pt_dict['params'].keys()))
examples/000143.png
ADDED (Git LFS)
examples/0031.png
ADDED (Git LFS)
examples/12_blur.png
ADDED (Git LFS)
examples/1P0A1811.png
ADDED (Git LFS)
examples/blur_4.png
ADDED (Git LFS)
requirements.txt
ADDED
@@ -0,0 +1,18 @@
+einops==0.8.0
+gradio==5.49.0
+kornia==0.7.2
+lpips==0.1.4
+numpy==2.0.0
+opencv-python==4.10.0.84
+pandas==2.2.2
+pillow==10.3.0
+ptflops==0.7.3
+pyiqa==0.1.13
+pytorch-msssim==1.0.0
+PyYAML==6.0.1
+scikit-image==0.24.0
+scipy==1.13.1
+torch==2.5.1
+torchaudio==2.5.1
+torchvision==0.20.1
+tqdm==4.66.4