Spaces:

jatin-tech
/

Tru_Image_Classifier

Running

App Files Files Community

Jatin-tec committed 28 days ago

Commit

65d7391

1 Parent(s): 140553b

Add application file

Browse files

Files changed (20) hide show

.gitignore +42 -0
README.md +32 -0
app.py +132 -0
requirements.txt +7 -0
trufor_native/__init__.py +5 -0
trufor_native/inference.py +130 -0
trufor_native/models/DnCNN.py +145 -0
trufor_native/models/__init__.py +10 -0
trufor_native/models/cmx/LICENSE_CMX.txt +21 -0
trufor_native/models/cmx/__init__.py +0 -0
trufor_native/models/cmx/builder_np_conf.py +175 -0
trufor_native/models/cmx/decoders/MLPDecoder.py +86 -0
trufor_native/models/cmx/decoders/__init__.py +0 -0
trufor_native/models/cmx/encoders/__init__.py +0 -0
trufor_native/models/cmx/encoders/dual_segformer.py +518 -0
trufor_native/models/cmx/layer_utils.py +45 -0
trufor_native/models/cmx/net_utils.py +193 -0
trufor_native/models/cmx/utils/__init__.py +0 -0
trufor_native/models/cmx/utils/init_func.py +58 -0
trufor_runner.py +309 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,42 @@

+# Python artifacts
+__pycache__/
+*.py[cod]
+*.so
+# Virtual environments
+.venv/
+venv/
+ENV/
+env/
+.env
+.env.*
+# Packaging / build outputs
+build/
+dist/
+*.egg-info/
+# Testing and type checking caches
+.pytest_cache/
+.mypy_cache/
+.pytype/
+.ruff_cache/
+.coverage
+coverage.xml
+# Jupyter
+.ipynb_checkpoints/
+# Editor and OS cruft
+.vscode/
+.idea/
+*.swp
+.DS_Store
+# Logs
+*.log
+# Docker scratch space
+test_docker/data/
+test_docker/data_out/

README.md ADDED Viewed

	@@ -0,0 +1,32 @@

+# Hugging Face Interface Demo
+This Gradio app compares two detectors for image provenance:
+- Hugging Face `Ateeqq/ai-vs-human-image-detector` estimates whether an image is AI-generated or human-made.
+- A bundled TruFor backend estimates tampering and renders heatmaps when the required weights are present.
+## Requirements
+- Python 3.9 or newer
+- `pip install -r requirements.txt`
+## Getting Started
+1. Create or activate a virtual environment that uses Python 3.9+.
+2. Install dependencies:
+   ```bash
+   pip install -r requirements.txt
+   ```
+3. Launch the interface:
+   ```bash
+   python app.py
+   ```
+   Gradio prints a local URL in the terminal; open it in a browser and upload an image to view the AI/Human probabilities alongside TruFor diagnostics.
+## TruFor Weights
+TruFor is released for non-commercial research use. Obtain the official `trufor.pth.tar` weight file from the upstream project and place it at `weights/trufor.pth.tar` (or set the environment variable `TRUFOR_WEIGHTS` to point to the file). When the weights are available, the app switches to the native TruFor backend and overlays tamper and confidence heatmaps next to the classifier output.
+Optional environment variables:
+- `TRUFOR_BACKEND`: force a backend (`native`, `docker`, or `auto`). The default is `auto`, which prefers the bundled native implementation.
+- `TRUFOR_WEIGHTS`: absolute or relative path to `trufor.pth.tar` if you keep the file outside `weights/`.
+## Notes
+- The TruFor assets are redistributed here as Python modules for convenience, but you must still respect the upstream license for any research or redistribution.
+- Docker support remains available for legacy setups, but no container build steps are required when using the bundled backend.

app.py ADDED Viewed

	@@ -0,0 +1,132 @@

+import gradio as gr
+import torch
+from PIL import Image
+from typing import Dict, Optional, Tuple
+from transformers import AutoImageProcessor, SiglipForImageClassification
+from trufor_runner import TruForEngine, TruForResult, TruForUnavailableError
+# Hugging Face Hub identifier of the AI-vs-human classifier checkpoint.
+MODEL_ID = "Ateeqq/ai-vs-human-image-detector"
+# Use GPU when available so large batches stay responsive.
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# Load the processor and classifier once at import time; any failure is re-raised
+# as RuntimeError so the app aborts at startup instead of on the first request.
+try:
+    processor = AutoImageProcessor.from_pretrained(MODEL_ID)
+    model = SiglipForImageClassification.from_pretrained(MODEL_ID)
+    model.to(device)
+    model.eval()
+except Exception as exc:  # pragma: no cover - surface loading issues early.
+    raise RuntimeError(f"Failed to load model from {MODEL_ID}") from exc
+# TruFor is optional: when the engine cannot be constructed the app keeps running
+# with TRUFOR_ENGINE set to None and shows the error text as the status message.
+# NOTE(review): the engine is pinned to "cpu" even when `device` is CUDA - confirm this is intended.
+try:
+    TRUFOR_ENGINE: Optional[TruForEngine] = TruForEngine(device="cpu")
+    TRUFOR_STATUS = TRUFOR_ENGINE.status_message
+except TruForUnavailableError as exc:
+    TRUFOR_ENGINE = None
+    TRUFOR_STATUS = str(exc)
+def analyze_ai_vs_human(image: Image.Image) -> Tuple[Dict[str, float], str]:
+    """Run the Hugging Face detector and return confidences with a readable summary."""
+    if image is None:
+        empty_scores = {label: 0.0 for label in model.config.id2label.values()}
+        return empty_scores, "No image provided."
+    image = image.convert("RGB")
+    inputs = processor(images=image, return_tensors="pt").to(device)
+    with torch.no_grad():
+        logits = model(**inputs).logits
+    probabilities = torch.softmax(logits, dim=-1)[0]
+    scores = {
+        model.config.id2label[idx]: float(probabilities[idx])
+        for idx in range(probabilities.size(0))
+    }
+    top_idx = int(probabilities.argmax().item())
+    top_label = model.config.id2label[top_idx]
+    top_score = scores[top_label]
+    summary = f"**Predicted Label:** {top_label}  \
+**Confidence:** {top_score:.4f}"
+    return scores, summary
+def analyze_trufor(image: Image.Image) -> Tuple[str, Optional[Image.Image], Optional[Image.Image]]:
+    """Run TruFor inference when available, otherwise return diagnostics."""
+    if TRUFOR_ENGINE is None:
+        return TRUFOR_STATUS, None, None
+    if image is None:
+        return "Upload an image to run TruFor.", None, None
+    try:
+        result: TruForResult = TRUFOR_ENGINE.infer(image)
+    except TruForUnavailableError as exc:
+        return str(exc), None, None
+    summary_lines = []
+    if result.score is not None:
+        summary_lines.append(f"**Tamper Score:** {result.score:.4f}")
+    extras_dict = result.raw_scores.copy()
+    if result.score is not None:
+        extras_dict.pop("tamper_score", None)
+    if extras_dict:
+        extras = "  ".join(f"{key}: {value:.4f}" for key, value in extras_dict.items())
+        summary_lines.append(f"`{extras}`")
+    if not summary_lines:
+        summary_lines.append("TruFor returned no scores for this image.")
+    return "\n".join(summary_lines), result.map_overlay, result.confidence_overlay
+def analyze_image(image: Image.Image) -> Tuple[Dict[str, float], str, str, Optional[Image.Image], Optional[Image.Image]]:
+    ai_scores, ai_summary = analyze_ai_vs_human(image)
+    trufor_summary, tamper_overlay, conf_overlay = analyze_trufor(image)
+    return ai_scores, ai_summary, trufor_summary, tamper_overlay, conf_overlay
+# Build the Gradio UI. Components appear in the page in the order they are created here.
+with gr.Blocks() as demo:
+    gr.Markdown(
+        """# Image Authenticity Workbench\nUpload an image to compare the AI-vs-human classifier with the TruFor forgery detector."""
+    )
+    # Shows the TruFor status captured at import time (engine status or the error text).
+    status_box = gr.Markdown(f"`{TRUFOR_STATUS}`")
+    image_input = gr.Image(label="Input Image", type="pil")
+    analyze_button = gr.Button("Analyze", variant="primary", size="sm")
+    with gr.Tabs():
+        with gr.TabItem("AI vs Human"):
+            ai_label_output = gr.Label(label="Prediction", num_top_classes=2)
+            ai_summary_output = gr.Markdown("Upload an image to view the prediction.")
+        with gr.TabItem("TruFor Forgery Detection"):
+            trufor_summary_output = gr.Markdown("Configure TruFor assets to enable tamper analysis.")
+            tamper_overlay_output = gr.Image(label="Tamper Heatmap", type="pil", interactive=False)
+            conf_overlay_output = gr.Image(label="Confidence Heatmap", type="pil", interactive=False)
+    # Must stay in the same order as the 5-tuple returned by analyze_image.
+    output_components = [
+        ai_label_output,
+        ai_summary_output,
+        trufor_summary_output,
+        tamper_overlay_output,
+        conf_overlay_output,
+    ]
+    # The same handler is wired to both the button click and any change of the
+    # image input, so analysis also runs when the image is replaced or cleared.
+    analyze_button.click(
+        fn=analyze_image,
+        inputs=image_input,
+        outputs=output_components,
+    )
+    image_input.change(
+        fn=analyze_image,
+        inputs=image_input,
+        outputs=output_components,
+    )
+if __name__ == "__main__":
+    demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+gradio==4.44.1
+pydantic==2.8.2
+transformers==4.44.2
+torch>=2.1,<3
+Pillow>=10.0
+numpy>=1.23
+timm>=0.5.4

trufor_native/__init__.py ADDED Viewed

	@@ -0,0 +1,5 @@

+"""Bundled TruFor model for native inference."""
+from .inference import TruForBundledModel
+__all__ = ["TruForBundledModel"]

trufor_native/inference.py ADDED Viewed

	@@ -0,0 +1,130 @@

+from __future__ import annotations
+import logging
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Optional
+import numpy as np
+import torch
+from PIL import Image
+from .models.cmx.builder_np_conf import myEncoderDecoder as TruForNetwork
+LOGGER = logging.getLogger(__name__)
+@dataclass(frozen=True)
+class TruForOutputs:
+    """Lightweight container for TruFor inference outputs.
+    Instances are immutable (frozen dataclass).
+    """
+    # Softmax probability of class index 1, as a NumPy array (see TruForBundledModel.predict).
+    tamper_map: np.ndarray
+    # Sigmoid of the network's confidence output; None when the network returns no confidence.
+    confidence_map: Optional[np.ndarray]
+    # Sigmoid of the network's detection output as a Python float; None when absent.
+    detection_score: Optional[float]
+class TruForBundledModel:
+    """Loads the TruFor network from the vendored sources and runs inference."""
+    def __init__(self, weights_path: Path | str, device: str = "cpu") -> None:
+        self.weights_path = Path(weights_path)
+        if not self.weights_path.exists():
+            raise FileNotFoundError(f"TruFor weights missing at {self.weights_path}")
+        try:
+            self.device = torch.device(device)
+        except RuntimeError as exc:  # pragma: no cover - defensive path for invalid strings
+            raise ValueError(f"Unsupported torch device '{device}'") from exc
+        self.model = self._build_model().to(self.device)
+        self.model.eval()
+    # ------------------------------------------------------------------
+    # Public API
+    # ------------------------------------------------------------------
+    def predict(self, image: Image.Image) -> TruForOutputs:
+        if image is None:
+            raise ValueError("An input image is required for TruFor inference.")
+        tensor = self._prepare_tensor(image).to(self.device)
+        with torch.inference_mode():
+            pred, conf, det, _ = self.model(tensor)
+            tamper_map = torch.softmax(pred[0], dim=0)[1].cpu().numpy()
+            confidence_map: Optional[np.ndarray] = None
+            if conf is not None:
+                confidence_map = torch.sigmoid(conf[0][0]).cpu().numpy()
+            detection_score: Optional[float] = None
+            if det is not None:
+                detection_score = torch.sigmoid(det).item()
+        return TruForOutputs(
+            tamper_map=tamper_map,
+            confidence_map=confidence_map,
+            detection_score=detection_score,
+        )
+    # ------------------------------------------------------------------
+    # Internal helpers
+    # ------------------------------------------------------------------
+    def _build_model(self) -> torch.nn.Module:
+        cfg = self._default_config()
+        model = TruForNetwork(cfg=cfg)
+        checkpoint = torch.load(self.weights_path, map_location="cpu", weights_only=False)
+        state_dict = checkpoint.get("state_dict", checkpoint)
+        missing, unexpected = model.load_state_dict(state_dict, strict=False)
+        if missing:
+            LOGGER.warning("TruFor missing keys: %s", sorted(missing))
+        if unexpected:
+            LOGGER.warning("TruFor unexpected keys: %s", sorted(unexpected))
+        return model
+    @staticmethod
+    def _prepare_tensor(image: Image.Image) -> torch.Tensor:
+        rgb = np.asarray(image.convert("RGB"), dtype=np.float32)
+        tensor = torch.from_numpy(rgb.transpose(2, 0, 1)).unsqueeze(0)
+        tensor = tensor / 256.0  # matches the reference implementation
+        return tensor
+    class AttrNamespace(dict):
+        def __getattr__(self, item):
+            try:
+                return self[item]
+            except KeyError as exc:
+                raise AttributeError(item) from exc
+        def __setattr__(self, key, value):
+            self[key] = value
+        def __contains__(self, item):
+            return item in self.keys()
+    @classmethod
+    def _default_config(cls) -> AttrNamespace:
+        extra = cls.AttrNamespace(
+            BACKBONE="mit_b2",
+            DECODER="MLPDecoder",
+            DECODER_EMBED_DIM=512,
+            PREPRC="imagenet",
+            BN_EPS=0.001,
+            BN_MOMENTUM=0.1,
+            DETECTION="confpool",
+            CONF=True,
+            NP_WEIGHTS="",
+        )
+        model = cls.AttrNamespace(
+            NAME="detconfcmx",
+            MODS=("RGB", "NP++"),
+            PRETRAINED="",
+            EXTRA=extra,
+        )
+        dataset = cls.AttrNamespace(NUM_CLASSES=2)
+        return cls.AttrNamespace(MODEL=model, DATASET=dataset)

trufor_native/models/DnCNN.py ADDED Viewed

	@@ -0,0 +1,145 @@

+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Copyright (c) 2023 Image Processing Research Group of University Federico II of Naples ('GRIP-UNINA').
+#
+# All rights reserved.
+# This work should only be used for nonprofit purposes.
+#
+# By downloading and/or using any of these files, you implicitly agree to all the
+# terms of the license, as specified in the document LICENSE.txt
+# (included in this package) and online at
+# http://www.grip.unina.it/download/LICENSE_OPEN.txt
+"""
+Created in September 2020
+@author: davide.cozzolino
+"""
+import math
+import torch.nn as nn
+def conv_with_padding(in_planes, out_planes, kernelsize, stride=1, dilation=1, bias=False, padding = None):
+    if padding is None:
+        padding = kernelsize//2
+    return nn.Conv2d(in_planes, out_planes, kernel_size=kernelsize, stride=stride, dilation=dilation, padding=padding, bias=bias)
+def conv_init(conv, act='linear'):
+    r"""
+    Reproduces conv initialization from DnCNN
+    """
+    n = conv.kernel_size[0] * conv.kernel_size[1] * conv.out_channels
+    conv.weight.data.normal_(0, math.sqrt(2. / n))
+def batchnorm_init(m, kernelsize=3):
+    r"""
+    Reproduces batchnorm initialization from DnCNN
+    """
+    n = kernelsize**2 * m.num_features
+    m.weight.data.normal_(0, math.sqrt(2. / (n)))
+    m.bias.data.zero_()
+def make_activation(act):
+    if act is None:
+        return None
+    elif act == 'relu':
+        return nn.ReLU(inplace=True)
+    elif act == 'tanh':
+        return nn.Tanh()
+    elif act == 'leaky_relu':
+        return nn.LeakyReLU(inplace=True)
+    elif act == 'softmax':
+        return nn.Softmax()
+    elif act == 'linear':
+        return None
+    else:
+        assert(False)
+def make_net(nplanes_in, kernels, features, bns, acts, dilats, bn_momentum = 0.1, padding=None):
+    r"""
+    :param nplanes_in: number of of input feature channels
+    :param kernels: list of kernel size for convolution layers
+    :param features: list of hidden layer feature channels
+    :param bns: list of whether to add batchnorm layers
+    :param acts: list of activations
+    :param dilats: list of dilation factors
+    :param bn_momentum: momentum of batchnorm
+    :param padding: integer for padding (None for same padding)
+    """
+    depth = len(features)
+    assert(len(features)==len(kernels))
+    layers = list()
+    for i in range(0,depth):
+        if i==0:
+            in_feats = nplanes_in
+        else:
+            in_feats = features[i-1]
+        elem = conv_with_padding(in_feats, features[i], kernelsize=kernels[i], dilation=dilats[i], padding=padding, bias=not(bns[i]))
+        conv_init(elem, act=acts[i])
+        layers.append(elem)
+        if bns[i]:
+            elem = nn.BatchNorm2d(features[i], momentum = bn_momentum)
+            batchnorm_init(elem, kernelsize=kernels[i])
+            layers.append(elem)
+        elem = make_activation(acts[i])
+        if elem is not None:
+            layers.append(elem)
+    return nn.Sequential(*layers)
+class DnCNN(nn.Module):
+    r"""
+    Implements a DnCNN network
+    """
+    def __init__(self, nplanes_in, nplanes_out, features, kernel, depth, activation, residual, bn, lastact=None, bn_momentum = 0.10, padding=None):
+        r"""
+        :param nplanes_in: number of of input feature channels
+        :param nplanes_out: number of of output feature channels
+        :param features: number of of hidden layer feature channels
+        :param kernel: kernel size of convolution layers
+        :param depth: number of convolution layers (minimum 2)
+        :param bn:  whether to add batchnorm layers
+        :param residual: whether to add a residual connection from input to output
+        :param bn_momentum: momentum of batchnorm
+        :param padding: inteteger for padding
+        """
+        super(DnCNN, self).__init__()
+        self.residual = residual
+        self.nplanes_out = nplanes_out
+        self.nplanes_in = nplanes_in
+        kernels = [kernel, ] * depth
+        features = [features, ] * (depth-1) + [nplanes_out, ]
+        bns = [False, ] + [bn,] * (depth - 2) + [False, ]
+        dilats = [1, ] * depth
+        acts = [activation, ] * (depth - 1) + [lastact, ]
+        self.layers = make_net(nplanes_in, kernels, features, bns, acts, dilats=dilats, bn_momentum = bn_momentum, padding=padding)
+    def forward(self, x):
+        shortcut = x
+        x = self.layers(x)
+        if self.residual:
+            nshortcut = min(self.nplanes_in, self.nplanes_out)
+            x[:, :nshortcut, :, :] = x[:, :nshortcut, :, :] + shortcut[:, :nshortcut, :, :]
+        return x
+def add_commandline_networkparams(parser, name, features, depth, kernel, activation, bn):
+    parser.add_argument("--{}.{}".format(name, "features"  ), type=int, default=features  )
+    parser.add_argument("--{}.{}".format(name, "depth"     ), type=int, default=depth     )
+    parser.add_argument("--{}.{}".format(name, "kernel"    ), type=int, default=kernel    )
+    parser.add_argument("--{}.{}".format(name, "activation"), type=str, default=activation)
+    bnarg = "{}.{}".format(name, "bn")
+    parser.add_argument("--"+bnarg   , action="store_true", dest=bnarg)
+    parser.add_argument("--{}.{}".format(name, "no-bn"), action="store_false", dest=bnarg)
+    parser.set_defaults(**{bnarg: bn})

trufor_native/models/__init__.py ADDED Viewed

	@@ -0,0 +1,10 @@

+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Copyright (c) 2023 Image Processing Research Group of University Federico II of Naples ('GRIP-UNINA').
+#
+# All rights reserved.
+# This work should only be used for nonprofit purposes.
+#
+# By downloading and/or using any of these files, you implicitly agree to all the
+# terms of the license, as specified in the document LICENSE.txt
+# (included in this package) and online at
+# http://www.grip.unina.it/download/LICENSE_OPEN.txt

trufor_native/models/cmx/LICENSE_CMX.txt ADDED Viewed

	@@ -0,0 +1,21 @@

+MIT License
+Copyright (c) 2022 Huayao Liu
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

trufor_native/models/cmx/__init__.py ADDED Viewed

File without changes

trufor_native/models/cmx/builder_np_conf.py ADDED Viewed

	@@ -0,0 +1,175 @@

+"""
+Edited in September 2022
+@author: fabrizio.guillaro, davide.cozzolino
+"""
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import os
+from .utils.init_func import init_weight
+import logging
+def preprc_imagenet_torch(x):
+    mean = torch.Tensor([0.485, 0.456, 0.406]).to(x.device)
+    std  = torch.Tensor([0.229, 0.224, 0.225]).to(x.device)
+    x = (x-mean[None, :, None, None]) / std[None, :, None, None]
+    return x
+def create_backbone(typ, norm_layer):
+    channels = [64, 128, 320, 512]
+    if typ == 'mit_b2':
+        logging.info('Using backbone: Segformer-B2')
+        from .encoders.dual_segformer import mit_b2 as backbone_
+        backbone = backbone_(norm_fuse=norm_layer)
+    else:
+        raise NotImplementedError('backbone not implemented')
+    return backbone, channels
+class myEncoderDecoder(nn.Module):
+    """Encoder-decoder network with a DnCNN branch, optional confidence head and optional detector.
+    ``forward`` returns ``(out, conf, det, modal_x)``; ``conf``, ``det`` and
+    ``modal_x`` are None when the corresponding part is disabled by the config.
+    """
+    def __init__(self, cfg=None, norm_layer=nn.BatchNorm2d):
+        """Build all sub-modules from ``cfg`` (reads cfg.MODEL.EXTRA, cfg.MODEL.MODS,
+        cfg.MODEL.PRETRAINED and cfg.DATASET.NUM_CLASSES) and initialise the weights."""
+        super(myEncoderDecoder, self).__init__()
+        self.norm_layer = norm_layer
+        self.cfg  = cfg.MODEL.EXTRA
+        self.mods = cfg.MODEL.MODS
+        # import backbone and decoder
+        self.backbone, self.channels = create_backbone(self.cfg.BACKBONE, norm_layer)
+        # A separate encoder for the confidence head is built only when the config names one.
+        if 'CONF_BACKBONE' in self.cfg:
+            self.backbone_conf, self.channels_conf = create_backbone(self.cfg.CONF_BACKBONE, norm_layer)
+        else:
+            self.backbone_conf = None
+        if self.cfg.DECODER == 'MLPDecoder':
+            logging.info('Using MLP Decoder')
+            from .decoders.MLPDecoder import DecoderHead
+            self.decode_head = DecoderHead(in_channels=self.channels, num_classes=cfg.DATASET.NUM_CLASSES, norm_layer=norm_layer, embed_dim=self.cfg.DECODER_EMBED_DIM)
+            # The confidence head predicts a single channel.
+            if self.cfg.CONF:
+                self.decode_head_conf = DecoderHead(in_channels=self.channels, num_classes=1, norm_layer=norm_layer, embed_dim=self.cfg.DECODER_EMBED_DIM)
+            else:
+                self.decode_head_conf = None
+            self.conf_detection = None
+            if self.cfg.DETECTION is not None:
+                if self.cfg.DETECTION == 'none':
+                    pass
+                elif self.cfg.DETECTION == 'confpool':
+                    self.conf_detection = 'confpool'
+                    # NOTE(review): assert is stripped under `python -O`; 'confpool' requires CONF to be enabled.
+                    assert self.cfg.CONF
+                    # self.detection exists only in this branch; it maps the 8 pooled
+                    # statistics concatenated in encode_decode to one logit.
+                    self.detection  = nn.Sequential(
+                            nn.Linear(in_features=8, out_features=128),
+                            nn.ReLU(),
+                            nn.Dropout(p=0.5),
+                            nn.Linear(in_features=128, out_features=1),
+                            )
+                else:
+                    raise NotImplementedError('Detection mechanism not implemented')
+        else:
+            raise NotImplementedError('decoder not implemented')
+        from ..DnCNN import make_net
+        # 17-layer DnCNN: 3 input channels, 64 hidden channels, 1 output channel,
+        # batchnorm on every layer except the first and the last.
+        num_levels = 17
+        out_channel = 1
+        self.dncnn = make_net(3, kernels=[3, ] * num_levels,
+                       features=[64, ] * (num_levels - 1) + [out_channel],
+                       bns=[False, ] + [True, ] * (num_levels - 2) + [False, ],
+                       acts=['relu', ] * (num_levels - 1) + ['linear', ],
+                       dilats=[1, ] * num_levels,
+                       bn_momentum=0.1, padding=1)
+        if self.cfg.PREPRC == 'imagenet': #RGB (mean and variance)
+            self.prepro = preprc_imagenet_torch
+        else:
+            # NOTE(review): under `python -O` this assert vanishes and self.prepro is left undefined.
+            assert False
+        self.init_weights(pretrained=cfg.MODEL.PRETRAINED)
+    def init_weights(self, pretrained=None):
+        """Optionally load pretrained backbone and DnCNN weights, then initialise the decoder head.
+        When ``pretrained`` is falsy only the ``init_weight`` call on ``decode_head`` runs.
+        """
+        if pretrained:
+            logging.info('Loading pretrained model: {}'.format(pretrained))
+            self.backbone.init_weights(pretrained=pretrained)
+            if self.backbone_conf is not None:
+                self.backbone_conf.init_weights(pretrained=pretrained)
+            np_weights = self.cfg.NP_WEIGHTS
+            assert os.path.isfile(np_weights)
+            # NOTE(review): torch.load unpickles the file; only load weights from a trusted source.
+            dat = torch.load(np_weights, map_location=torch.device('cpu'))
+            logging.info(f'Noiseprint++ weights: {np_weights}')
+            # Some checkpoints wrap the state dict under a 'network' key.
+            if 'network' in dat:
+                dat = dat['network']
+            self.dncnn.load_state_dict(dat)
+        logging.info('Initing weights ...')
+        # Only decode_head is passed to init_weight here; decode_head_conf is not.
+        init_weight(self.decode_head, nn.init.kaiming_normal_,
+                    self.norm_layer, self.cfg.BN_EPS, self.cfg.BN_MOMENTUM,
+                    mode='fan_in', nonlinearity='relu')
+    def encode_decode(self, rgb, modal_x):
+        """Run encoder and decoder heads; return ``(out, conf, det)`` resized to the input's spatial size."""
+        # Spatial size is taken from rgb when present, otherwise from modal_x.
+        if rgb is not None:
+            orisize = rgb.shape
+        else:
+            orisize = modal_x.shape
+        # cmx
+        x = self.backbone(rgb, modal_x)
+        out, feats = self.decode_head(x, return_feats=True)
+        out = F.interpolate(out, size=orisize[2:], mode='bilinear', align_corners=False)
+        # confidence
+        if self.decode_head_conf is not None:
+            if self.backbone_conf is not None:
+                x_conf = self.backbone_conf(rgb, modal_x)
+            else:
+                x_conf = x # same encoder of Localization Network
+            conf = self.decode_head_conf(x_conf)
+            conf = F.interpolate(conf, size=orisize[2:], mode='bilinear', align_corners=False)
+        else:
+            conf = None
+        # detection
+        if self.conf_detection is not None:
+            if self.conf_detection == 'confpool':
+                from .layer_utils import weighted_statistics_pooling
+                # f1: pooled statistics of the confidence map.
+                # f2: pooled statistics of the class-1 minus class-0 logit difference,
+                #     with log-sigmoid of the confidence passed as second argument.
+                f1 = weighted_statistics_pooling(conf).view(out.shape[0],-1)
+                f2 = weighted_statistics_pooling(out[:,1:2,:,:]-out[:,0:1,:,:], F.logsigmoid(conf)).view(out.shape[0],-1)
+                det = self.detection(torch.cat((f1,f2),-1))
+            else:
+                assert False
+        else:
+            det = None
+        return out, conf, det
+    def forward(self, rgb):
+        """Compute ``(out, conf, det, modal_x)`` for a batch of RGB images."""
+        # Noiseprint++ extraction
+        if 'NP++' in self.mods:
+            # The DnCNN branch runs on the un-normalised input, before self.prepro is applied.
+            modal_x = self.dncnn(rgb)
+            # Repeats the single DnCNN output channel 3 times - assumes a 4-D (N, 1, H, W) tensor.
+            modal_x = torch.tile(modal_x, (3, 1, 1))
+        else:
+            modal_x = None
+        if self.prepro is not None:
+            rgb = self.prepro(rgb)
+        out, conf, det = self.encode_decode(rgb, modal_x)
+        return out, conf, det, modal_x

trufor_native/models/cmx/decoders/MLPDecoder.py ADDED Viewed

	@@ -0,0 +1,86 @@

+import numpy as np
+import torch.nn as nn
+import torch
+import torch.nn.functional as F
+class MLP(nn.Module):
+    """
+    Linear Embedding:
+    """
+    def __init__(self, input_dim=2048, embed_dim=768):
+        super().__init__()
+        self.proj = nn.Linear(input_dim, embed_dim)
+    def forward(self, x):
+        x = x.flatten(2).transpose(1, 2)
+        x = self.proj(x)
+        return x
+class DecoderHead(nn.Module):
+    def __init__(self,
+                 in_channels=[64, 128, 320, 512],
+                 num_classes=40,
+                 dropout_ratio=0.1,
+                 norm_layer=nn.BatchNorm2d,
+                 embed_dim=768,
+                 align_corners=False):
+        super(DecoderHead, self).__init__()
+        self.num_classes = num_classes
+        self.dropout_ratio = dropout_ratio
+        self.align_corners = align_corners
+        self.in_channels = in_channels
+        if dropout_ratio > 0:
+            self.dropout = nn.Dropout2d(dropout_ratio)
+        else:
+            self.dropout = None
+        c1_in_channels, c2_in_channels, c3_in_channels, c4_in_channels = self.in_channels
+        embedding_dim = embed_dim
+        self.linear_c4 = MLP(input_dim=c4_in_channels, embed_dim=embedding_dim)
+        self.linear_c3 = MLP(input_dim=c3_in_channels, embed_dim=embedding_dim)
+        self.linear_c2 = MLP(input_dim=c2_in_channels, embed_dim=embedding_dim)
+        self.linear_c1 = MLP(input_dim=c1_in_channels, embed_dim=embedding_dim)
+        self.linear_fuse = nn.Sequential(
+                            nn.Conv2d(in_channels=embedding_dim*4, out_channels=embedding_dim, kernel_size=1),
+                            norm_layer(embedding_dim),
+                            nn.ReLU(inplace=True)
+                            )
+        self.linear_pred = nn.Conv2d(embedding_dim, self.num_classes, kernel_size=1)
+    def forward(self, inputs, return_feats=False):
+        # len=4, 1/4,1/8,1/16,1/32
+        c1, c2, c3, c4 = inputs
+        ############## MLP decoder on C1-C4 ###########
+        n, _, h, w = c4.shape
+        _c4 = self.linear_c4(c4).permute(0,2,1).reshape(n, -1, c4.shape[2], c4.shape[3])
+        _c4 = F.interpolate(_c4, size=c1.size()[2:],mode='bilinear',align_corners=self.align_corners)
+        _c3 = self.linear_c3(c3).permute(0,2,1).reshape(n, -1, c3.shape[2], c3.shape[3])
+        _c3 = F.interpolate(_c3, size=c1.size()[2:],mode='bilinear',align_corners=self.align_corners)
+        _c2 = self.linear_c2(c2).permute(0,2,1).reshape(n, -1, c2.shape[2], c2.shape[3])
+        _c2 = F.interpolate(_c2, size=c1.size()[2:],mode='bilinear',align_corners=self.align_corners)
+        _c1 = self.linear_c1(c1).permute(0,2,1).reshape(n, -1, c1.shape[2], c1.shape[3])
+        _c = torch.cat([_c4, _c3, _c2, _c1], dim=1)
+        x = self.linear_fuse(_c)
+        x = self.dropout(x)
+        x = self.linear_pred(x)
+        if return_feats:
+            return x, _c
+        else:
+            return x

trufor_native/models/cmx/decoders/__init__.py ADDED Viewed

File without changes

trufor_native/models/cmx/encoders/__init__.py ADDED Viewed

File without changes

trufor_native/models/cmx/encoders/dual_segformer.py ADDED Viewed

	@@ -0,0 +1,518 @@

+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from functools import partial
+from timm.models.layers import DropPath, to_2tuple, trunc_normal_
+from ..net_utils import FeatureFusionModule as FFM
+from ..net_utils import FeatureRectifyModule as FRM
+import math
+import time
+#from engine.logger import get_logger
+import logging as logger
+#logger = get_logger()
+class DWConv(nn.Module):
+    """
+    Depthwise convolution bloc: input: x with size(B N C); output size (B N C)
+    """
+    def __init__(self, dim=768):
+        super(DWConv, self).__init__()
+        self.dwconv = nn.Conv2d(dim, dim, kernel_size=3, stride=1, padding=1, bias=True, groups=dim)
+    def forward(self, x, H, W):
+        B, N, C = x.shape
+        x = x.permute(0, 2, 1).reshape(B, C, H, W).contiguous() # B N C -> B C N -> B C H W
+        x = self.dwconv(x)
+        x = x.flatten(2).transpose(1, 2) # B C H W -> B N C
+        return x
+class Mlp(nn.Module):
+    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
+        super().__init__()
+        """
+        MLP Block:
+        """
+        out_features = out_features or in_features
+        hidden_features = hidden_features or in_features
+        self.fc1 = nn.Linear(in_features, hidden_features)
+        self.dwconv = DWConv(hidden_features)
+        self.act = act_layer()
+        self.fc2 = nn.Linear(hidden_features, out_features)
+        self.drop = nn.Dropout(drop)
+        self.apply(self._init_weights)
+    def _init_weights(self, m):
+        if isinstance(m, nn.Linear):
+            trunc_normal_(m.weight, std=.02)
+            if isinstance(m, nn.Linear) and m.bias is not None:
+                nn.init.constant_(m.bias, 0)
+        elif isinstance(m, nn.LayerNorm):
+            nn.init.constant_(m.bias, 0)
+            nn.init.constant_(m.weight, 1.0)
+        elif isinstance(m, nn.Conv2d):
+            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
+            fan_out //= m.groups
+            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
+            if m.bias is not None:
+                m.bias.data.zero_()
+    def forward(self, x, H, W):
+        x = self.fc1(x)
+        x = self.dwconv(x, H, W)
+        x = self.act(x)
+        x = self.drop(x)
+        x = self.fc2(x)
+        x = self.drop(x)
+        return x
+class Attention(nn.Module):
+    def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0., sr_ratio=1):
+        super().__init__()
+        assert dim % num_heads == 0, f"dim {dim} should be divided by num_heads {num_heads}."
+        self.dim = dim
+        self.num_heads = num_heads
+        head_dim = dim // num_heads
+        self.scale = qk_scale or head_dim ** -0.5
+        # Linear embedding
+        self.q = nn.Linear(dim, dim, bias=qkv_bias)
+        self.kv = nn.Linear(dim, dim * 2, bias=qkv_bias)
+        self.attn_drop = nn.Dropout(attn_drop)
+        self.proj = nn.Linear(dim, dim)
+        self.proj_drop = nn.Dropout(proj_drop)
+        self.sr_ratio = sr_ratio
+        if sr_ratio > 1:
+            self.sr = nn.Conv2d(dim, dim, kernel_size=sr_ratio, stride=sr_ratio)
+            self.norm = nn.LayerNorm(dim)
+        self.apply(self._init_weights)
+    def _init_weights(self, m):
+        if isinstance(m, nn.Linear):
+            trunc_normal_(m.weight, std=.02)
+            if isinstance(m, nn.Linear) and m.bias is not None:
+                nn.init.constant_(m.bias, 0)
+        elif isinstance(m, nn.LayerNorm):
+            nn.init.constant_(m.bias, 0)
+            nn.init.constant_(m.weight, 1.0)
+        elif isinstance(m, nn.Conv2d):
+            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
+            fan_out //= m.groups
+            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
+            if m.bias is not None:
+                m.bias.data.zero_()
+    def forward(self, x, H, W):
+        B, N, C = x.shape
+        # B N C -> B N num_head C//num_head -> B C//num_head N num_heads
+        q = self.q(x).reshape(B, N, self.num_heads, C // self.num_heads).permute(0, 2, 1, 3)
+        if self.sr_ratio > 1:
+            x_ = x.permute(0, 2, 1).reshape(B, C, H, W)
+            x_ = self.sr(x_).reshape(B, C, -1).permute(0, 2, 1)
+            x_ = self.norm(x_)
+            kv = self.kv(x_).reshape(B, -1, 2, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
+        else:
+            kv = self.kv(x).reshape(B, -1, 2, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
+        k, v = kv[0], kv[1]
+        attn = (q @ k.transpose(-2, -1)) * self.scale
+        attn = attn.softmax(dim=-1)
+        attn = self.attn_drop(attn)
+        x = (attn @ v).transpose(1, 2).reshape(B, N, C)
+        x = self.proj(x)
+        x = self.proj_drop(x)
+        return x
+class Block(nn.Module):
+    """
+    Transformer Block: Self-Attention -> Mix FFN -> OverLap Patch Merging
+    """
+    def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0.,
+                 drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm, sr_ratio=1):
+        super().__init__()
+        self.norm1 = norm_layer(dim)
+        self.attn = Attention(
+            dim,
+            num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale,
+            attn_drop=attn_drop, proj_drop=drop, sr_ratio=sr_ratio)
+        # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
+        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
+        self.norm2 = norm_layer(dim)
+        mlp_hidden_dim = int(dim * mlp_ratio)
+        self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)
+        self.apply(self._init_weights)
+    def _init_weights(self, m):
+        if isinstance(m, nn.Linear):
+            trunc_normal_(m.weight, std=.02)
+            if isinstance(m, nn.Linear) and m.bias is not None:
+                nn.init.constant_(m.bias, 0)
+        elif isinstance(m, nn.LayerNorm):
+            nn.init.constant_(m.bias, 0)
+            nn.init.constant_(m.weight, 1.0)
+        elif isinstance(m, nn.Conv2d):
+            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
+            fan_out //= m.groups
+            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
+            if m.bias is not None:
+                m.bias.data.zero_()
+    def forward(self, x, H, W):
+        x = x + self.drop_path(self.attn(self.norm1(x), H, W))
+        x = x + self.drop_path(self.mlp(self.norm2(x), H, W))
+        return x
+class OverlapPatchEmbed(nn.Module):
+    """ Image to Patch Embedding
+    """
+    def __init__(self, img_size=224, patch_size=7, stride=4, in_chans=3, embed_dim=768):
+        super().__init__()
+        img_size = to_2tuple(img_size)
+        patch_size = to_2tuple(patch_size)
+        self.img_size = img_size
+        self.patch_size = patch_size
+        self.H, self.W = img_size[0] // patch_size[0], img_size[1] // patch_size[1]
+        self.num_patches = self.H * self.W
+        self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=stride,
+                              padding=(patch_size[0] // 2, patch_size[1] // 2))
+        self.norm = nn.LayerNorm(embed_dim)
+        self.apply(self._init_weights)
+    def _init_weights(self, m):
+        if isinstance(m, nn.Linear):
+            trunc_normal_(m.weight, std=.02)
+            if isinstance(m, nn.Linear) and m.bias is not None:
+                nn.init.constant_(m.bias, 0)
+        elif isinstance(m, nn.LayerNorm):
+            nn.init.constant_(m.bias, 0)
+            nn.init.constant_(m.weight, 1.0)
+        elif isinstance(m, nn.Conv2d):
+            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
+            fan_out //= m.groups
+            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
+            if m.bias is not None:
+                m.bias.data.zero_()
+    def forward(self, x):
+        # B C H W
+        x = self.proj(x)
+        _, _, H, W = x.shape
+        x = x.flatten(2).transpose(1, 2)
+        # B H*W/16 C
+        x = self.norm(x)
+        return x, H, W
+class RGBXTransformer(nn.Module):
+    def __init__(self, img_size=224, patch_size=16, in_chans=3, num_classes=1000, embed_dims=[64, 128, 256, 512],
+                 num_heads=[1, 2, 4, 8], mlp_ratios=[4, 4, 4, 4], qkv_bias=False, qk_scale=None, drop_rate=0.,
+                 attn_drop_rate=0., drop_path_rate=0., norm_layer=nn.LayerNorm, norm_fuse=nn.BatchNorm2d,
+                 depths=[3, 4, 6, 3], sr_ratios=[8, 4, 2, 1], stride0=4):
+        super().__init__()
+        self.num_classes = num_classes
+        self.depths = depths
+        # patch_embed
+        self.patch_embed1 = OverlapPatchEmbed(img_size=img_size, patch_size=7, stride=stride0, in_chans=in_chans,
+                                              embed_dim=embed_dims[0])
+        self.patch_embed2 = OverlapPatchEmbed(img_size=img_size // 4, patch_size=3, stride=2, in_chans=embed_dims[0],
+                                              embed_dim=embed_dims[1])
+        self.patch_embed3 = OverlapPatchEmbed(img_size=img_size // 8, patch_size=3, stride=2, in_chans=embed_dims[1],
+                                              embed_dim=embed_dims[2])
+        self.patch_embed4 = OverlapPatchEmbed(img_size=img_size // 16, patch_size=3, stride=2, in_chans=embed_dims[2],
+                                              embed_dim=embed_dims[3])
+        self.extra_patch_embed1 = OverlapPatchEmbed(img_size=img_size, patch_size=7, stride=stride0, in_chans=in_chans,
+                                              embed_dim=embed_dims[0])
+        self.extra_patch_embed2 = OverlapPatchEmbed(img_size=img_size // 4, patch_size=3, stride=2, in_chans=embed_dims[0],
+                                              embed_dim=embed_dims[1])
+        self.extra_patch_embed3 = OverlapPatchEmbed(img_size=img_size // 8, patch_size=3, stride=2, in_chans=embed_dims[1],
+                                              embed_dim=embed_dims[2])
+        self.extra_patch_embed4 = OverlapPatchEmbed(img_size=img_size // 16, patch_size=3, stride=2, in_chans=embed_dims[2],
+                                              embed_dim=embed_dims[3])
+        # transformer encoder
+        dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]  # stochastic depth decay rule
+        cur = 0
+        self.block1 = nn.ModuleList([Block(
+            dim=embed_dims[0], num_heads=num_heads[0], mlp_ratio=mlp_ratios[0], qkv_bias=qkv_bias, qk_scale=qk_scale,
+            drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + i], norm_layer=norm_layer,
+            sr_ratio=sr_ratios[0])
+            for i in range(depths[0])])
+        self.norm1 = norm_layer(embed_dims[0])
+        self.extra_block1 = nn.ModuleList([Block(
+            dim=embed_dims[0], num_heads=num_heads[0], mlp_ratio=mlp_ratios[0], qkv_bias=qkv_bias, qk_scale=qk_scale,
+            drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + i], norm_layer=norm_layer,
+            sr_ratio=sr_ratios[0])
+            for i in range(depths[0])])
+        self.extra_norm1 = norm_layer(embed_dims[0])
+        cur += depths[0]
+        self.block2 = nn.ModuleList([Block(
+            dim=embed_dims[1], num_heads=num_heads[1], mlp_ratio=mlp_ratios[1], qkv_bias=qkv_bias, qk_scale=qk_scale,
+            drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur], norm_layer=norm_layer,
+            sr_ratio=sr_ratios[1])
+            for i in range(depths[1])])
+        self.norm2 = norm_layer(embed_dims[1])
+        self.extra_block2 = nn.ModuleList([Block(
+            dim=embed_dims[1], num_heads=num_heads[1], mlp_ratio=mlp_ratios[1], qkv_bias=qkv_bias, qk_scale=qk_scale,
+            drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur+1], norm_layer=norm_layer,
+            sr_ratio=sr_ratios[1])
+            for i in range(depths[1])])
+        self.extra_norm2 = norm_layer(embed_dims[1])
+        cur += depths[1]
+        self.block3 = nn.ModuleList([Block(
+            dim=embed_dims[2], num_heads=num_heads[2], mlp_ratio=mlp_ratios[2], qkv_bias=qkv_bias, qk_scale=qk_scale,
+            drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + i], norm_layer=norm_layer,
+            sr_ratio=sr_ratios[2])
+            for i in range(depths[2])])
+        self.norm3 = norm_layer(embed_dims[2])
+        self.extra_block3 = nn.ModuleList([Block(
+            dim=embed_dims[2], num_heads=num_heads[2], mlp_ratio=mlp_ratios[2], qkv_bias=qkv_bias, qk_scale=qk_scale,
+            drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + i], norm_layer=norm_layer,
+            sr_ratio=sr_ratios[2])
+            for i in range(depths[2])])
+        self.extra_norm3 = norm_layer(embed_dims[2])
+        cur += depths[2]
+        self.block4 = nn.ModuleList([Block(
+            dim=embed_dims[3], num_heads=num_heads[3], mlp_ratio=mlp_ratios[3], qkv_bias=qkv_bias, qk_scale=qk_scale,
+            drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + i], norm_layer=norm_layer,
+            sr_ratio=sr_ratios[3])
+            for i in range(depths[3])])
+        self.norm4 = norm_layer(embed_dims[3])
+        self.extra_block4 = nn.ModuleList([Block(
+            dim=embed_dims[3], num_heads=num_heads[3], mlp_ratio=mlp_ratios[3], qkv_bias=qkv_bias, qk_scale=qk_scale,
+            drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + i], norm_layer=norm_layer,
+            sr_ratio=sr_ratios[3])
+            for i in range(depths[3])])
+        self.extra_norm4 = norm_layer(embed_dims[3])
+        cur += depths[3]
+        self.FRMs = nn.ModuleList([
+                    FRM(dim=embed_dims[0], reduction=1),
+                    FRM(dim=embed_dims[1], reduction=1),
+                    FRM(dim=embed_dims[2], reduction=1),
+                    FRM(dim=embed_dims[3], reduction=1)])
+        self.FFMs = nn.ModuleList([
+                    FFM(dim=embed_dims[0], reduction=1, num_heads=num_heads[0], norm_layer=norm_fuse),
+                    FFM(dim=embed_dims[1], reduction=1, num_heads=num_heads[1], norm_layer=norm_fuse),
+                    FFM(dim=embed_dims[2], reduction=1, num_heads=num_heads[2], norm_layer=norm_fuse),
+                    FFM(dim=embed_dims[3], reduction=1, num_heads=num_heads[3], norm_layer=norm_fuse)])
+        self.apply(self._init_weights)
+    def _init_weights(self, m):
+        if isinstance(m, nn.Linear):
+            trunc_normal_(m.weight, std=.02)
+            if isinstance(m, nn.Linear) and m.bias is not None:
+                nn.init.constant_(m.bias, 0)
+        elif isinstance(m, nn.LayerNorm):
+            nn.init.constant_(m.bias, 0)
+            nn.init.constant_(m.weight, 1.0)
+        elif isinstance(m, nn.Conv2d):
+            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
+            fan_out //= m.groups
+            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
+            if m.bias is not None:
+                m.bias.data.zero_()
+    def init_weights(self, pretrained=None):
+        if isinstance(pretrained, str):
+            load_dualpath_model(self, pretrained)
+        else:
+            raise TypeError('pretrained must be a str or None')
+    def forward_features(self, x_rgb, x_e):
+        """
+        x_rgb: B x N x H x W
+        """
+        B = x_rgb.shape[0]
+        outs = []
+        outs_fused = []
+        # stage 1
+        x_rgb, H, W = self.patch_embed1(x_rgb)
+        # B H*W/16 C
+        x_e, _, _ = self.extra_patch_embed1(x_e)
+        for i, blk in enumerate(self.block1):
+            x_rgb = blk(x_rgb, H, W)
+        for i, blk in enumerate(self.extra_block1):
+            x_e = blk(x_e, H, W)
+        x_rgb = self.norm1(x_rgb)
+        x_e = self.extra_norm1(x_e)
+        x_rgb = x_rgb.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous()
+        x_e = x_e.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous()
+        x_rgb, x_e = self.FRMs[0](x_rgb, x_e)
+        x_fused = self.FFMs[0](x_rgb, x_e)
+        outs.append(x_fused)
+        # stage 2
+        x_rgb, H, W = self.patch_embed2(x_rgb)
+        x_e, _, _ = self.extra_patch_embed2(x_e)
+        for i, blk in enumerate(self.block2):
+            x_rgb = blk(x_rgb, H, W)
+        for i, blk in enumerate(self.extra_block2):
+            x_e = blk(x_e, H, W)
+        x_rgb = self.norm2(x_rgb)
+        x_e = self.extra_norm2(x_e)
+        x_rgb = x_rgb.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous()
+        x_e = x_e.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous()
+        x_rgb, x_e = self.FRMs[1](x_rgb, x_e)
+        x_fused = self.FFMs[1](x_rgb, x_e)
+        outs.append(x_fused)
+        # stage 3
+        x_rgb, H, W = self.patch_embed3(x_rgb)
+        x_e, _, _ = self.extra_patch_embed3(x_e)
+        for i, blk in enumerate(self.block3):
+            x_rgb = blk(x_rgb, H, W)
+        for i, blk in enumerate(self.extra_block3):
+            x_e = blk(x_e, H, W)
+        x_rgb = self.norm3(x_rgb)
+        x_e = self.extra_norm3(x_e)
+        x_rgb = x_rgb.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous()
+        x_e = x_e.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous()
+        x_rgb, x_e = self.FRMs[2](x_rgb, x_e)
+        x_fused = self.FFMs[2](x_rgb, x_e)
+        outs.append(x_fused)
+        # stage 4
+        x_rgb, H, W = self.patch_embed4(x_rgb)
+        x_e, _, _ = self.extra_patch_embed4(x_e)
+        for i, blk in enumerate(self.block4):
+            x_rgb = blk(x_rgb, H, W)
+        for i, blk in enumerate(self.extra_block4):
+            x_e = blk(x_e, H, W)
+        x_rgb = self.norm4(x_rgb)
+        x_e = self.extra_norm4(x_e)
+        x_rgb = x_rgb.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous()
+        x_e = x_e.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous()
+        x_rgb, x_e = self.FRMs[3](x_rgb, x_e)
+        x_fused = self.FFMs[3](x_rgb, x_e)
+        outs.append(x_fused)
+        return outs
+    def forward(self, x_rgb, x_e):
+        out = self.forward_features(x_rgb, x_e)
+        return out
+def load_dualpath_model(model, model_file):
+    # load raw state_dict
+    t_start = time.time()
+    if isinstance(model_file, str):
+        raw_state_dict = torch.load(model_file, map_location=torch.device('cpu'))
+        #raw_state_dict = torch.load(model_file)
+        if 'model' in raw_state_dict.keys():
+            raw_state_dict = raw_state_dict['model']
+    else:
+        raw_state_dict = model_file
+    state_dict = {}
+    for k, v in raw_state_dict.items():
+        if k.find('patch_embed') >= 0:
+            state_dict[k] = v
+            state_dict[k.replace('patch_embed', 'extra_patch_embed')] = v
+        elif k.find('block') >= 0:
+            state_dict[k] = v
+            state_dict[k.replace('block', 'extra_block')] = v
+        elif k.find('norm') >= 0:
+            state_dict[k] = v
+            state_dict[k.replace('norm', 'extra_norm')] = v
+    t_ioend = time.time()
+    model.load_state_dict(state_dict, strict=False)
+    del state_dict
+    t_end = time.time()
+    logger.info(
+        "Load model, Time usage:\n\tIO: {}, initialize parameters: {}".format(
+            t_ioend - t_start, t_end - t_ioend))
+class mit_b0(RGBXTransformer):
+    def __init__(self, fuse_cfg=None, stride0=4, **kwargs):
+        super(mit_b0, self).__init__(
+            patch_size=4, embed_dims=[32, 64, 160, 256], num_heads=[1, 2, 5, 8], mlp_ratios=[4, 4, 4, 4],
+            qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[2, 2, 2, 2], sr_ratios=[8, 4, 2, 1],
+            drop_rate=0.0, drop_path_rate=0.1, stride0=stride0)
+class mit_b1(RGBXTransformer):
+    def __init__(self, fuse_cfg=None, stride0=4, **kwargs):
+        super(mit_b1, self).__init__(
+            patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[4, 4, 4, 4],
+            qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[2, 2, 2, 2], sr_ratios=[8, 4, 2, 1],
+            drop_rate=0.0, drop_path_rate=0.1, stride0=stride0)
+class mit_b2(RGBXTransformer):
+    def __init__(self, fuse_cfg=None, stride0=4, **kwargs):
+        super(mit_b2, self).__init__(
+            patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[4, 4, 4, 4],
+            qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[3, 4, 6, 3], sr_ratios=[8, 4, 2, 1],
+            drop_rate=0.0, drop_path_rate=0.1, stride0=stride0)
+class mit_b3(RGBXTransformer):
+    def __init__(self, fuse_cfg=None, stride0=4, **kwargs):
+        super(mit_b3, self).__init__(
+            patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[4, 4, 4, 4],
+            qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[3, 4, 18, 3], sr_ratios=[8, 4, 2, 1],
+            drop_rate=0.0, drop_path_rate=0.1, stride0=stride0)
+class mit_b4(RGBXTransformer):
+    def __init__(self, fuse_cfg=None, stride0=4, **kwargs):
+        super(mit_b4, self).__init__(
+            patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[4, 4, 4, 4],
+            qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[3, 8, 27, 3], sr_ratios=[8, 4, 2, 1],
+            drop_rate=0.0, drop_path_rate=0.1, stride0=stride0)
+class mit_b5(RGBXTransformer):
+    def __init__(self, fuse_cfg=None, stride0=4, **kwargs):
+        super(mit_b5, self).__init__(
+            patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[4, 4, 4, 4],
+            qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[3, 6, 40, 3], sr_ratios=[8, 4, 2, 1],
+            drop_rate=0.0, drop_path_rate=0.1, stride0=stride0)

trufor_native/models/cmx/layer_utils.py ADDED Viewed

	@@ -0,0 +1,45 @@

+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Copyright (c) 2023 Image Processing Research Group of University Federico II of Naples ('GRIP-UNINA').
+#
+# All rights reserved.
+# This work should only be used for nonprofit purposes.
+#
+# By downloading and/or using any of these files, you implicitly agree to all the
+# terms of the license, as specified in the document LICENSE.txt
+# (included in this package) and online at
+# http://www.grip.unina.it/download/LICENSE_OPEN.txt
+"""
+Created in September 2022
+@author: davide.cozzolino
+"""
+import torch
+import torch.nn.functional as F
+def weighted_statistics_pooling(x, log_w=None):
+        b = x.shape[0]
+        c = x.shape[1]
+        x = x.view(b,c,-1)
+        if log_w is None:
+            log_w = torch.zeros((b,1,x.shape[-1]), device=x.device)
+        else:
+            assert log_w.shape[0]==b
+            assert log_w.shape[1]==1
+            log_w = log_w.view(b,1,-1)
+            assert log_w.shape[-1]==x.shape[-1]
+        log_w = F.log_softmax(log_w, dim=-1)
+        x_min = -torch.logsumexp(log_w-x, dim=-1)
+        x_max =  torch.logsumexp(log_w+x, dim=-1)
+        w = torch.exp(log_w)
+        x_avg = torch.sum(w*x  , dim=-1)
+        x_msq = torch.sum(w*x*x, dim=-1)
+        x = torch.cat((x_min, x_max, x_avg, x_msq), dim=1)
+        return x

trufor_native/models/cmx/net_utils.py ADDED Viewed

	@@ -0,0 +1,193 @@

+import torch
+import torch.nn as nn
+from timm.models.layers import trunc_normal_
+import math
+# Feature Rectify Module
+class ChannelWeights(nn.Module):
+    def __init__(self, dim, reduction=1):
+        super(ChannelWeights, self).__init__()
+        self.dim = dim
+        self.avg_pool = nn.AdaptiveAvgPool2d(1)
+        self.max_pool = nn.AdaptiveMaxPool2d(1)
+        self.mlp = nn.Sequential(
+                    nn.Linear(self.dim * 4, self.dim * 4 // reduction),
+                    nn.ReLU(inplace=True),
+                    nn.Linear(self.dim * 4 // reduction, self.dim * 2),
+                    nn.Sigmoid())
+    def forward(self, x1, x2):
+        B, _, H, W = x1.shape
+        x = torch.cat((x1, x2), dim=1)
+        avg = self.avg_pool(x).view(B, self.dim * 2)
+        max = self.max_pool(x).view(B, self.dim * 2)
+        y = torch.cat((avg, max), dim=1) # B 4C
+        y = self.mlp(y).view(B, self.dim * 2, 1)
+        channel_weights = y.reshape(B, 2, self.dim, 1, 1).permute(1, 0, 2, 3, 4) # 2 B C 1 1
+        return channel_weights
+class SpatialWeights(nn.Module):
+    def __init__(self, dim, reduction=1):
+        super(SpatialWeights, self).__init__()
+        self.dim = dim
+        self.mlp = nn.Sequential(
+                    nn.Conv2d(self.dim * 2, self.dim // reduction, kernel_size=1),
+                    nn.ReLU(inplace=True),
+                    nn.Conv2d(self.dim // reduction, 2, kernel_size=1),
+                    nn.Sigmoid())
+    def forward(self, x1, x2):
+        B, _, H, W = x1.shape
+        x = torch.cat((x1, x2), dim=1) # B 2C H W
+        spatial_weights = self.mlp(x).reshape(B, 2, 1, H, W).permute(1, 0, 2, 3, 4) # 2 B 1 H W
+        return spatial_weights
+class FeatureRectifyModule(nn.Module):
+    def __init__(self, dim, reduction=1, lambda_c=.5, lambda_s=.5):
+        super(FeatureRectifyModule, self).__init__()
+        self.lambda_c = lambda_c
+        self.lambda_s = lambda_s
+        self.channel_weights = ChannelWeights(dim=dim, reduction=reduction)
+        self.spatial_weights = SpatialWeights(dim=dim, reduction=reduction)
+    def _init_weights(self, m):
+        if isinstance(m, nn.Linear):
+            trunc_normal_(m.weight, std=.02)
+            if isinstance(m, nn.Linear) and m.bias is not None:
+                nn.init.constant_(m.bias, 0)
+        elif isinstance(m, nn.LayerNorm):
+            nn.init.constant_(m.bias, 0)
+            nn.init.constant_(m.weight, 1.0)
+        elif isinstance(m, nn.Conv2d):
+            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
+            fan_out //= m.groups
+            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
+            if m.bias is not None:
+                m.bias.data.zero_()
+    def forward(self, x1, x2):
+        channel_weights = self.channel_weights(x1, x2)
+        spatial_weights = self.spatial_weights(x1, x2)
+        out_x1 = x1 + self.lambda_c * channel_weights[1] * x2 + self.lambda_s * spatial_weights[1] * x2
+        out_x2 = x2 + self.lambda_c * channel_weights[0] * x1 + self.lambda_s * spatial_weights[0] * x1
+        return out_x1, out_x2
+# Stage 1
+class CrossAttention(nn.Module):
+    def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None):
+        super(CrossAttention, self).__init__()
+        assert dim % num_heads == 0, f"dim {dim} should be divided by num_heads {num_heads}."
+        self.dim = dim
+        self.num_heads = num_heads
+        head_dim = dim // num_heads
+        self.scale = qk_scale or head_dim ** -0.5
+        self.kv1 = nn.Linear(dim, dim * 2, bias=qkv_bias)
+        self.kv2 = nn.Linear(dim, dim * 2, bias=qkv_bias)
+    def forward(self, x1, x2):
+        B, N, C = x1.shape
+        q1 = x1.reshape(B, -1, self.num_heads, C // self.num_heads).permute(0, 2, 1, 3).contiguous()
+        q2 = x2.reshape(B, -1, self.num_heads, C // self.num_heads).permute(0, 2, 1, 3).contiguous()
+        k1, v1 = self.kv1(x1).reshape(B, -1, 2, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4).contiguous()
+        k2, v2 = self.kv2(x2).reshape(B, -1, 2, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4).contiguous()
+        # q,k,v  B H N C
+        ctx1 = (k1.transpose(-2, -1) @ v1) * self.scale  # B H C C
+        ctx1 = ctx1.softmax(dim=-2)
+        ctx2 = (k2.transpose(-2, -1) @ v2) * self.scale  # B H C C
+        ctx2 = ctx2.softmax(dim=-2)
+        x1 = (q1 @ ctx2).permute(0, 2, 1, 3).reshape(B, N, C).contiguous()
+        x2 = (q2 @ ctx1).permute(0, 2, 1, 3).reshape(B, N, C).contiguous()
+        return x1, x2
+class CrossPath(nn.Module):
+    def __init__(self, dim, reduction=1, num_heads=None, norm_layer=nn.LayerNorm):
+        super().__init__()
+        self.channel_proj1 = nn.Linear(dim, dim // reduction * 2)
+        self.channel_proj2 = nn.Linear(dim, dim // reduction * 2)
+        self.act1 = nn.ReLU(inplace=True)
+        self.act2 = nn.ReLU(inplace=True)
+        self.cross_attn = CrossAttention(dim // reduction, num_heads=num_heads)
+        self.end_proj1 = nn.Linear(dim // reduction * 2, dim)
+        self.end_proj2 = nn.Linear(dim // reduction * 2, dim)
+        self.norm1 = norm_layer(dim)
+        self.norm2 = norm_layer(dim)
+    def forward(self, x1, x2):
+        y1, u1 = self.act1(self.channel_proj1(x1)).chunk(2, dim=-1)
+        y2, u2 = self.act2(self.channel_proj2(x2)).chunk(2, dim=-1)
+        v1, v2 = self.cross_attn(u1, u2)
+        y1 = torch.cat((y1, v1), dim=-1)
+        y2 = torch.cat((y2, v2), dim=-1)
+        out_x1 = self.norm1(x1 + self.end_proj1(y1))
+        out_x2 = self.norm2(x2 + self.end_proj2(y2))
+        return out_x1, out_x2
+# Stage 2
+class ChannelEmbed(nn.Module):
+    def __init__(self, in_channels, out_channels, reduction=1, norm_layer=nn.BatchNorm2d):
+        super(ChannelEmbed, self).__init__()
+        self.out_channels = out_channels
+        self.residual = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
+        self.channel_embed = nn.Sequential(
+                        nn.Conv2d(in_channels, out_channels//reduction, kernel_size=1, bias=True),
+                        nn.Conv2d(out_channels//reduction, out_channels//reduction, kernel_size=3, stride=1, padding=1, bias=True, groups=out_channels//reduction),
+                        nn.ReLU(inplace=True),
+                        nn.Conv2d(out_channels//reduction, out_channels, kernel_size=1, bias=True),
+                        norm_layer(out_channels)
+                        )
+        self.norm = norm_layer(out_channels)
+    def forward(self, x, H, W):
+        B, N, _C = x.shape
+        x = x.permute(0, 2, 1).reshape(B, _C, H, W).contiguous()
+        residual = self.residual(x)
+        x = self.channel_embed(x)
+        out = self.norm(residual + x)
+        return out
+class FeatureFusionModule(nn.Module):
+    def __init__(self, dim, reduction=1, num_heads=None, norm_layer=nn.BatchNorm2d):
+        super().__init__()
+        self.cross = CrossPath(dim=dim, reduction=reduction, num_heads=num_heads)
+        self.channel_emb = ChannelEmbed(in_channels=dim*2, out_channels=dim, reduction=reduction, norm_layer=norm_layer)
+        self.apply(self._init_weights)
+    def _init_weights(self, m):
+        if isinstance(m, nn.Linear):
+            trunc_normal_(m.weight, std=.02)
+            if isinstance(m, nn.Linear) and m.bias is not None:
+                nn.init.constant_(m.bias, 0)
+        elif isinstance(m, nn.LayerNorm):
+            nn.init.constant_(m.bias, 0)
+            nn.init.constant_(m.weight, 1.0)
+        elif isinstance(m, nn.Conv2d):
+            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
+            fan_out //= m.groups
+            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
+            if m.bias is not None:
+                m.bias.data.zero_()
+    def forward(self, x1, x2):
+        B, C, H, W = x1.shape
+        x1 = x1.flatten(2).transpose(1, 2)
+        x2 = x2.flatten(2).transpose(1, 2)
+        x1, x2 = self.cross(x1, x2)
+        merge = torch.cat((x1, x2), dim=-1)
+        merge = self.channel_emb(merge, H, W)
+        return merge

trufor_native/models/cmx/utils/__init__.py ADDED Viewed

File without changes

trufor_native/models/cmx/utils/init_func.py ADDED Viewed

	@@ -0,0 +1,58 @@

+#!/usr/bin/env python3
+# encoding: utf-8
+# @Time    : 2018/9/28 12:13 PM
+# @Author  : yuchangqian
+# @Contact : changqian_yu@163.com
+# @File    : init_func.py
+import torch
+import torch.nn as nn
+def __init_weight(feature, conv_init, norm_layer, bn_eps, bn_momentum,
+                  **kwargs):
+    for name, m in feature.named_modules():
+        if isinstance(m, (nn.Conv1d, nn.Conv2d, nn.Conv3d)):
+            conv_init(m.weight, **kwargs)
+        elif isinstance(m, norm_layer):
+            m.eps = bn_eps
+            m.momentum = bn_momentum
+            nn.init.constant_(m.weight, 1)
+            nn.init.constant_(m.bias, 0)
+def init_weight(module_list, conv_init, norm_layer, bn_eps, bn_momentum,
+                **kwargs):
+    if isinstance(module_list, list):
+        for feature in module_list:
+            __init_weight(feature, conv_init, norm_layer, bn_eps, bn_momentum,
+                          **kwargs)
+    else:
+        __init_weight(module_list, conv_init, norm_layer, bn_eps, bn_momentum,
+                      **kwargs)
+def group_weight(weight_group, module, norm_layer, lr):
+    group_decay = []
+    group_no_decay = []
+    count = 0
+    for m in module.modules():
+        if isinstance(m, nn.Linear):
+            group_decay.append(m.weight)
+            if m.bias is not None:
+                group_no_decay.append(m.bias)
+        elif isinstance(m, (nn.Conv1d, nn.Conv2d, nn.Conv3d, nn.ConvTranspose2d, nn.ConvTranspose3d)):
+            group_decay.append(m.weight)
+            if m.bias is not None:
+                group_no_decay.append(m.bias)
+        elif isinstance(m, norm_layer) or isinstance(m, nn.BatchNorm1d) or isinstance(m, nn.BatchNorm2d) \
+                or isinstance(m, nn.BatchNorm3d) or isinstance(m, nn.GroupNorm) or isinstance(m, nn.LayerNorm):
+            if m.weight is not None:
+                group_no_decay.append(m.weight)
+            if m.bias is not None:
+                group_no_decay.append(m.bias)
+        elif isinstance(m, nn.Parameter):
+            group_decay.append(m)
+    assert len(list(module.parameters())) >= len(group_decay) + len(group_no_decay)
+    weight_group.append(dict(params=group_decay, lr=lr))
+    weight_group.append(dict(params=group_no_decay, weight_decay=.0, lr=lr))
+    return weight_group

trufor_runner.py ADDED Viewed

	@@ -0,0 +1,309 @@

+from __future__ import annotations
+import logging
+import os
+import shutil
+import subprocess
+import tempfile
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Dict, Optional
+import numpy as np
+from PIL import Image
+LOGGER = logging.getLogger(__name__)
class TruForUnavailableError(RuntimeError):
    """Signal that TruFor cannot be used.

    Raised when required assets (weights, docker image, bundled modules) are
    missing, when no backend is configured, or when an inference run fails.
    """
@dataclass
class TruForResult:
    """Outcome of a single TruFor inference call."""

    # Detection score reported by the backend, or None if it provided none.
    score: Optional[float]
    # Tamper map rendered as a heatmap blended over the input image;
    # None when no map was available or rendering failed.
    map_overlay: Optional[Image.Image]
    # Same rendering for the confidence map; None when unavailable.
    confidence_overlay: Optional[Image.Image]
    # Named summary statistics, e.g. "tamper_score", "tamper_mean",
    # "tamper_max", "confidence_mean", "confidence_max".
    raw_scores: Dict[str, float]
+class TruForEngine:
+    """Wrapper that executes TruFor inference through docker or python backends."""
+    def __init__(
+        self,
+        repo_root: Optional[Path] = None,
+        weights_path: Optional[Path] = None,
+        device: str = "cpu",
+    ) -> None:
+        self.base_dir = Path(__file__).resolve().parent
+        self.device = device
+        self.backend: Optional[str] = None
+        self.status_message = "TruFor backend not initialized."
+        backend_pref = os.environ.get("TRUFOR_BACKEND", "auto").lower()
+        if backend_pref not in {"auto", "native", "docker"}:
+            backend_pref = "auto"
+        errors: list[str] = []
+        if backend_pref in {"auto", "native"}:
+            try:
+                self._configure_native_backend(repo_root, weights_path)
+                self.backend = "native"
+                self.status_message = "TruFor ready (bundled python backend)."
+            except TruForUnavailableError as exc:
+                errors.append(f"Native backend unavailable: {exc}")
+                if backend_pref == "native":
+                    raise
+        if self.backend is None and backend_pref in {"auto", "docker"}:
+            try:
+                self._configure_docker_backend()
+                self.backend = "docker"
+                self.status_message = f'TruFor ready (docker image "{self.docker_image}").'
+            except TruForUnavailableError as exc:
+                errors.append(f"Docker backend unavailable: {exc}")
+                if backend_pref == "docker":
+                    raise
+        if self.backend is None:
+            raise TruForUnavailableError(" | ".join(errors) if errors else "TruFor backend unavailable.")
+    # ------------------------------------------------------------------
+    # Backend configuration helpers
+    # ------------------------------------------------------------------
+    def _configure_docker_backend(self) -> None:
+        if shutil.which("docker") is None:
+            raise TruForUnavailableError("docker CLI not found on PATH.")
+        test_docker_dir = self.base_dir / "test_docker"
+        if not test_docker_dir.exists():
+            raise TruForUnavailableError("test_docker directory not found in workspace.")
+        image_name = os.environ.get("TRUFOR_DOCKER_IMAGE", "trufor")
+        inspect = subprocess.run(
+            ["docker", "image", "inspect", image_name],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            text=True,
+            check=False,
+        )
+        if inspect.returncode != 0:
+            raise TruForUnavailableError(
+                f'Docker image "{image_name}" not found. Build it with "bash test_docker/docker_build.sh".'
+            )
+        weights_candidate = Path(os.environ.get("TRUFOR_DOCKER_WEIGHTS", self.base_dir / "weights")).expanduser()
+        weight_file = weights_candidate / "trufor.pth.tar"
+        self.docker_weights_dir: Optional[Path]
+        self.docker_weights_dir = weight_file.parent if weight_file.exists() else None
+        self.docker_runtime = os.environ.get("TRUFOR_DOCKER_RUNTIME")
+        gpu_pref = os.environ.get("TRUFOR_DOCKER_GPU")
+        if gpu_pref is None:
+            gpu_pref = "-1" if self.device == "cpu" else "0"
+        self.docker_gpu = gpu_pref
+        gpus_arg = os.environ.get("TRUFOR_DOCKER_GPUS_ARG")
+        if not gpus_arg and gpu_pref not in {"-1", "cpu", "none"}:
+            gpus_arg = "all"
+        self.docker_gpus_arg = gpus_arg
+        self.docker_image = image_name
+    def _configure_native_backend(self, _repo_root: Optional[Path], weights_path: Optional[Path]) -> None:
+        try:
+            from trufor_native import TruForBundledModel
+        except ImportError as exc:  # pragma: no cover - packaging guard
+            raise TruForUnavailableError("Bundled TruFor modules are not available.") from exc
+        default_weights = self.base_dir / "weights" / "trufor.pth.tar"
+        weight_candidate = weights_path or os.environ.get("TRUFOR_WEIGHTS") or default_weights
+        weight_path = Path(weight_candidate).expanduser()
+        if not weight_path.exists():
+            raise TruForUnavailableError(
+                f"TruFor weights missing at {weight_path}. Place trufor.pth.tar under weights/ or set TRUFOR_WEIGHTS."
+            )
+        try:
+            self.native_model = TruForBundledModel(weight_path, device=self.device)
+        except Exception as exc:  # pragma: no cover - propagate detailed failure
+            raise TruForUnavailableError(f"Failed to initialise bundled TruFor model: {exc}") from exc
+    # ------------------------------------------------------------------
+    # Public API
+    # ------------------------------------------------------------------
+    def infer(self, image: Image.Image) -> TruForResult:
+        if image is None:
+            raise TruForUnavailableError("No image supplied to TruFor inference.")
+        if self.backend == "docker":
+            return self._infer_docker(image)
+        if self.backend == "native":
+            return self._infer_native(image)
+        raise TruForUnavailableError("TruFor backend not configured.")
+    # ------------------------------------------------------------------
+    # Inference helpers
+    # ------------------------------------------------------------------
+    def _infer_native(self, image: Image.Image) -> TruForResult:
+        outputs = self.native_model.predict(image)
+        overlays: Dict[str, Optional[Image.Image]] = {"map": None, "conf": None}
+        try:
+            overlays["map"] = self._apply_heatmap(image, outputs.tamper_map)
+        except Exception as exc:  # pragma: no cover - visualisation fallback
+            LOGGER.debug("Failed to build tamper heatmap: %s", exc)
+        if outputs.confidence_map is not None:
+            try:
+                overlays["conf"] = self._apply_heatmap(image, outputs.confidence_map)
+            except Exception as exc:  # pragma: no cover
+                LOGGER.debug("Failed to build confidence heatmap: %s", exc)
+        raw_scores: Dict[str, float] = {
+            "tamper_mean": float(np.mean(outputs.tamper_map)),
+            "tamper_max": float(np.max(outputs.tamper_map)),
+        }
+        if outputs.confidence_map is not None:
+            raw_scores["confidence_mean"] = float(np.mean(outputs.confidence_map))
+            raw_scores["confidence_max"] = float(np.max(outputs.confidence_map))
+        if outputs.detection_score is not None:
+            raw_scores["tamper_score"] = float(outputs.detection_score)
+        return TruForResult(
+            score=outputs.detection_score,
+            map_overlay=overlays["map"],
+            confidence_overlay=overlays["conf"],
+            raw_scores=raw_scores,
+        )
+    def _infer_docker(self, image: Image.Image) -> TruForResult:
+        with tempfile.TemporaryDirectory(prefix="trufor_docker_") as workdir:
+            workdir_path = Path(workdir)
+            input_dir = workdir_path / "data"
+            output_dir = workdir_path / "data_out"
+            input_dir.mkdir(parents=True, exist_ok=True)
+            output_dir.mkdir(parents=True, exist_ok=True)
+            input_path = input_dir / "input.png"
+            image.convert("RGB").save(input_path)
+            cmd = ["docker", "run", "--rm"]
+            if self.docker_runtime:
+                cmd.extend(["--runtime", self.docker_runtime])
+            gpu_flag = str(self.docker_gpu)
+            if gpu_flag.lower() in {"cpu", "none"}:
+                gpu_flag = "-1"
+            if gpu_flag != "-1" and self.docker_gpus_arg:
+                cmd.extend(["--gpus", self.docker_gpus_arg])
+            cmd.extend([
+                "-v",
+                f"{input_dir.resolve()}:/data:ro",
+                "-v",
+                f"{output_dir.resolve()}:/data_out:rw",
+            ])
+            if self.docker_weights_dir is not None:
+                cmd.extend([
+                    "-v",
+                    f"{self.docker_weights_dir.resolve()}:/weights:ro",
+                ])
+            cmd.append(self.docker_image)
+            cmd.extend(
+                [
+                    "-gpu",
+                    gpu_flag,
+                    "-in",
+                    "data/input.png",
+                    "-out",
+                    "data_out",
+                ]
+            )
+            LOGGER.debug("Running TruFor docker command: %s", " ".join(cmd))
+            result = subprocess.run(
+                cmd,
+                text=True,
+                capture_output=True,
+                check=False,
+            )
+            return self._process_results(result, output_dir, image)
+    # ------------------------------------------------------------------
+    # Result parsing
+    # ------------------------------------------------------------------
+    def _process_results(self, run_result: subprocess.CompletedProcess[str], output_dir: Path, image: Image.Image) -> TruForResult:
+        if run_result.returncode != 0:
+            stderr_tail = "\n".join(run_result.stderr.strip().splitlines()[-8:]) if run_result.stderr else ""
+            LOGGER.error("TruFor stderr: %s", stderr_tail)
+            raise TruForUnavailableError(
+                "TruFor inference failed. Inspect dependencies and stderr:\n" + stderr_tail
+            )
+        npz_files = list(output_dir.rglob("*.npz"))
+        if not npz_files:
+            stdout_tail = "\n".join(run_result.stdout.strip().splitlines()[-8:]) if run_result.stdout else ""
+            raise TruForUnavailableError(
+                "TruFor inference produced no output files. Stdout tail:\n" + stdout_tail
+            )
+        data = np.load(npz_files[0], allow_pickle=False)
+        tamper_map = data.get("map")
+        conf_map = data.get("conf")
+        score = float(data["score"]) if "score" in data.files else None
+        overlays: Dict[str, Optional[Image.Image]] = {"map": None, "conf": None}
+        try:
+            overlays["map"] = self._apply_heatmap(image, tamper_map) if tamper_map is not None else None
+        except Exception as exc:  # pragma: no cover
+            LOGGER.debug("Failed to build tamper heatmap: %s", exc)
+        try:
+            overlays["conf"] = self._apply_heatmap(image, conf_map) if conf_map is not None else None
+        except Exception as exc:  # pragma: no cover
+            LOGGER.debug("Failed to build confidence heatmap: %s", exc)
+        raw_scores: Dict[str, float] = {}
+        if score is not None:
+            raw_scores["tamper_score"] = score
+        if tamper_map is not None:
+            raw_scores["tamper_mean"] = float(np.mean(tamper_map))
+            raw_scores["tamper_max"] = float(np.max(tamper_map))
+        if conf_map is not None:
+            raw_scores["confidence_mean"] = float(np.mean(conf_map))
+            raw_scores["confidence_max"] = float(np.max(conf_map))
+        return TruForResult(
+            score=score,
+            map_overlay=overlays["map"],
+            confidence_overlay=overlays["conf"],
+            raw_scores=raw_scores,
+        )
+    @staticmethod
+    def _apply_heatmap(base: Image.Image, data: np.ndarray, alpha: float = 0.55) -> Image.Image:
+        base_rgb = base.convert("RGB")
+        if data is None or data.ndim != 2:
+            raise ValueError("Expected a 2D map from TruFor")
+        data = np.asarray(data, dtype=np.float32)
+        if np.allclose(data.max(), data.min()):
+            norm = np.zeros_like(data, dtype=np.float32)
+        else:
+            norm = (data - data.min()) / (data.max() - data.min())
+        heat = np.zeros((*norm.shape, 3), dtype=np.uint8)
+        heat[..., 0] = np.clip(norm * 255, 0, 255).astype(np.uint8)
+        heat[..., 1] = np.clip(np.sqrt(norm) * 255, 0, 255).astype(np.uint8)
+        heat[..., 2] = np.clip((1.0 - norm) * 255, 0, 255).astype(np.uint8)
+        heat_img = Image.fromarray(heat, mode="RGB").resize(base_rgb.size, Image.BILINEAR)
+        return Image.blend(base_rgb, heat_img, alpha)