Eval to infer.py

Browse files

Files changed (6) hide show

configs/default.yaml +5 -4
infer.py +305 -4
scripts/pull_and_preprocess_wireseghr_dataset.py +40 -26
scripts/setup_script.sh +3 -3
src/wireseghr/data/ttpla_to_masks.py +37 -12
train.py +74 -171

configs/default.yaml CHANGED Viewed

@@ -7,7 +7,7 @@ coarse:
   test_size: 1024
 fine:
-  patch_size: 768
   overlap: 128
 conditioning:
@@ -23,12 +23,13 @@ label:
 inference:
   alpha: 0.01
-  prob_threshold: 0.3 # was 0.5, not actually mentioned in the paper.
   stitch: avg_logits
 eval:
   max_samples: 16
-  fine_batch: 48
 optim:
   iters: 2000
@@ -44,7 +45,7 @@ seed: 42
 out_dir: runs/wireseghr
 eval_interval: 100
 ckpt_interval: 300
-resume: runs/wireseghr/ckpt_300.pt  # optional
 # dataset paths (placeholders)
 data:

   test_size: 1024
 fine:
+  patch_size: 512
   overlap: 128
 conditioning:
 inference:
   alpha: 0.01
+  prob_threshold: 0.5  # default inference threshold; the paper does not specify one
+  fine_patch_size: 1024
   stitch: avg_logits
 eval:
   max_samples: 16
+  fine_batch: 32
 optim:
   iters: 2000
 out_dir: runs/wireseghr
 eval_interval: 100
 ckpt_interval: 300
+resume: runs/wireseghr/ckpt_1800.pt  # optional
 # dataset paths (placeholders)
 data:

infer.py CHANGED Viewed

@@ -1,15 +1,260 @@
 import argparse
 import os
 import pprint
 import yaml
 def main():
-    parser = argparse.ArgumentParser(description="WireSegHR inference (skeleton)")
     parser.add_argument(
         "--config", type=str, default="configs/default.yaml", help="Path to YAML config"
     )
     parser.add_argument("--image", type=str, required=False, help="Path to input image")
     args = parser.parse_args()
     cfg_path = args.config
@@ -21,10 +266,66 @@ def main():
     print("[WireSegHR][infer] Loaded config from:", cfg_path)
     pprint.pprint(cfg)
-    print("[WireSegHR][infer] Image:", args.image)
-    print(
-        "[WireSegHR][infer] Skeleton OK. Implement inference per SEGMENTATION_PLAN.md."
     )
 if __name__ == "__main__":

 import argparse
 import os
 import pprint
+from typing import List, Tuple, Optional
 import yaml
+import numpy as np
+import cv2
+import torch
+import torch.nn.functional as F
+from torch.amp import autocast
+from src.wireseghr.model import WireSegHR
+def _pad_for_minmax(kernel: int) -> Tuple[int, int, int, int]:
+    """Return the 4-tuple given to ``F.pad`` before a stride-1 pool of size ``kernel``.
+
+    Mirrors the padding used in ``train.validate``. Odd kernels get
+    ``kernel // 2`` on every side; even kernels get one element fewer on the
+    first side of each pair. Either way the two pads of an axis add up to
+    ``kernel - 1``, so a stride-1 pool over the padded tensor keeps its size.
+    """
+    half = kernel // 2
+    # Even kernels cannot be centred, so the leading side is one shorter.
+    lead = half if kernel % 2 else half - 1
+    return (lead, half, lead, half)
+@torch.no_grad()
+def _coarse_forward(
+    model: WireSegHR,
+    img_rgb: np.ndarray,
+    coarse_size: int,
+    minmax_enable: bool,
+    minmax_kernel: int,
+    device: torch.device,
+    amp_flag: bool,
+    amp_dtype,
+) -> Tuple[np.ndarray, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+    """Run the coarse branch on a square, downscaled copy of the whole image.
+
+    Args:
+        model: Network exposing ``forward_coarse``.
+        img_rgb: HxWx3 array (``infer_image`` passes float32 RGB scaled to [0, 1]).
+        coarse_size: Side length of the square coarse input.
+        minmax_enable: When true, local luminance min/max maps are computed with
+            a stride-1 pooling window; otherwise plain luminance is used for both.
+        minmax_kernel: Pooling window size for the min/max maps.
+        device: Device on which all tensors are created.
+        amp_flag: Whether autocast is enabled for the model call.
+        amp_dtype: Autocast dtype handed through to ``autocast``.
+
+    Returns:
+        ``(prob_up, cond_map, t_img, y_min_full, y_max_full)`` where ``prob_up``
+        is the class-1 probability resized to HxW as a NumPy array, ``cond_map``
+        is the second output of ``forward_coarse``, ``t_img`` is the 1x3xHxW
+        image tensor, and the last two are the full-resolution 1x1xHxW
+        luminance min/max maps.
+    """
+    full_h, full_w = img_rgb.shape[0], img_rgb.shape[1]
+    target = (coarse_size, coarse_size)
+    def _to_coarse(t: torch.Tensor) -> torch.Tensor:
+        # Bilinear resize of a 1xCxHxW tensor to the coarse size; batch axis dropped.
+        return F.interpolate(t, size=target, mode="bilinear", align_corners=False)[0]
+    chw = np.transpose(img_rgb, (2, 0, 1))
+    t_img = torch.from_numpy(chw).unsqueeze(0).to(device).float()  # 1x3xHxW
+    # Weighted sum of the R, G, B planes gives a single luminance plane.
+    y_t = 0.299 * t_img[:, 0:1] + 0.587 * t_img[:, 1:2] + 0.114 * t_img[:, 2:3]
+    if not minmax_enable:
+        y_min_full = y_t
+        y_max_full = y_t
+    else:
+        padded = F.pad(y_t, _pad_for_minmax(minmax_kernel), mode="replicate")
+        y_max_full = F.max_pool2d(padded, kernel_size=minmax_kernel, stride=1)
+        # Min-pooling expressed as a negated max-pool of the negated input.
+        y_min_full = -F.max_pool2d(-padded, kernel_size=minmax_kernel, stride=1)
+    # Channel order: RGB, luminance min, luminance max, then an all-zero plane.
+    planes = [
+        _to_coarse(t_img),
+        _to_coarse(y_min_full),
+        _to_coarse(y_max_full),
+        torch.zeros(1, coarse_size, coarse_size, device=device),
+    ]
+    x_t = torch.cat(planes, dim=0).unsqueeze(0)
+    with autocast(device_type=device.type, dtype=amp_dtype, enabled=amp_flag):
+        logits_c, cond_map = model.forward_coarse(x_t)
+    prob = torch.softmax(logits_c, dim=1)[:, 1:2]
+    prob = F.interpolate(
+        prob, size=(full_h, full_w), mode="bilinear", align_corners=False
+    )
+    prob_up = prob[0, 0].detach().cpu().numpy()
+    return prob_up, cond_map, t_img, y_min_full, y_max_full
+@torch.no_grad()
+def _tiled_fine_forward(
+    model: WireSegHR,
+    t_img: torch.Tensor,  # 1x3xHxW on device
+    cond_map: torch.Tensor,  # 1x1xhxw
+    y_min_full: torch.Tensor,  # 1x1xHxW
+    y_max_full: torch.Tensor,  # 1x1xHxW
+    patch_size: int,
+    overlap: int,
+    fine_batch: int,
+    device: torch.device,
+    amp_flag: bool,
+    amp_dtype,
+) -> np.ndarray:
+    """Run the fine branch over overlapping square tiles and stitch the result.
+
+    Tile origins step by ``patch_size - overlap`` along each axis, with one
+    extra origin added so the last tile ends exactly on the image edge. Tiles
+    are sent to ``model.forward_fine`` in groups of ``fine_batch``. Class-1
+    probabilities (after softmax) are summed per pixel and divided by the
+    number of tiles covering that pixel.
+
+    Returns:
+        HxW NumPy array of averaged class-1 probabilities.
+
+    Raises:
+        AssertionError: if ``overlap >= patch_size`` or the image is smaller
+            than ``patch_size`` along either axis.
+    """
+    img_h = int(t_img.shape[2])
+    img_w = int(t_img.shape[3])
+    tile = patch_size
+    step = tile - overlap
+    assert step > 0
+    assert img_h >= tile and img_w >= tile
+    # Accumulators stay on the device; only the final average is copied to host.
+    acc = torch.zeros((img_h, img_w), device=device, dtype=torch.float32)
+    hits = torch.zeros((img_h, img_w), device=device, dtype=torch.float32)
+    cond_h, cond_w = cond_map.shape[2], cond_map.shape[3]
+    def _origins(extent: int) -> List[int]:
+        # Regular grid of tile origins plus one flush with the far edge.
+        last = extent - tile
+        starts = list(range(0, last + 1, step))
+        if starts[-1] != last:
+            starts.append(last)
+        return starts
+    ys = _origins(img_h)
+    xs = _origins(img_w)
+    coords: List[Tuple[int, int]] = [(y0, x0) for y0 in ys for x0 in xs]
+    for start in range(0, len(coords), fine_batch):
+        chunk = coords[start : start + fine_batch]
+        tiles: List[torch.Tensor] = []
+        for y0, x0 in chunk:
+            y1 = y0 + tile
+            x1 = x0 + tile
+            # Tile bounds on the conditioning grid: floor the start, ceil the end.
+            cy0 = (y0 * cond_h) // img_h
+            cy1 = ((y1 * cond_h) + img_h - 1) // img_h
+            cx0 = (x0 * cond_w) // img_w
+            cx1 = ((x1 * cond_w) + img_w - 1) // img_w
+            cond_patch = F.interpolate(
+                cond_map[:, :, cy0:cy1, cx0:cx1].float(),
+                size=(tile, tile),
+                mode="bilinear",
+                align_corners=False,
+            ).squeeze(1)  # 1xPxP
+            planes = [
+                t_img[0, :, y0:y1, x0:x1],  # 3xPxP
+                y_min_full[0, 0, y0:y1, x0:x1].float().unsqueeze(0),  # 1xPxP
+                y_max_full[0, 0, y0:y1, x0:x1].float().unsqueeze(0),  # 1xPxP
+                cond_patch,
+            ]
+            tiles.append(torch.cat(planes, dim=0).unsqueeze(0))
+        batch = torch.cat(tiles, dim=0)  # Bx6xPxP
+        with autocast(device_type=device.type, dtype=amp_dtype, enabled=amp_flag):
+            logits_f = model.forward_fine(batch)
+            prob_f = torch.softmax(logits_f, dim=1)[:, 1:2]
+            prob_f_up = F.interpolate(
+                prob_f, size=(tile, tile), mode="bilinear", align_corners=False
+            )[:, 0, :, :]  # BxPxP
+        for bi, (y0, x0) in enumerate(chunk):
+            acc[y0 : y0 + tile, x0 : x0 + tile] += prob_f_up[bi]
+            hits[y0 : y0 + tile, x0 : x0 + tile] += 1.0
+    prob_full = (acc / hits).detach().cpu().numpy()
+    return prob_full
+def _build_model_from_cfg(cfg: dict, device: torch.device) -> WireSegHR:
+    """Construct ``WireSegHR`` from the config and move it to ``device``.
+
+    Reads ``cfg["backbone"]`` (required) and ``cfg["pretrained"]`` (optional,
+    defaults to False). The network is always built with six input channels.
+    """
+    use_pretrained = bool(cfg.get("pretrained", False))
+    net = WireSegHR(
+        backbone=cfg["backbone"], in_channels=6, pretrained=use_pretrained
+    )
+    return net.to(device)
+@torch.no_grad()
+def infer_image(
+    model: WireSegHR,
+    img_path: str,
+    cfg: dict,
+    device: torch.device,
+    amp_flag: bool,
+    amp_dtype,
+    out_dir: Optional[str] = None,
+    save_prob: bool = False,
+    prob_thresh: Optional[float] = None,
+) -> Tuple[np.ndarray, np.ndarray]:
+    """Segment one image file with the coarse-then-fine pipeline.
+
+    The image is read with OpenCV, converted to float32 RGB in [0, 1], passed
+    through ``_coarse_forward`` and then ``_tiled_fine_forward``. The stitched
+    fine probabilities are thresholded into a mask. ``cfg["inference"]["stitch"]``
+    is not consulted here.
+
+    Args:
+        model: Network providing ``forward_coarse`` and ``forward_fine``.
+        img_path: Path of the image to read.
+        cfg: Parsed config. Reads ``coarse.test_size``,
+            ``inference.fine_patch_size``, ``fine.overlap``, ``minmax.enable``,
+            ``minmax.kernel``, ``inference.prob_threshold`` (only when
+            ``prob_thresh`` is None) and ``eval.fine_batch`` (default 16).
+        device: Device used for all tensors.
+        amp_flag: Whether autocast is enabled for the model calls.
+        amp_dtype: Autocast dtype.
+        out_dir: When given, ``<stem>_pred.png`` is written there; the
+            directory is created if needed.
+        save_prob: When true (and ``out_dir`` is given), also writes
+            ``<stem>_prob.npy`` as float32.
+        prob_thresh: Overrides the configured probability threshold.
+
+    Returns:
+        ``(pred, prob_f)``: the HxW uint8 mask with values 0 or 255, and the
+        HxW fine-stage probability map.
+
+    Raises:
+        AssertionError: if the image is missing or cannot be decoded, or if it
+            is smaller than the fine patch size (checked in
+            ``_tiled_fine_forward``).
+        OSError: if the mask PNG could not be written.
+    """
+    assert os.path.isfile(img_path), f"Image not found: {img_path}"
+    bgr = cv2.imread(img_path, cv2.IMREAD_COLOR)
+    assert bgr is not None, f"Failed to read {img_path}"
+    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
+    coarse_size = int(cfg["coarse"]["test_size"])
+    patch_size = int(cfg["inference"]["fine_patch_size"])  # 1024 for inference
+    overlap = int(cfg["fine"]["overlap"])
+    minmax_enable = bool(cfg["minmax"]["enable"])
+    minmax_kernel = int(cfg["minmax"]["kernel"])
+    if prob_thresh is None:
+        prob_thresh = float(cfg["inference"]["prob_threshold"])
+    # The coarse probability map is not used here; only the conditioning map
+    # and the full-resolution tensors feed the fine stage.
+    _, cond_map, t_img, y_min_full, y_max_full = _coarse_forward(
+        model,
+        rgb,
+        coarse_size,
+        minmax_enable,
+        minmax_kernel,
+        device,
+        amp_flag,
+        amp_dtype,
+    )
+    prob_f = _tiled_fine_forward(
+        model,
+        t_img,
+        cond_map,
+        y_min_full,
+        y_max_full,
+        patch_size,
+        overlap,
+        int(cfg.get("eval", {}).get("fine_batch", 16)),
+        device,
+        amp_flag,
+        amp_dtype,
+    )
+    pred = (prob_f > prob_thresh).astype(np.uint8) * 255
+    if out_dir is not None:
+        os.makedirs(out_dir, exist_ok=True)
+        stem = os.path.splitext(os.path.basename(img_path))[0]
+        out_mask = os.path.join(out_dir, f"{stem}_pred.png")
+        # cv2.imwrite signals failure through its return value, so check it
+        # rather than reporting success for a mask that was never saved.
+        if not cv2.imwrite(out_mask, pred):
+            raise OSError(f"Failed to write {out_mask}")
+        if save_prob:
+            out_prob = os.path.join(out_dir, f"{stem}_prob.npy")
+            np.save(out_prob, prob_f.astype(np.float32))
+    return pred, prob_f
 def main():
+    parser = argparse.ArgumentParser(description="WireSegHR inference")  # CLI entry point: run inference on one image or on a directory of images
     parser.add_argument(
         "--config", type=str, default="configs/default.yaml", help="Path to YAML config"
     )
     parser.add_argument("--image", type=str, required=False, help="Path to input image")
+    parser.add_argument(
+        "--images_dir",
+        type=str,
+        required=False,
+        help="Directory with .jpg/.jpeg images",
+    )
+    parser.add_argument(
+        "--out", type=str, default="outputs/infer", help="Directory to save predictions"
+    )
+    parser.add_argument(
+        "--ckpt",
+        type=str,
+        default="",
+        help="Optional checkpoint (.pt) with model state",
+    )
+    parser.add_argument(
+        "--save_prob", action="store_true", help="Also save probability .npy"
+    )
     args = parser.parse_args()
     cfg_path = args.config
     print("[WireSegHR][infer] Loaded config from:", cfg_path)
     pprint.pprint(cfg)  # NOTE(review): cfg is assigned in lines that this diff view elides
+    assert (args.image is not None) ^ (args.images_dir is not None), (
+        "Provide exactly one of --image or --images_dir"
+    )  # XOR: passing both flags, or neither, is rejected
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    precision = str(cfg["optim"].get("precision", "fp32")).lower()
+    assert precision in ("fp32", "fp16", "bf16")
+    amp_enabled = (device.type == "cuda") and (precision in ("fp16", "bf16"))  # autocast only on CUDA with a reduced-precision setting
+    amp_dtype = (
+        torch.float16
+        if precision == "fp16"
+        else (torch.bfloat16 if precision == "bf16" else None)
+    )
+    model = _build_model_from_cfg(cfg, device)
+    ckpt_path = args.ckpt if args.ckpt else cfg.get("resume", "")  # --ckpt wins; otherwise fall back to the config's "resume" entry
+    if ckpt_path:
+        assert os.path.isfile(ckpt_path), f"Checkpoint not found: {ckpt_path}"
+        print(f"[WireSegHR][infer] Loading checkpoint: {ckpt_path}")
+        state = torch.load(ckpt_path, map_location=device)  # NOTE(review): torch.load may unpickle arbitrary objects depending on the PyTorch version's weights_only default; load trusted checkpoints only
+        model.load_state_dict(state["model"])  # the checkpoint must be a mapping with a "model" entry
+    model.eval()
+    if args.image is not None:
+        infer_image(
+            model,
+            args.image,
+            cfg,
+            device,
+            amp_enabled,
+            amp_dtype,
+            out_dir=args.out,
+            save_prob=args.save_prob,
+        )
+        print("[WireSegHR][infer] Done.")
+        return
+    # Directory mode
+    img_dir = args.images_dir
+    assert os.path.isdir(img_dir), f"Not a directory: {img_dir}"
+    img_files = sorted(
+        [p for p in os.listdir(img_dir) if p.lower().endswith((".jpg", ".jpeg"))]  # non-recursive; extension match is case-insensitive
     )
+    assert len(img_files) > 0, f"No .jpg/.jpeg in {img_dir}"
+    os.makedirs(args.out, exist_ok=True)
+    for name in img_files:
+        path = os.path.join(img_dir, name)
+        infer_image(
+            model,
+            path,
+            cfg,
+            device,
+            amp_enabled,
+            amp_dtype,
+            out_dir=args.out,
+            save_prob=args.save_prob,
+        )
+    print("[WireSegHR][infer] Done.")
 if __name__ == "__main__":

scripts/pull_and_preprocess_wireseghr_dataset.py CHANGED Viewed

@@ -80,7 +80,11 @@ def download_folder(folder_id, dest, service_account_json, workers: int):
     for meta in files_with_paths:
         out_path = os.path.join(dest, meta["rel_path"])
         os.makedirs(os.path.dirname(out_path), exist_ok=True)
-        if meta["size"] > 0 and os.path.exists(out_path) and os.path.getsize(out_path) == meta["size"]:
             skipped += 1
             continue
         tasks.append((meta["id"], out_path))
@@ -98,7 +102,9 @@ def download_folder(folder_id, dest, service_account_json, workers: int):
     with ThreadPoolExecutor(max_workers=workers) as ex:
         futures = [ex.submit(_download_one, fid, path) for fid, path in tasks]
-        for _ in tqdm(as_completed(futures), total=len(futures), desc="Downloading", unit="file"):
             pass
@@ -137,7 +143,9 @@ def pull(args=None):
 def _index_numeric_pairs(images_dir: Path, masks_dir: Path):
-    assert images_dir.exists() and images_dir.is_dir(), f"Missing images_dir: {images_dir}"
     assert masks_dir.exists() and masks_dir.is_dir(), f"Missing masks_dir: {masks_dir}"
     img_files = sorted([p for p in images_dir.glob("*.jpg") if p.is_file()])
     img_files += sorted([p for p in images_dir.glob("*.jpeg") if p.is_file()])
@@ -247,7 +255,9 @@ if __name__ == "__main__":
     subs = top.add_subparsers(dest="cmd", required=True)
     sp_pull = subs.add_parser("pull", help="Download dataset from Google Drive")
-    sp_pull.add_argument("--folder-id", dest="folder_id", default="1fgy3wn_yuHEeMNbfiHNVl1-jEdYOfu6p")
     sp_pull.add_argument("--output-dir", dest="output_dir", default="dataset/")
     sp_pull.add_argument("--service-account", default="secrets/drive-json.json")
     sp_pull.add_argument("--workers", type=int, default=8)
@@ -265,26 +275,30 @@ if __name__ == "__main__":
     ns = top.parse_args()
     if ns.cmd == "pull":
-        pull([
-            "--folder-id",
-            ns.folder_id,
-            "--output-dir",
-            ns.output_dir,
-            "--service-account",
-            ns.service_account,
-            "--workers",
-            str(ns.workers),
-        ])
     elif ns.cmd == "split_test_train_val":
-        split_test_train_val([
-            "--images-dir",
-            ns.images_dir,
-            "--masks-dir",
-            ns.masks_dir,
-            "--out-dir",
-            ns.out_dir,
-            "--seed",
-            str(ns.seed),
-            "--link-method",
-            ns.link_method,
-        ])

     for meta in files_with_paths:
         out_path = os.path.join(dest, meta["rel_path"])
         os.makedirs(os.path.dirname(out_path), exist_ok=True)
+        if (
+            meta["size"] > 0
+            and os.path.exists(out_path)
+            and os.path.getsize(out_path) == meta["size"]
+        ):
             skipped += 1
             continue
         tasks.append((meta["id"], out_path))
     with ThreadPoolExecutor(max_workers=workers) as ex:
         futures = [ex.submit(_download_one, fid, path) for fid, path in tasks]
+        for _ in tqdm(
+            as_completed(futures), total=len(futures), desc="Downloading", unit="file"
+        ):
             pass
 def _index_numeric_pairs(images_dir: Path, masks_dir: Path):
+    assert images_dir.exists() and images_dir.is_dir(), (
+        f"Missing images_dir: {images_dir}"
+    )
     assert masks_dir.exists() and masks_dir.is_dir(), f"Missing masks_dir: {masks_dir}"
     img_files = sorted([p for p in images_dir.glob("*.jpg") if p.is_file()])
     img_files += sorted([p for p in images_dir.glob("*.jpeg") if p.is_file()])
     subs = top.add_subparsers(dest="cmd", required=True)
     sp_pull = subs.add_parser("pull", help="Download dataset from Google Drive")
+    sp_pull.add_argument(
+        "--folder-id", dest="folder_id", default="1fgy3wn_yuHEeMNbfiHNVl1-jEdYOfu6p"
+    )
     sp_pull.add_argument("--output-dir", dest="output_dir", default="dataset/")
     sp_pull.add_argument("--service-account", default="secrets/drive-json.json")
     sp_pull.add_argument("--workers", type=int, default=8)
     ns = top.parse_args()
     if ns.cmd == "pull":
+        pull(
+            [
+                "--folder-id",
+                ns.folder_id,
+                "--output-dir",
+                ns.output_dir,
+                "--service-account",
+                ns.service_account,
+                "--workers",
+                str(ns.workers),
+            ]
+        )
     elif ns.cmd == "split_test_train_val":
+        split_test_train_val(
+            [
+                "--images-dir",
+                ns.images_dir,
+                "--masks-dir",
+                ns.masks_dir,
+                "--out-dir",
+                ns.out_dir,
+                "--seed",
+                str(ns.seed),
+                "--link-method",
+                ns.link_method,
+            ]
+        )

scripts/setup_script.sh CHANGED Viewed

@@ -5,9 +5,9 @@ set -euo pipefail
 # 0) Setup env (includes gdown used by scripts/pull_ttpla.sh)
 pip install uv
-uv venv || true
-source .venv/bin/activate
-pip install uv
 uv pip install -r requirements.txt
 uv pip install gdown

 # 0) Setup env (includes gdown used by scripts/pull_ttpla.sh)
 pip install uv
+# uv venv || true # note: don't create new venv since one exists in vast.ai pytorch image.
+# source .venv/bin/activate
+# pip install uv
 uv pip install -r requirements.txt
 uv pip install gdown

src/wireseghr/data/ttpla_to_masks.py CHANGED Viewed

@@ -9,7 +9,9 @@ from PIL import Image, ImageDraw
 import numpy as np
-def _rasterize_cable_mask(shapes: List[dict], height: int, width: int, label: str) -> np.ndarray:
     """Rasterize polygons with given label into a binary mask of shape (H, W), values {0,255}.
     Expects LabelMe-style annotations with shape entries containing keys:
@@ -33,7 +35,7 @@ def _rasterize_cable_mask(shapes: List[dict], height: int, width: int, label: st
         pts[:, 0] = np.clip(pts[:, 0], 0, width - 1)
         pts[:, 1] = np.clip(pts[:, 1], 0, height - 1)
         # PIL expects list of (x, y) tuples
-        pts_list = [ (int(p[0]), int(p[1])) for p in pts ]
         draw.polygon(pts_list, outline=255, fill=255)
     mask = np.asarray(mask_img, dtype=np.uint8)
@@ -46,12 +48,14 @@ def _convert_one(json_path: Path, out_dir: Path, label: str) -> Path | None:
     shapes = data["shapes"]
     H = int(data["imageHeight"])  # required by given JSON
-    W = int(data["imageWidth"])   # required by given JSON
     image_path = Path(data["imagePath"])  # e.g. "1_00186.jpg"
     # WireSegDataset expects numeric filename stems. Derive a numeric-only stem.
     stem_raw = image_path.stem
     out_stem = "".join([c for c in stem_raw if c.isdigit()])
-    assert out_stem.isdigit() and len(out_stem) > 0, f"Non-numeric stem derived from {stem_raw}"
     mask = _rasterize_cable_mask(shapes, H, W, label)
@@ -62,7 +66,12 @@ def _convert_one(json_path: Path, out_dir: Path, label: str) -> Path | None:
     return out_path
-def convert_ttpla_jsons_to_masks(input_path: str | Path, output_dir: str | Path, label: str = "cable", recursive: bool = True) -> List[Path]:
     """Convert TTPLA LabelMe JSON annotations into binary masks matching WireSegHR conventions.
     - input_path: directory containing JSONs (or a single .json file)
@@ -76,11 +85,15 @@ def convert_ttpla_jsons_to_masks(input_path: str | Path, output_dir: str | Path,
     output_p = Path(output_dir)
     if input_p.is_file():
-        assert input_p.suffix.lower() == ".json", f"Expected a .json file, got: {input_p}"
         out = _convert_one(input_p, output_p, label)
         return [out] if out else []
-    assert input_p.is_dir(), f"Input path must be a directory or a .json file: {input_p}"
     json_iter: Iterable[Path]
     if recursive:
@@ -97,11 +110,23 @@ def convert_ttpla_jsons_to_masks(input_path: str | Path, output_dir: str | Path,
 def main(argv: List[str] | None = None) -> None:
-    parser = argparse.ArgumentParser(description="Convert TTPLA LabelMe JSONs to WireSegHR-style binary masks")
-    parser.add_argument("--input", required=True, help="Path to a directory of JSONs or a single JSON file")
-    parser.add_argument("--output", required=True, help="Output directory for PNG masks")
-    parser.add_argument("--label", default="cable", help="Label to rasterize (default: cable)")
-    parser.add_argument("--no-recursive", action="store_true", help="Do not search subdirectories")
     args = parser.parse_args(argv)
     convert_ttpla_jsons_to_masks(

 import numpy as np
+def _rasterize_cable_mask(
+    shapes: List[dict], height: int, width: int, label: str
+) -> np.ndarray:
     """Rasterize polygons with given label into a binary mask of shape (H, W), values {0,255}.
     Expects LabelMe-style annotations with shape entries containing keys:
         pts[:, 0] = np.clip(pts[:, 0], 0, width - 1)
         pts[:, 1] = np.clip(pts[:, 1], 0, height - 1)
         # PIL expects list of (x, y) tuples
+        pts_list = [(int(p[0]), int(p[1])) for p in pts]
         draw.polygon(pts_list, outline=255, fill=255)
     mask = np.asarray(mask_img, dtype=np.uint8)
     shapes = data["shapes"]
     H = int(data["imageHeight"])  # required by given JSON
+    W = int(data["imageWidth"])  # required by given JSON
     image_path = Path(data["imagePath"])  # e.g. "1_00186.jpg"
     # WireSegDataset expects numeric filename stems. Derive a numeric-only stem.
     stem_raw = image_path.stem
     out_stem = "".join([c for c in stem_raw if c.isdigit()])
+    assert out_stem.isdigit() and len(out_stem) > 0, (
+        f"Non-numeric stem derived from {stem_raw}"
+    )
     mask = _rasterize_cable_mask(shapes, H, W, label)
     return out_path
+def convert_ttpla_jsons_to_masks(
+    input_path: str | Path,
+    output_dir: str | Path,
+    label: str = "cable",
+    recursive: bool = True,
+) -> List[Path]:
     """Convert TTPLA LabelMe JSON annotations into binary masks matching WireSegHR conventions.
     - input_path: directory containing JSONs (or a single .json file)
     output_p = Path(output_dir)
     if input_p.is_file():
+        assert input_p.suffix.lower() == ".json", (
+            f"Expected a .json file, got: {input_p}"
+        )
         out = _convert_one(input_p, output_p, label)
         return [out] if out else []
+    assert input_p.is_dir(), (
+        f"Input path must be a directory or a .json file: {input_p}"
+    )
     json_iter: Iterable[Path]
     if recursive:
 def main(argv: List[str] | None = None) -> None:
+    parser = argparse.ArgumentParser(
+        description="Convert TTPLA LabelMe JSONs to WireSegHR-style binary masks"
+    )
+    parser.add_argument(
+        "--input",
+        required=True,
+        help="Path to a directory of JSONs or a single JSON file",
+    )
+    parser.add_argument(
+        "--output", required=True, help="Output directory for PNG masks"
+    )
+    parser.add_argument(
+        "--label", default="cable", help="Label to rasterize (default: cable)"
+    )
+    parser.add_argument(
+        "--no-recursive", action="store_true", help="Do not search subdirectories"
+    )
     args = parser.parse_args(argv)
     convert_ttpla_jsons_to_masks(

train.py CHANGED Viewed

@@ -23,6 +23,7 @@ from src.wireseghr.data.dataset import WireSegDataset
 from src.wireseghr.model.label_downsample import downsample_label_maxpool
 from src.wireseghr.data.sampler import BalancedPatchSampler
 from src.wireseghr.metrics import compute_metrics
 class SizeBatchSampler:
@@ -40,7 +41,7 @@ class SizeBatchSampler:
         self._len = 0
         for hw, idxs in bins.items():
             _ = hw  # unused, clarity
-            self._len += (len(idxs) // self.batch_size)
     def __len__(self) -> int:
         return self._len
@@ -54,7 +55,9 @@ class SizeBatchSampler:
             pool = list(bins[hw])
             random.shuffle(pool)
             # Yield only full batches to keep fixed batch size and same-size assumption
-            for i in range(0, len(pool) - (len(pool) % self.batch_size), self.batch_size):
                 yield pool[i : i + self.batch_size]
@@ -87,8 +90,10 @@ def main():
     # Config
     coarse_train = int(cfg["coarse"]["train_size"])  # 512
-    patch_size = int(cfg["fine"]["patch_size"])  # 768
     overlap = int(cfg["fine"]["overlap"])  # e.g., 128
     eval_cfg = cfg.get("eval", {})
     eval_fine_batch = int(eval_cfg.get("fine_batch", 16))
     assert eval_fine_batch >= 1
@@ -107,15 +112,17 @@ def main():
     if amp_enabled:
         cc_major, cc_minor = torch.cuda.get_device_capability()
         if precision == "fp16":
-            assert (
-                cc_major >= 7
-            ), f"fp16 requires Volta (SM 7.0)+; current SM {cc_major}.{cc_minor}"
         elif precision == "bf16":
-            assert (
-                cc_major >= 8
-            ), f"bf16 requires Ampere (SM 8.0)+; current SM {cc_major}.{cc_minor}"
     amp_dtype = (
-        torch.float16 if precision == "fp16" else (torch.bfloat16 if precision == "bf16" else None)
     )
     # Housekeeping
@@ -135,7 +142,9 @@ def main():
     num_workers = int(loader_cfg.get("num_workers", 4))
     prefetch_factor = int(loader_cfg.get("prefetch_factor", 2))
     pin_memory = bool(loader_cfg.get("pin_memory", True))
-    persistent_workers = bool(loader_cfg.get("persistent_workers", True)) if num_workers > 0 else False
     batch_sampler = SizeBatchSampler(dset, batch_size)
     loader_kwargs = dict(
         batch_sampler=batch_sampler,
@@ -252,24 +261,34 @@ def main():
         # Eval & Checkpoint
         if (step % eval_interval == 0) and (dset_val is not None):
             # Free training-step tensors before eval to lower peak memory
-            del x_fine, logits_coarse, cond_map, logits_fine, y_coarse, y_fine, loss_coarse, loss_fine, loss
             torch.cuda.empty_cache()
             model.eval()
             print(
-                f"[WireSegHR][train] Eval starting... val_size={len(dset_val)} max={eval_max_samples} patch={patch_size} overlap={overlap} stride={patch_size - overlap} fine_batch={eval_fine_batch}",
                 flush=True,
             )
             val_stats = validate(
                 model,
                 dset_val,
-                coarse_train,
                 device,
                 amp_enabled,
                 amp_dtype,
                 prob_thresh,
                 mm_enable,
                 mm_kernel,
-                patch_size,
                 overlap,
                 eval_fine_batch,
                 eval_max_samples,
@@ -306,7 +325,7 @@ def main():
                 save_test_visuals(
                     model,
                     dset_test,
-                    coarse_train,
                     device,
                     os.path.join(out_dir, f"test_vis_{step}"),
                     amp_enabled,
@@ -604,52 +623,16 @@ def validate(
         img = item["image"].astype(np.float32) / 255.0  # HxWx3
         mask = item["mask"].astype(np.uint8)
         H, W = mask.shape
-        # Build coarse input (zeros for cond+loc) on GPU
-        t_img = (
-            torch.from_numpy(np.transpose(img, (2, 0, 1)))
-            .unsqueeze(0)
-            .to(device)
-            .float()
-        )
-        rgb_c = F.interpolate(
-            t_img, size=(coarse_size, coarse_size), mode="bilinear", align_corners=False
-        )[0]
-        y_t = 0.299 * t_img[:, 0:1] + 0.587 * t_img[:, 1:2] + 0.114 * t_img[:, 2:3]
-        if minmax_enable:
-            # Asymmetric padding for even kernel to keep same HxW
-            k = int(minmax_kernel)
-            if (k % 2) == 0:
-                pad = (k // 2 - 1, k // 2, k // 2 - 1, k // 2)
-            else:
-                pad = (k // 2, k // 2, k // 2, k // 2)
-            y_p = F.pad(y_t, pad, mode="replicate")
-            y_max_full = F.max_pool2d(y_p, kernel_size=k, stride=1)
-            y_min_full = -F.max_pool2d(-y_p, kernel_size=k, stride=1)
-        else:
-            y_min_full = y_t
-            y_max_full = y_t
-        y_min_c = F.interpolate(
-            y_min_full,
-            size=(coarse_size, coarse_size),
-            mode="bilinear",
-            align_corners=False,
-        )[0]
-        y_max_c = F.interpolate(
-            y_max_full,
-            size=(coarse_size, coarse_size),
-            mode="bilinear",
-            align_corners=False,
-        )[0]
-        zeros_c = torch.zeros(1, coarse_size, coarse_size, device=device)
-        x_t = torch.cat([rgb_c, y_min_c, y_max_c, zeros_c], dim=0).unsqueeze(0)
-        with autocast(device_type=device.type, dtype=amp_dtype, enabled=amp_flag):
-            logits_c, cond_map = model.forward_coarse(x_t)
-        prob = torch.softmax(logits_c, dim=1)[:, 1:2]
-        prob_up = (
-            F.interpolate(prob, size=(H, W), mode="bilinear", align_corners=False)[0, 0]
-            .detach()
-            .cpu()
-            .numpy()
         )
         # Coarse metrics
         pred_coarse = (prob_up > prob_thresh).astype(np.uint8)
@@ -657,75 +640,30 @@ def validate(
         for k in coarse_sum:
             coarse_sum[k] += m_c[k]
-        # Fine-stage tiled inference and stitching (BATCHED)
-        P = fine_patch_size
-        stride = P - fine_overlap
-        assert stride > 0
-        assert H >= P and W >= P
-        # Accumulate on device to avoid CPU<->GPU thrash
-        prob_sum_t = torch.zeros((H, W), device=device, dtype=torch.float32)
-        weight_t = torch.zeros((H, W), device=device, dtype=torch.float32)
-        # Prepare min/max on full-res (already computed above as y_min_full/y_max_full)
-        hc4, wc4 = cond_map.shape[2], cond_map.shape[3]
-        ys = list(range(0, max(H - P, 0) + 1, stride))
         if ys[-1] != (H - P):
             ys.append(H - P)
-        xs = list(range(0, max(W - P, 0) + 1, stride))
         if xs[-1] != (W - P):
             xs.append(W - P)
-        coords: List[Tuple[int, int]] = []
-        for y0 in ys:
-            for x0 in xs:
-                coords.append((y0, x0))
-        total_tiles += len(coords)
-        total_batches = (len(coords) + fine_batch - 1) // fine_batch
-        for i0 in range(0, len(coords), fine_batch):
-            batch_coords = coords[i0 : i0 + fine_batch]
-            xs_list: List[torch.Tensor] = []
-            batch_idx = i0 // fine_batch
-            if total_batches > 0 and (batch_idx % max(1, total_batches // 10) == 0):
-                print(
-                    f"[Eval] Img {i+1}/{target_n} | Tile batch {batch_idx+1}/{total_batches}",
-                    flush=True,
-                )
-            for (y0, x0) in batch_coords:
-                y1, x1 = y0 + P, x0 + P
-                # Cond crop mapping (same as training _build_fine_inputs)
-                y0c = (y0 * hc4) // H
-                y1c = ((y1 * hc4) + H - 1) // H
-                x0c = (x0 * wc4) // W
-                x1c = ((x1 * wc4) + W - 1) // W
-                cond_sub = cond_map[:, :, y0c:y1c, x0c:x1c].float()
-                cond_patch = F.interpolate(
-                    cond_sub, size=(P, P), mode="bilinear", align_corners=False
-                ).squeeze(1)  # 1xPxP
-                # Build fine input channels directly from on-device tensors
-                rgb_t = t_img[0, :, y0:y1, x0:x1]  # 3xPxP
-                ymin_t = y_min_full[0, 0, y0:y1, x0:x1].float().unsqueeze(0)  # 1xPxP
-                ymax_t = y_max_full[0, 0, y0:y1, x0:x1].float().unsqueeze(0)  # 1xPxP
-                x_f = torch.cat([rgb_t, ymin_t, ymax_t, cond_patch], dim=0).unsqueeze(0)
-                xs_list.append(x_f)
-            x_f_batch = torch.cat(xs_list, dim=0)  # Bx6xPxP
-            with autocast(device_type=device.type, dtype=amp_dtype, enabled=amp_flag):
-                logits_f = model.forward_fine(x_f_batch)
-                prob_f = torch.softmax(logits_f, dim=1)[:, 1:2]
-                prob_f_up = F.interpolate(
-                    prob_f, size=(P, P), mode="bilinear", align_corners=False
-                )[:, 0, :, :]  # BxPxP
-            for bi, (y0, x0) in enumerate(batch_coords):
-                y1, x1 = y0 + P, x0 + P
-                prob_sum_t[y0:y1, x0:x1] += prob_f_up[bi]
-                weight_t[y0:y1, x0:x1] += 1.0
-        prob_full = (prob_sum_t / weight_t).detach().cpu().numpy()
         pred_fine = (prob_full > prob_thresh).astype(np.uint8)
         m_f = compute_metrics(pred_fine, mask)
         for k in metrics_sum:
@@ -773,50 +711,15 @@ def save_test_visuals(
         item = dset_test[i]
         img = item["image"].astype(np.float32) / 255.0
         H, W = img.shape[:2]
-        t_img = (
-            torch.from_numpy(np.transpose(img, (2, 0, 1)))
-            .unsqueeze(0)
-            .to(device)
-            .float()
-        )
-        rgb_c = F.interpolate(
-            t_img, size=(coarse_size, coarse_size), mode="bilinear", align_corners=False
-        )[0]
-        y_t = 0.299 * t_img[:, 0:1] + 0.587 * t_img[:, 1:2] + 0.114 * t_img[:, 2:3]
-        if minmax_enable:
-            k = int(minmax_kernel)
-            if (k % 2) == 0:
-                pad = (k // 2 - 1, k // 2, k // 2 - 1, k // 2)
-            else:
-                pad = (k // 2, k // 2, k // 2, k // 2)
-            y_p = F.pad(y_t, pad, mode="replicate")
-            y_max_full = F.max_pool2d(y_p, kernel_size=k, stride=1)
-            y_min_full = -F.max_pool2d(-y_p, kernel_size=k, stride=1)
-        else:
-            y_min_full = y_t
-            y_max_full = y_t
-        y_min_c = F.interpolate(
-            y_min_full,
-            size=(coarse_size, coarse_size),
-            mode="bilinear",
-            align_corners=False,
-        )[0]
-        y_max_c = F.interpolate(
-            y_max_full,
-            size=(coarse_size, coarse_size),
-            mode="bilinear",
-            align_corners=False,
-        )[0]
-        zeros_c = torch.zeros(1, coarse_size, coarse_size, device=device)
-        x_t = torch.cat([rgb_c, y_min_c, y_max_c, zeros_c], dim=0).unsqueeze(0)
-        with autocast(device_type=device.type, dtype=None, enabled=amp_flag):
-            logits_c, _ = model.forward_coarse(x_t)
-        prob = torch.softmax(logits_c, dim=1)[:, 1:2]
-        prob_up = (
-            F.interpolate(prob, size=(H, W), mode="bilinear", align_corners=False)[0, 0]
-            .detach()
-            .cpu()
-            .numpy()
         )
         pred = (prob_up > prob_thresh).astype(np.uint8) * 255
         # Save input and prediction

 from src.wireseghr.model.label_downsample import downsample_label_maxpool
 from src.wireseghr.data.sampler import BalancedPatchSampler
 from src.wireseghr.metrics import compute_metrics
+from infer import _coarse_forward, _tiled_fine_forward
 class SizeBatchSampler:
         self._len = 0
         for hw, idxs in bins.items():
             _ = hw  # unused, clarity
+            self._len += len(idxs) // self.batch_size
     def __len__(self) -> int:
         """Return ``self._len``, the running total of full batches.

         The counter is accumulated as ``len(idxs) // self.batch_size`` for each
         size bin, so partial (remainder) batches are not counted.
         """
         return self._len
             pool = list(bins[hw])
             random.shuffle(pool)
             # Yield only full batches to keep fixed batch size and same-size assumption
+            for i in range(
+                0, len(pool) - (len(pool) % self.batch_size), self.batch_size
+            ):
                 yield pool[i : i + self.batch_size]
     # Config
     coarse_train = int(cfg["coarse"]["train_size"])  # 512
+    coarse_test = int(cfg["coarse"]["test_size"])  # use higher res for eval/infer
+    patch_size = int(cfg["fine"]["patch_size"])  # training fine patch size
     overlap = int(cfg["fine"]["overlap"])  # e.g., 128
+    eval_patch_size = int(cfg["inference"]["fine_patch_size"])  # 1024 for eval/infer
     eval_cfg = cfg.get("eval", {})
     eval_fine_batch = int(eval_cfg.get("fine_batch", 16))
     assert eval_fine_batch >= 1
     if amp_enabled:
         cc_major, cc_minor = torch.cuda.get_device_capability()
         if precision == "fp16":
+            assert cc_major >= 7, (
+                f"fp16 requires Volta (SM 7.0)+; current SM {cc_major}.{cc_minor}"
+            )
         elif precision == "bf16":
+            assert cc_major >= 8, (
+                f"bf16 requires Ampere (SM 8.0)+; current SM {cc_major}.{cc_minor}"
+            )
     amp_dtype = (
+        torch.float16
+        if precision == "fp16"
+        else (torch.bfloat16 if precision == "bf16" else None)
     )
     # Housekeeping
     num_workers = int(loader_cfg.get("num_workers", 4))
     prefetch_factor = int(loader_cfg.get("prefetch_factor", 2))
     pin_memory = bool(loader_cfg.get("pin_memory", True))
+    persistent_workers = (
+        bool(loader_cfg.get("persistent_workers", True)) if num_workers > 0 else False
+    )
     batch_sampler = SizeBatchSampler(dset, batch_size)
     loader_kwargs = dict(
         batch_sampler=batch_sampler,
         # Eval & Checkpoint
         if (step % eval_interval == 0) and (dset_val is not None):
             # Free training-step tensors before eval to lower peak memory
+            del (
+                x_fine,
+                logits_coarse,
+                cond_map,
+                logits_fine,
+                y_coarse,
+                y_fine,
+                loss_coarse,
+                loss_fine,
+                loss,
+            )
             torch.cuda.empty_cache()
             model.eval()
             print(
+                f"[WireSegHR][train] Eval starting... val_size={len(dset_val)} max={eval_max_samples} patch={eval_patch_size} overlap={overlap} stride={eval_patch_size - overlap} fine_batch={eval_fine_batch}",
                 flush=True,
             )
             val_stats = validate(
                 model,
                 dset_val,
+                coarse_test,
                 device,
                 amp_enabled,
                 amp_dtype,
                 prob_thresh,
                 mm_enable,
                 mm_kernel,
+                eval_patch_size,
                 overlap,
                 eval_fine_batch,
                 eval_max_samples,
                 save_test_visuals(
                     model,
                     dset_test,
+                    coarse_test,
                     device,
                     os.path.join(out_dir, f"test_vis_{step}"),
                     amp_enabled,
         img = item["image"].astype(np.float32) / 255.0  # HxWx3
         mask = item["mask"].astype(np.uint8)
         H, W = mask.shape
+        # Reuse inference coarse pass
+        prob_up, cond_map, t_img, y_min_full, y_max_full = _coarse_forward(
+            model,
+            img,
+            coarse_size,
+            minmax_enable,
+            int(minmax_kernel),
+            device,
+            amp_flag,
+            amp_dtype,
         )
         # Coarse metrics
         pred_coarse = (prob_up > prob_thresh).astype(np.uint8)
         for k in coarse_sum:
             coarse_sum[k] += m_c[k]
+        # Fine-stage via helper (batched and stitched)
+        prob_full = _tiled_fine_forward(
+            model,
+            t_img,
+            cond_map,
+            y_min_full,
+            y_max_full,
+            int(fine_patch_size),
+            int(fine_overlap),
+            int(fine_batch),
+            device,
+            amp_flag,
+            amp_dtype,
+        )
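+        # Track tiles for throughput parity.
+        # NOTE(review): assumes H >= P and W >= P. For a smaller image the
+        # range() below is empty and ys[-1] / xs[-1] raises IndexError —
+        # confirm the dataset (or an upstream pad/resize) guarantees this.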
+        P = int(fine_patch_size)
+        stride = P - int(fine_overlap)
+        ys = list(range(0, H - P + 1, stride))
         if ys[-1] != (H - P):
             ys.append(H - P)
+        xs = list(range(0, W - P + 1, stride))
         if xs[-1] != (W - P):
             xs.append(W - P)
+        total_tiles += len(ys) * len(xs)
         pred_fine = (prob_full > prob_thresh).astype(np.uint8)
         m_f = compute_metrics(pred_fine, mask)
         for k in metrics_sum:
         item = dset_test[i]
         img = item["image"].astype(np.float32) / 255.0
         H, W = img.shape[:2]
+        prob_up, _cond_map, _t_img, _ymin, _ymax = _coarse_forward(
+            model,
+            img,
+            int(coarse_size),
+            bool(minmax_enable),
+            int(minmax_kernel),
+            device,
+            bool(amp_flag),
+            None,
         )
         pred = (prob_up > prob_thresh).astype(np.uint8) * 255
         # Save input and prediction