Spaces:
Running
Running
Commit
·
bc00fab
1
Parent(s):
fdebd07
Fix incorrect camera K matrix in desktop app
Browse files
The bundled SDK v1.3.1 scales camera intrinsics incorrectly (cx=213 instead of 640),
causing large tracking errors. Added automatic detection and correction.
TODO: refactor to remove the complexity/tech debt added in this commit, and remove the verbose logging that was added for bug fixing.
WIP head bug fixing - logging
- reachy_phone_home/main.py +210 -1
reachy_phone_home/main.py
CHANGED
|
@@ -10,6 +10,7 @@ from pathlib import Path
|
|
| 10 |
from typing import Callable, Optional
|
| 11 |
|
| 12 |
import cv2
|
|
|
|
| 13 |
from scipy.spatial.transform import Rotation as R
|
| 14 |
from ultralytics import YOLO
|
| 15 |
|
|
@@ -48,6 +49,11 @@ class PhoneFollower:
|
|
| 48 |
x1, y1, x2, y2 = _box_xyxy(box)
|
| 49 |
cx = int((x1 + x2) / 2)
|
| 50 |
cy = int(y1 + (y2 - y1) * y_ratio)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
self.update_xy(reachy, cx, cy)
|
| 52 |
|
| 53 |
def update_xy(self, reachy: ReachyMini, cx: int, cy: int) -> None:
|
|
@@ -57,7 +63,55 @@ class PhoneFollower:
|
|
| 57 |
and abs(cy - self.last_y) < self.move_threshold_px
|
| 58 |
):
|
| 59 |
return
|
| 60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
self.last_x = cx
|
| 62 |
self.last_y = cy
|
| 63 |
|
|
@@ -195,6 +249,112 @@ class TrackerConfig:
|
|
| 195 |
phone_home_pitch_deg: float = 25.0
|
| 196 |
|
| 197 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
def run_tracker(
|
| 199 |
reachy: ReachyMini,
|
| 200 |
config: TrackerConfig,
|
|
@@ -208,6 +368,21 @@ def run_tracker(
|
|
| 208 |
logger.setLevel(logging.INFO)
|
| 209 |
WEB_UI.set_download_callback(lambda w: _resolve_weights(w, logger))
|
| 210 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
weights = _resolve_weights(config.weights, logger)
|
| 212 |
model = YOLO(weights)
|
| 213 |
|
|
@@ -254,6 +429,7 @@ def run_tracker(
|
|
| 254 |
last_curious = 0.0
|
| 255 |
heartbeat_count = 0
|
| 256 |
last_head_log = 0.0
|
|
|
|
| 257 |
mode = "tracking_phone"
|
| 258 |
mode_start = time.time()
|
| 259 |
quips_enabled = config.quips_enabled
|
|
@@ -364,6 +540,39 @@ def run_tracker(
|
|
| 364 |
|
| 365 |
if mode == "looking_down":
|
| 366 |
if not look_down_active:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 367 |
reachy.goto_target(
|
| 368 |
head=create_head_pose(
|
| 369 |
z=config.look_down_z_mm,
|
|
|
|
| 10 |
from typing import Callable, Optional
|
| 11 |
|
| 12 |
import cv2
|
| 13 |
+
import numpy as np
|
| 14 |
from scipy.spatial.transform import Rotation as R
|
| 15 |
from ultralytics import YOLO
|
| 16 |
|
|
|
|
| 49 |
x1, y1, x2, y2 = _box_xyxy(box)
|
| 50 |
cx = int((x1 + x2) / 2)
|
| 51 |
cy = int(y1 + (y2 - y1) * y_ratio)
|
| 52 |
+
logger = logging.getLogger("yolo26l_phone_use_tracker")
|
| 53 |
+
conf = float(box.conf[0].item()) if hasattr(box, "conf") else None
|
| 54 |
+
logger.info("[phone_box] xyxy=(%d,%d,%d,%d) center=(%d,%d) conf=%.3f",
|
| 55 |
+
x1, y1, x2, y2, cx, cy, conf if conf is not None else 0.0)
|
| 56 |
+
WEB_UI.append_log(f"[phone_box] xyxy=({x1},{y1},{x2},{y2}) center=({cx},{cy}) conf={conf}")
|
| 57 |
self.update_xy(reachy, cx, cy)
|
| 58 |
|
| 59 |
def update_xy(self, reachy: ReachyMini, cx: int, cy: int) -> None:
|
|
|
|
| 63 |
and abs(cy - self.last_y) < self.move_threshold_px
|
| 64 |
):
|
| 65 |
return
|
| 66 |
+
logger = logging.getLogger("yolo26l_phone_use_tracker")
|
| 67 |
+
try:
|
| 68 |
+
cam = reachy.media_manager.camera
|
| 69 |
+
res = cam.resolution if cam else "N/A"
|
| 70 |
+
active_K = cam.resized_K if cam else "N/A"
|
| 71 |
+
logger.info("[look_at] pixel=(%d, %d) resolution=%s", cx, cy, res)
|
| 72 |
+
logger.info("[look_at] active K:\n%s", active_K)
|
| 73 |
+
WEB_UI.append_log(f"[look_at] pixel=({cx}, {cy}) res={res}")
|
| 74 |
+
except Exception:
|
| 75 |
+
logger.info("[look_at] pixel=(%d, %d) camera info unavailable", cx, cy)
|
| 76 |
+
try:
|
| 77 |
+
head_before = reachy.get_current_head_pose()
|
| 78 |
+
logger.info("[look_at] head BEFORE:\n%s", head_before)
|
| 79 |
+
except Exception:
|
| 80 |
+
head_before = None
|
| 81 |
+
# --- Reproduce look_at_image internals for diagnostics ---
|
| 82 |
+
try:
|
| 83 |
+
cam = reachy.media_manager.camera
|
| 84 |
+
K_active = cam.resized_K
|
| 85 |
+
D_active = cam.D
|
| 86 |
+
points = np.array([[[cx, cy]]], dtype=np.float32)
|
| 87 |
+
x_n, y_n = cv2.undistortPoints(points, K_active, D_active)[0, 0]
|
| 88 |
+
ray_cam = np.array([x_n, y_n, 1.0])
|
| 89 |
+
ray_cam /= np.linalg.norm(ray_cam)
|
| 90 |
+
T_world_head = reachy.get_current_head_pose()
|
| 91 |
+
T_world_cam = T_world_head @ reachy.T_head_cam
|
| 92 |
+
R_wc = T_world_cam[:3, :3]
|
| 93 |
+
t_wc = T_world_cam[:3, 3]
|
| 94 |
+
ray_world = R_wc @ ray_cam
|
| 95 |
+
P_world = t_wc + ray_world
|
| 96 |
+
logger.info("[look_at_detail] undistorted (x_n, y_n) = (%.4f, %.4f)", x_n, y_n)
|
| 97 |
+
logger.info("[look_at_detail] ray_cam = %s", ray_cam)
|
| 98 |
+
logger.info("[look_at_detail] T_world_head:\n%s", T_world_head)
|
| 99 |
+
logger.info("[look_at_detail] T_world_cam :\n%s", T_world_cam)
|
| 100 |
+
logger.info("[look_at_detail] ray_world = %s", ray_world)
|
| 101 |
+
logger.info("[look_at_detail] P_world = %s", P_world)
|
| 102 |
+
WEB_UI.append_log(f"[look_at_detail] undist=({x_n:.4f},{y_n:.4f}) ray_cam={ray_cam} P_world={P_world}")
|
| 103 |
+
except Exception as e:
|
| 104 |
+
logger.warning("[look_at_detail] could not compute: %s", e)
|
| 105 |
+
# --- End internals ---
|
| 106 |
+
result_pose = reachy.look_at_image(cx, cy, duration=self.head_duration, perform_movement=True)
|
| 107 |
+
logger.info("[look_at] target pose returned:\n%s", result_pose)
|
| 108 |
+
WEB_UI.append_log(f"[look_at] target pose: {result_pose}")
|
| 109 |
+
try:
|
| 110 |
+
head_after = reachy.get_current_head_pose()
|
| 111 |
+
logger.info("[look_at] head AFTER:\n%s", head_after)
|
| 112 |
+
WEB_UI.append_log(f"[look_at] head after: {head_after}")
|
| 113 |
+
except Exception:
|
| 114 |
+
pass
|
| 115 |
self.last_x = cx
|
| 116 |
self.last_y = cy
|
| 117 |
|
|
|
|
| 249 |
phone_home_pitch_deg: float = 25.0
|
| 250 |
|
| 251 |
|
| 252 |
+
def _fix_camera_intrinsics(reachy: ReachyMini, logger: logging.Logger) -> None:
    """Work around an SDK bug where the wireless camera's K matrix is not
    rescaled to match the actual frame resolution.

    ReachyMiniWirelessCamSpecs inherits its K from ReachyMiniLiteCamSpecs
    (calibrated at 1920x1080) but overrides default_resolution to 1280x720.
    Because CameraBase.set_resolution computes scaling ratios relative to
    default_resolution, the ratio ends up being 1.0 and K is left unscaled.
    This causes look_at_image to compute incorrect rays, producing large
    unexpected head rotations.

    This function detects the mismatch by comparing the principal point of
    the active K matrix (cx ≈ width/2, cy ≈ height/2) against the actual
    frame resolution. If either differs by 10 % or more, K is recomputed
    from ``specs.K`` and written back to ``cam.resized_K``.

    The fix is skipped (with a warning) when no camera is available, when
    the camera specs or the active K are missing, when the resolution is
    not set, or when the resolution / calibration principal point is not
    strictly positive (rescaling would otherwise produce a degenerate K).

    Args:
        reachy: Robot handle; ``reachy.media_manager.camera`` is inspected
            and, if needed, its ``resized_K`` attribute is overwritten.
        logger: Logger receiving the diagnostic dump and the fix report.
    """
    media_manager = getattr(reachy, "media_manager", None)
    cam = getattr(media_manager, "camera", None) if media_manager is not None else None
    if cam is None:
        logger.warning("[camera_diag] No camera found on media_manager")
        return

    specs = getattr(cam, "camera_specs", None)
    current_K = getattr(cam, "resized_K", None)

    # --- Diagnostic dump ---
    logger.info("[camera_diag] camera type : %s", type(cam).__name__)
    logger.info("[camera_diag] camera_specs type : %s",
                type(specs).__name__ if specs is not None else None)
    logger.info("[camera_diag] camera_specs name : %s",
                getattr(specs, "name", None))
    # Read the resolution once; the camera raises RuntimeError while it is unset.
    try:
        resolution = cam.resolution
        logger.info("[camera_diag] resolution : %s", resolution)
    except RuntimeError:
        resolution = None
        logger.info("[camera_diag] resolution : NOT SET")
    logger.info("[camera_diag] resized_K :\n%s", current_K)
    if specs is not None:
        logger.info("[camera_diag] specs.K :\n%s", specs.K)
        logger.info("[camera_diag] specs.default_res : %s", specs.default_resolution)
    WEB_UI.append_log(f"[camera_diag] camera={type(cam).__name__} "
                      f"specs={getattr(specs, 'name', None)} "
                      f"resized_K={current_K}")
    # --- End diagnostic dump ---

    if specs is None or current_K is None:
        logger.warning("[camera_diag] specs or resized_K is None, skipping fix")
        return

    if resolution is None:
        logger.warning("[camera_diag] resolution not set yet, skipping fix")
        return

    frame_w, frame_h = resolution  # actual capture resolution
    if frame_w <= 0 or frame_h <= 0:
        # A zero-sized frame would scale K down to all zeros below.
        logger.warning(
            "[camera_diag] invalid resolution %sx%s, skipping fix", frame_w, frame_h
        )
        return

    # Only the ACTIVE resized_K is validated: its principal point should sit
    # near the centre of the frame. specs.K is used solely as the source for
    # the recomputation further down.
    active_cx = current_K[0, 2]
    active_cy = current_K[1, 2]
    expected_cx = frame_w / 2.0
    expected_cy = frame_h / 2.0

    active_ok = (
        abs(active_cx - expected_cx) / expected_cx < 0.1
        and abs(active_cy - expected_cy) / expected_cy < 0.1
    )

    if active_ok:
        logger.info(
            "[camera] K matrix looks correct for %dx%d frames "
            "(cx=%.1f, cy=%.1f vs expected ~%.0f, ~%.0f) — no correction needed",
            frame_w, frame_h, active_cx, active_cy, expected_cx, expected_cy,
        )
        return

    # The active K is wrong. Recompute from specs.K, which should be
    # calibrated for a resolution whose centre matches (specs.K cx, cy).
    orig_K = specs.K
    cal_w = orig_K[0, 2] * 2  # approximate calibration width
    cal_h = orig_K[1, 2] * 2  # approximate calibration height
    if cal_w <= 0 or cal_h <= 0:
        # Without a positive principal point the calibration resolution
        # cannot be inferred; dividing by it would yield inf/NaN intrinsics.
        logger.warning(
            "[camera_diag] specs.K principal point is not positive "
            "(cx=%s, cy=%s), skipping fix",
            orig_K[0, 2], orig_K[1, 2],
        )
        return

    # Rescale from the calibration resolution to the actual frame resolution.
    w_ratio = frame_w / cal_w
    h_ratio = frame_h / cal_h

    fixed_K = orig_K.copy()
    fixed_K[0, 0] *= w_ratio  # fx
    fixed_K[1, 1] *= h_ratio  # fy
    fixed_K[0, 2] *= w_ratio  # cx
    fixed_K[1, 2] *= h_ratio  # cy

    cam.resized_K = fixed_K
    logger.info(
        "[camera] Fixed K matrix for %dx%d frames "
        "(active cx,cy was %.1f,%.1f — expected ~%.0f,%.0f)",
        frame_w, frame_h, active_cx, active_cy, expected_cx, expected_cy,
    )
    logger.info("[camera] Original specs.K:\n%s", orig_K)
    logger.info("[camera] Broken resized_K:\n%s", current_K)
    logger.info("[camera] Corrected K:\n%s", fixed_K)
    WEB_UI.append_log(f"[camera] Fixed K: cx {active_cx:.1f}→{fixed_K[0,2]:.1f}, "
                      f"cy {active_cy:.1f}→{fixed_K[1,2]:.1f}")
|
| 356 |
+
|
| 357 |
+
|
| 358 |
def run_tracker(
|
| 359 |
reachy: ReachyMini,
|
| 360 |
config: TrackerConfig,
|
|
|
|
| 368 |
logger.setLevel(logging.INFO)
|
| 369 |
WEB_UI.set_download_callback(lambda w: _resolve_weights(w, logger))
|
| 370 |
|
| 371 |
+
_fix_camera_intrinsics(reachy, logger)
|
| 372 |
+
|
| 373 |
+
# --- SDK & transform diagnostics ---
|
| 374 |
+
try:
|
| 375 |
+
import reachy_mini as _rm
|
| 376 |
+
sdk_version = getattr(_rm, "__version__", "unknown")
|
| 377 |
+
except Exception:
|
| 378 |
+
sdk_version = "import_error"
|
| 379 |
+
logger.info("[sdk_diag] reachy_mini version : %s", sdk_version)
|
| 380 |
+
logger.info("[sdk_diag] T_head_cam :\n%s", reachy.T_head_cam)
|
| 381 |
+
logger.info("[sdk_diag] python executable : %s", os.path.realpath(os.sys.executable))
|
| 382 |
+
WEB_UI.append_log(f"[sdk_diag] version={sdk_version} python={os.path.realpath(os.sys.executable)}")
|
| 383 |
+
WEB_UI.append_log(f"[sdk_diag] T_head_cam={reachy.T_head_cam}")
|
| 384 |
+
# --- End SDK diagnostics ---
|
| 385 |
+
|
| 386 |
weights = _resolve_weights(config.weights, logger)
|
| 387 |
model = YOLO(weights)
|
| 388 |
|
|
|
|
| 429 |
last_curious = 0.0
|
| 430 |
heartbeat_count = 0
|
| 431 |
last_head_log = 0.0
|
| 432 |
+
first_look_down_logged = False
|
| 433 |
mode = "tracking_phone"
|
| 434 |
mode_start = time.time()
|
| 435 |
quips_enabled = config.quips_enabled
|
|
|
|
| 540 |
|
| 541 |
if mode == "looking_down":
|
| 542 |
if not look_down_active:
|
| 543 |
+
if not first_look_down_logged:
|
| 544 |
+
try:
|
| 545 |
+
current_pose = reachy.get_current_head_pose()
|
| 546 |
+
logger.info("[look_down] current head pose: %s", current_pose)
|
| 547 |
+
WEB_UI.append_log(f"[look_down] current head pose: {current_pose}")
|
| 548 |
+
except Exception:
|
| 549 |
+
logger.info("[look_down] current head pose: unavailable")
|
| 550 |
+
WEB_UI.append_log("[look_down] current head pose: unavailable")
|
| 551 |
+
target_pose = create_head_pose(
|
| 552 |
+
z=config.look_down_z_mm,
|
| 553 |
+
pitch=config.look_down_pitch_deg,
|
| 554 |
+
mm=True,
|
| 555 |
+
degrees=True,
|
| 556 |
+
)
|
| 557 |
+
logger.info("[look_down] target head pose: %s", target_pose)
|
| 558 |
+
WEB_UI.append_log(f"[look_down] target head pose: {target_pose}")
|
| 559 |
+
if not config.no_head:
|
| 560 |
+
try:
|
| 561 |
+
reachy.goto_target(
|
| 562 |
+
head=create_head_pose(),
|
| 563 |
+
duration=min(0.6, config.neutral_duration),
|
| 564 |
+
)
|
| 565 |
+
time.sleep(0.5)
|
| 566 |
+
try:
|
| 567 |
+
post_pose = reachy.get_current_head_pose()
|
| 568 |
+
logger.info("[look_down] post-neutral head pose: %s", post_pose)
|
| 569 |
+
WEB_UI.append_log(f"[look_down] post-neutral head pose: {post_pose}")
|
| 570 |
+
except Exception:
|
| 571 |
+
logger.info("[look_down] post-neutral head pose: unavailable")
|
| 572 |
+
WEB_UI.append_log("[look_down] post-neutral head pose: unavailable")
|
| 573 |
+
except Exception:
|
| 574 |
+
pass
|
| 575 |
+
first_look_down_logged = True
|
| 576 |
reachy.goto_target(
|
| 577 |
head=create_head_pose(
|
| 578 |
z=config.look_down_z_mm,
|