# logic.py
import os
import json
import math
from collections import deque
from pathlib import Path

import cv2
import numpy as np
import torch
from torch.utils.data import Dataset
from torchvision import transforms
# --- Constants & Configs ---
WAYPOINT_SCALE_FACTOR = 5.0
T1_FUTURE_TIME = 1.0  # forecast horizon (s) for the first predicted map
T2_FUTURE_TIME = 2.0  # forecast horizon (s) for the second predicted map
TRACKER_FREQUENCY = 10
MERGE_PERCENT = 0.4
PIXELS_PER_METER = 8
MAX_DISTANCE = 32  # meters covered by the rendered map on each side of the ego car
IMG_SIZE = MAX_DISTANCE * PIXELS_PER_METER * 2
EGO_CAR_X = IMG_SIZE // 2
EGO_CAR_Y = IMG_SIZE - (4.0 * PIXELS_PER_METER)
reweight_array = np.ones((20, 20, 7))
last_valid_waypoints = None
last_valid_theta = 0.0

class ControllerConfig:
    turn_KP, turn_KI, turn_KD, turn_n = 1.0, 0.1, 0.1, 20
    speed_KP, speed_KI, speed_KD, speed_n = 0.5, 0.05, 0.1, 20
    max_speed, max_throttle, clip_delta = 6.0, 0.75, 0.25
    collision_buffer, detect_threshold = [0.0, 0.0], 0.04
    brake_speed, brake_ratio = 0.4, 1.1

# --- Data Handling ---
# cv2 yields numpy arrays, so both pipelines start with ToPILImage before Resize
# (torchvision's Resize accepts PIL images or tensors, not raw numpy arrays).
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
lidar_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((112, 112)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5]),
])
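
# A minimal sanity check for the two pipelines above (synthetic input, not part
# of the pipeline). ToPILImage accepts the HWC uint8 arrays that cv2 produces.
def _demo_transforms():
    dummy = np.zeros((600, 800, 3), dtype=np.uint8)
    print(transform(dummy).shape)        # torch.Size([3, 224, 224])
    print(lidar_transform(dummy).shape)  # torch.Size([3, 112, 112])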

class LMDriveDataset(Dataset):
    def __init__(self, data_dir, transform=None, lidar_transform=None):
        self.data_dir = Path(data_dir)
        self.transform = transform
        self.lidar_transform = lidar_transform
        self.samples = []
        measurement_dir = self.data_dir / "measurements"
        image_dir = self.data_dir / "rgb_full"
        measurement_files = sorted(f for f in os.listdir(measurement_dir) if f.endswith(".json"))
        image_files = sorted(f for f in os.listdir(image_dir) if f.endswith(".jpg"))
        num_samples = min(len(measurement_files), len(image_files))
        for frame_id in range(num_samples):
            measurement_path = str(measurement_dir / f"{frame_id:04d}.json")
            image_path = str(image_dir / f"{frame_id:04d}.jpg")
            if not os.path.exists(measurement_path) or not os.path.exists(image_path):
                continue
            with open(measurement_path, "r") as f:
                measurements_data = json.load(f)
            self.samples.append({
                "image_path": image_path,
                "measurement_path": measurement_path,
                "frame_id": frame_id,
                "measurements": measurements_data,
            })

    def __len__(self):
        return len(self.samples)
    def __getitem__(self, idx):
        sample = self.samples[idx]
        full_image = cv2.imread(sample["image_path"])
        if full_image is None:
            raise ValueError(f"Failed to load image: {sample['image_path']}")
        full_image = cv2.cvtColor(full_image, cv2.COLOR_BGR2RGB)
        # The four 600x800 views are stacked vertically in the rgb_full image.
        front_image = full_image[:600, :800]
        left_image = full_image[600:1200, :800]
        right_image = full_image[1200:1800, :800]
        center_image = full_image[1800:2400, :800]
        front_image_tensor = self.transform(front_image)
        left_image_tensor = self.transform(left_image)
        right_image_tensor = self.transform(right_image)
        center_image_tensor = self.transform(center_image)
        lidar_path = str(self.data_dir / "lidar" / f"{sample['frame_id']:04d}.png")
        lidar = cv2.imread(lidar_path)
        if lidar is None:
            lidar = np.zeros((112, 112, 3), dtype=np.uint8)
        else:
            if len(lidar.shape) == 2:
                lidar = cv2.cvtColor(lidar, cv2.COLOR_GRAY2BGR)
            lidar = cv2.cvtColor(lidar, cv2.COLOR_BGR2RGB)
        lidar_tensor = self.lidar_transform(lidar)
        measurements_data = sample["measurements"]
        x = measurements_data.get("x", 0.0)
        y = measurements_data.get("y", 0.0)
        theta = measurements_data.get("theta", 0.0)
        speed = measurements_data.get("speed", 0.0)
        steer = measurements_data.get("steer", 0.0)
        throttle = measurements_data.get("throttle", 0.0)
        brake = int(measurements_data.get("brake", False))
        command = measurements_data.get("command", 0)
        is_junction = int(measurements_data.get("is_junction", False))
        should_brake = int(measurements_data.get("should_brake", 0))
        x_command = measurements_data.get("x_command", 0.0)
        y_command = measurements_data.get("y_command", 0.0)
        target_point = torch.tensor([x_command, y_command], dtype=torch.float32)
        measurements = torch.tensor(
            [x, y, theta, speed, steer, throttle, brake, command, is_junction, should_brake],
            dtype=torch.float32,
        )
        return {
            "rgb": front_image_tensor, "rgb_left": left_image_tensor, "rgb_right": right_image_tensor,
            "rgb_center": center_image_tensor, "lidar": lidar_tensor,
            "measurements": measurements, "target_point": target_point,
        }
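
# Example usage (a sketch): build a DataLoader over a directory laid out as
# <data_dir>/measurements/0000.json, <data_dir>/rgb_full/0000.jpg and
# <data_dir>/lidar/0000.png. The default path below is hypothetical.
def _demo_dataset(data_dir="data/routes_town01"):
    from torch.utils.data import DataLoader
    dataset = LMDriveDataset(data_dir, transform=transform, lidar_transform=lidar_transform)
    loader = DataLoader(dataset, batch_size=4, shuffle=True)
    batch = next(iter(loader))
    # Expected: [4, 3, 224, 224], [4, 10], [4, 2]
    print(batch["rgb"].shape, batch["measurements"].shape, batch["target_point"].shape)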

# --- Object Tracking ---
class TrackedObject:
    def __init__(self):
        self.last_step = 0
        self.last_pos = [0, 0]
        self.historical_pos = deque(maxlen=10)
        self.historical_steps = deque(maxlen=10)
        self.historical_features = deque(maxlen=10)

    def update(self, step, object_info):
        self.last_step = step
        self.last_pos = object_info[:2]
        self.historical_pos.append(self.last_pos)
        self.historical_steps.append(step)
        if len(object_info) > 2:
            self.historical_features.append(object_info[2])

class Tracker:
    def __init__(self, frequency=10):
        self.tracks = []
        self.alive_ids = []
        self.frequency = frequency
    def update_and_predict(self, det_data, pos, theta, frame_num):
        det_data_weighted = det_data * reweight_array
        detected_objects = find_peak_box(det_data_weighted)
        objects_info = []
        R = np.array([[np.cos(-theta), -np.sin(-theta)], [np.sin(-theta), np.cos(-theta)]])
        for obj in detected_objects:
            i, j = obj['coords']
            obj_data = obj['raw_data']
            # convert_grid_to_xy returns (x, y); keep that ordering, as render() does.
            center_x, center_y = convert_grid_to_xy(i, j)
            center_x += obj_data[1]
            center_y += obj_data[2]
            loc = R.T.dot(np.array([center_x, center_y]))
            objects_info.append([loc[0] + pos[0], loc[1] + pos[1], obj_data[1:]])
        updates_ids = self._update(objects_info, frame_num)
        speed_results, heading_results = self._predict(updates_ids)
        # Merge the smoothed predictions back into the grid cells they came from.
        # updates_ids holds track indices, not grid coordinates; the k-th
        # prediction corresponds to the k-th detected object.
        for k, obj in enumerate(detected_objects):
            i, j = obj['coords']
            if heading_results[k] is not None:
                factor = MERGE_PERCENT * 0.1
                det_data[i, j, 3] = heading_results[k] * factor + det_data[i, j, 3] * (1 - factor)
            if speed_results[k] is not None:
                factor = MERGE_PERCENT * 0.1
                det_data[i, j, 6] = speed_results[k] * factor + det_data[i, j, 6] * (1 - factor)
        return det_data
    def _update(self, objects_info, step):
        latest_ids = []
        if len(self.tracks) == 0:
            for object_info in objects_info:
                to = TrackedObject()
                to.update(step, object_info)
                self.tracks.append(to)
                latest_ids.append(len(self.tracks) - 1)
        else:
            matched_ids = set()
            for object_info in objects_info:
                min_id, min_error = -1, float('inf')
                pos_x, pos_y = object_info[:2]
                for _id in self.alive_ids:
                    if _id in matched_ids:
                        continue
                    track_pos = self.tracks[_id].last_pos
                    distance = np.sqrt((track_pos[0] - pos_x) ** 2 + (track_pos[1] - pos_y) ** 2)
                    if distance < 2.0 and distance < min_error:
                        min_error = distance
                        min_id = _id
                if min_id != -1:
                    self.tracks[min_id].update(step, object_info)
                    latest_ids.append(min_id)
                    matched_ids.add(min_id)
                else:
                    to = TrackedObject()
                    to.update(step, object_info)
                    self.tracks.append(to)
                    latest_ids.append(len(self.tracks) - 1)
        self.alive_ids = [i for i, track in enumerate(self.tracks) if track.last_step > step - 6]
        return latest_ids
    def _predict(self, updates_ids):
        speed_results, heading_results = [], []
        for each_id in updates_ids:
            to = self.tracks[each_id]
            avg_speed, avg_heading = [], []
            for feature in to.historical_features:
                # feature is obj_data[1:], so index 2 is the normalized heading
                # (grid channel 3) and index 5 is the speed (grid channel 6).
                avg_speed.append(feature[5])
                heading_rad = feature[2] * np.pi
                avg_heading.append([np.cos(heading_rad), np.sin(heading_rad)])
            if len(avg_speed) < 2:
                speed_results.append(None)
                heading_results.append(None)
                continue
            avg_speed = np.mean(avg_speed)
            avg_heading = np.mean(np.stack(avg_heading), axis=0)
            yaw_angle = get_yaw_angle(avg_heading)
            heading_results.append((4 - yaw_angle / np.pi) % 2)
            speed_results.append(avg_speed)
        return speed_results, heading_results
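
# Tracker sketch with a synthetic 20x20x7 grid: one confident cell, re-fed for a
# few frames so the tracker accumulates history and smooths heading/speed. All
# values here are illustrative assumptions.
def _demo_tracker():
    tracker = Tracker(frequency=TRACKER_FREQUENCY)
    det = np.zeros((20, 20, 7))
    # conf, dx, dy, heading (x pi rad), length, width, speed
    det[10, 10] = [0.9, 0.0, 0.0, 0.5, 4.5, 2.0, 3.0]
    for frame in range(5):
        out = tracker.update_and_predict(det.copy(), pos=np.array([0.0, 0.0]), theta=0.0, frame_num=frame)
    print(out[10, 10, 3], out[10, 10, 6])  # heading/speed channels after merging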

# --- Control System ---
class PIDController:
    def __init__(self, K_P=1.0, K_I=0.0, K_D=0.0, n=20):
        self._K_P = K_P
        self._K_I = K_I
        self._K_D = K_D
        self._window = deque([0 for _ in range(n)], maxlen=n)
        self._max = 0.0
        self._min = 0.0

    def step(self, error):
        self._window.append(error)
        self._max = max(self._max, abs(error))
        self._min = -abs(self._max)
        if len(self._window) >= 2:
            integral = np.mean(self._window)
            derivative = self._window[-1] - self._window[-2]
        else:
            integral = 0.0
            derivative = 0.0
        return self._K_P * error + self._K_I * integral + self._K_D * derivative
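
# Tiny closed-loop sketch: drive a scalar toward a setpoint using the speed PID
# gains from ControllerConfig (illustrative only).
def _demo_pid():
    pid = PIDController(K_P=0.5, K_I=0.05, K_D=0.1, n=20)
    value, target = 0.0, 1.0
    for _ in range(30):
        value += pid.step(target - value)
    print(round(value, 3))  # converges near 1.0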

class InterfuserController:
    def __init__(self, config):
        self.turn_controller = PIDController(config.turn_KP, config.turn_KI, config.turn_KD, config.turn_n)
        self.speed_controller = PIDController(config.speed_KP, config.speed_KI, config.speed_KD, config.speed_n)
        self.config = config
        self.collision_buffer = np.array(config.collision_buffer)
        self.detect_threshold = config.detect_threshold
        self.stop_steps = 0
        self.forced_forward_steps = 0
        self.red_light_steps = 0
        self.block_red_light = 0
        self.in_stop_sign_effect = False
        self.block_stop_sign_distance = 0
        self.stop_sign_timer = 0
        self.stop_sign_trigger_times = 0
    def run_step(self, speed, waypoints, junction, traffic_light_state, stop_sign, meta_data):
        # Bookkeeping for how long the car has been (nearly) stationary.
        if speed < 0.2:
            self.stop_steps += 1
        else:
            self.stop_steps = max(0, self.stop_steps - 10)
        if speed < 0.06 and self.in_stop_sign_effect:
            self.in_stop_sign_effect = False
        if junction < 0.3:
            self.stop_sign_trigger_times = 0
        # If a red light persists implausibly long, ignore it for a while.
        if traffic_light_state > 0.7:
            self.red_light_steps += 1
        else:
            self.red_light_steps = 0
        if self.red_light_steps > 1000:
            self.block_red_light = 80
            self.red_light_steps = 0
        if self.block_red_light > 0:
            self.block_red_light -= 1
            traffic_light_state = 0.01
        if stop_sign < 0.6 and self.block_stop_sign_distance < 0.1:
            self.in_stop_sign_effect = True
            self.block_stop_sign_distance = 2.0
            self.stop_sign_trigger_times = 3
        self.block_stop_sign_distance = max(0, self.block_stop_sign_distance - 0.05 * speed)
        if self.block_stop_sign_distance < 0.1:
            if self.stop_sign_trigger_times > 0:
                self.block_stop_sign_distance = 2.0
                self.stop_sign_trigger_times -= 1
                self.in_stop_sign_effect = True
        # Lateral control: steer toward the midpoint of the first two waypoints.
        aim = (waypoints[1] + waypoints[0]) / 2.0
        angle = np.degrees(np.pi / 2 - np.arctan2(aim[1], aim[0])) / 90
        if speed < 0.01:
            angle = 0
        steer = self.turn_controller.step(angle)
        steer = np.clip(steer, -1.0, 1.0)
        desired_speed = self.config.max_speed
        downsampled_waypoints = downsample_waypoints(waypoints)
        safe_dis = get_max_safe_distance(meta_data, downsampled_waypoints, t=0, collision_buffer=self.collision_buffer, threshold=self.detect_threshold)
        if traffic_light_state > 0.5 or (stop_sign > 0.6 and self.stop_sign_timer < 20):
            desired_speed = 0.0
        if stop_sign > 0.6:
            self.stop_sign_timer += 1
        else:
            self.stop_sign_timer = 0
        # Longitudinal control.
        brake = speed > desired_speed * self.config.brake_ratio
        delta = np.clip(desired_speed - speed, 0.0, self.config.clip_delta)
        throttle = self.speed_controller.step(delta)
        throttle = np.clip(throttle, 0.0, self.config.max_throttle)
        meta_info_1 = f"speed: {speed:.2f}, target_speed: {desired_speed:.2f}"
        meta_info_2 = f"on_road_prob: {junction:.2f}, red_light_prob: {traffic_light_state:.2f}, stop_sign_prob: {1 - stop_sign:.2f}"
        meta_info_3 = f"stop_steps: {self.stop_steps}, block_stop_sign_distance: {self.block_stop_sign_distance:.1f}"
        # If stuck for a long time, force a short burst of forward motion.
        if self.stop_steps > 1200:
            self.forced_forward_steps = 12
            self.stop_steps = 0
        if self.forced_forward_steps > 0:
            throttle = 0.8
            brake = False
            self.forced_forward_steps -= 1
        if self.in_stop_sign_effect:
            throttle = 0
            brake = True
        return steer, throttle, brake, (meta_info_1, meta_info_2, meta_info_3, safe_dis)
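
# End-to-end controller sketch with synthetic model outputs (all values are
# illustrative assumptions). waypoints are ego-frame meters with the second
# coordinate pointing forward, matching the arctan2 steering term above;
# meta_data is a flattened, empty 20x20x7 detection grid. stop_sign = 0.6 sits
# between this code's two stop-sign branches, so neither fires.
def _demo_controller():
    controller = InterfuserController(ControllerConfig())
    waypoints = np.array([[0.0, 1.0], [0.0, 2.0], [0.0, 3.0], [0.0, 4.0]])
    steer, throttle, brake, meta = controller.run_step(
        speed=3.0, waypoints=waypoints, junction=0.9,
        traffic_light_state=0.0, stop_sign=0.6,
        meta_data=np.zeros(20 * 20 * 7))
    print(steer, throttle, brake)
    print(meta[0])  # "speed: 3.00, target_speed: 6.00"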

# --- Visualization & Helper Functions ---
class DisplayInterface:
    def __init__(self, width=1200, height=600):
        self._width = width
        self._height = height

    def run_interface(self, data):
        dashboard = np.zeros((self._height, self._width, 3), dtype=np.uint8)
        font = cv2.FONT_HERSHEY_SIMPLEX
        dashboard[:, :800] = cv2.resize(data.get('camera_view'), (800, 600))
        dashboard[:400, 800:1200] = cv2.resize(data['map_t0'], (400, 400))
        dashboard[400:600, 800:1000] = cv2.resize(data['map_t1'], (200, 200))
        dashboard[400:600, 1000:1200] = cv2.resize(data['map_t2'], (200, 200))
        cv2.line(dashboard, (800, 0), (800, 600), (255, 255, 255), 2)
        cv2.line(dashboard, (800, 400), (1200, 400), (255, 255, 255), 2)
        cv2.line(dashboard, (1000, 400), (1000, 600), (255, 255, 255), 2)
        y_pos = 40
        for key, text in data['text_info'].items():
            cv2.putText(dashboard, text, (820, y_pos), font, 0.6, (255, 255, 255), 1)
            y_pos += 30
        y_pos += 10
        for t, counts in data['object_counts'].items():
            count_str = f"{t}: C={counts['car']} B={counts['bike']} P={counts['pedestrian']}"
            cv2.putText(dashboard, count_str, (820, y_pos), font, 0.5, (255, 255, 255), 1)
            y_pos += 20
        cv2.putText(dashboard, "t0", (1160, 30), font, 0.8, (0, 255, 255), 2)
        cv2.putText(dashboard, "t1", (960, 430), font, 0.8, (0, 255, 255), 2)
        cv2.putText(dashboard, "t2", (1160, 430), font, 0.8, (0, 255, 255), 2)
        return dashboard
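
# Dashboard sketch with placeholder images and text (all inputs synthetic):
# camera view on the left, current map top-right, forecast maps bottom-right.
def _demo_display():
    ui = DisplayInterface()
    blank_map = np.zeros((IMG_SIZE, IMG_SIZE, 3), dtype=np.uint8)
    data = {
        'camera_view': np.zeros((600, 800, 3), dtype=np.uint8),
        'map_t0': blank_map, 'map_t1': blank_map, 'map_t2': blank_map,
        'text_info': {'speed': 'speed: 0.00, target_speed: 0.00'},
        'object_counts': {'t0': {'car': 0, 'bike': 0, 'pedestrian': 0}},
    }
    dashboard = ui.run_interface(data)
    print(dashboard.shape)  # (600, 1200, 3)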

def get_yaw_angle(forward_vector):
    forward_vector = forward_vector / np.linalg.norm(forward_vector)
    return math.atan2(forward_vector[1], forward_vector[0])

def ensure_rgb(image):
    if len(image.shape) == 2 or image.shape[2] == 1:
        return cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    return image

def convert_grid_to_xy(i, j):
    return (j - 9.5) * 1.6, (19.5 - i) * 1.6
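
# Worked example of the mapping above: cell (i=19, j=9), the bottom row just
# left of center, maps to x = (9 - 9.5) * 1.6 = -0.8 m and
# y = (19.5 - 19) * 1.6 = 0.8 m. The 20x20 grid therefore covers roughly
# 32 m x 32 m of ego-frame space at 1.6 m per cell.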

def find_peak_box(data):
    # Pad the 20x20 grid so the 4-neighbour peak test works at the borders.
    det_data = np.zeros((22, 22, 7))
    det_data[1:21, 1:21] = data
    detected_objects = []
    for i in range(1, 21):
        for j in range(1, 21):
            if det_data[i, j, 0] > 0.6 and (
                det_data[i, j, 0] > det_data[i, j - 1, 0]
                and det_data[i, j, 0] > det_data[i, j + 1, 0]
                and det_data[i, j, 0] > det_data[i - 1, j, 0]
                and det_data[i, j, 0] > det_data[i + 1, j, 0]
            ):
                length, width, confidence = det_data[i, j, 4], det_data[i, j, 5], det_data[i, j, 0]
                obj_type = 'unknown'
                if length > 4.0:
                    obj_type = 'car'
                elif length / width > 1.5:
                    obj_type = 'bike'
                else:
                    obj_type = 'pedestrian'
                detected_objects.append({
                    'coords': (i - 1, j - 1),
                    'type': obj_type,
                    'confidence': confidence,
                    'raw_data': det_data[i, j],
                })
    return detected_objects
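
# Peak-extraction sketch: a single confident cell should come back as one object
# (values are illustrative; length 4.5 m classifies as 'car').
def _demo_find_peak_box():
    grid = np.zeros((20, 20, 7))
    grid[5, 7] = [0.9, 0.1, -0.2, 0.5, 4.5, 2.0, 3.0]
    objs = find_peak_box(grid)
    print(len(objs), objs[0]['coords'], objs[0]['type'])  # 1 (5, 7) car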

def add_rect(img, loc, ori, box, value, color):
    center_x = int(loc[0] * PIXELS_PER_METER + MAX_DISTANCE * PIXELS_PER_METER)
    center_y = int(loc[1] * PIXELS_PER_METER + MAX_DISTANCE * PIXELS_PER_METER)
    size_px = (int(box[0] * PIXELS_PER_METER), int(box[1] * PIXELS_PER_METER))
    angle_deg = -np.degrees(math.atan2(ori[1], ori[0]))
    box_points = cv2.boxPoints(((center_x, center_y), size_px, angle_deg))
    cv2.fillConvexPoly(img, np.int32(box_points), [int(x * value) for x in color])
    return img

def render(det_data, t=0):
    CLASS_COLORS = {'car': (0, 0, 255), 'bike': (0, 255, 0), 'pedestrian': (255, 0, 0), 'unknown': (128, 128, 128)}
    det_weighted = det_data * reweight_array
    detected_objects = find_peak_box(det_weighted)
    counts = {cls: 0 for cls in CLASS_COLORS}
    for obj in detected_objects:
        counts[obj['type']] += 1
    img = np.zeros((IMG_SIZE, IMG_SIZE, 3), np.uint8)
    for obj in detected_objects:
        i, j = obj['coords']
        obj_data = obj['raw_data']
        speed, theta = obj_data[6], obj_data[3] * np.pi
        ori = np.array([math.cos(theta), math.sin(theta)])
        loc_x = obj_data[1] + t * speed * ori[0] + convert_grid_to_xy(i, j)[0]
        loc_y = obj_data[2] - t * speed * ori[1] + convert_grid_to_xy(i, j)[1]
        box = np.array([obj_data[4], obj_data[5]]) * (1.5 if obj['type'] == 'pedestrian' else 1.0)
        add_rect(img, np.array([loc_x, loc_y]), ori, box, obj['confidence'], CLASS_COLORS[obj['type']])
    return img, counts
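
# Rendering sketch: draw the same synthetic grid now and at the two forecast
# horizons used by the dashboard (T1_FUTURE_TIME / T2_FUTURE_TIME seconds ahead).
def _demo_render():
    grid = np.zeros((20, 20, 7))
    grid[5, 7] = [0.9, 0.1, -0.2, 0.5, 4.5, 2.0, 3.0]
    map_t0, counts = render(grid, t=0)
    map_t1, _ = render(grid, t=T1_FUTURE_TIME)
    map_t2, _ = render(grid, t=T2_FUTURE_TIME)
    print(map_t0.shape, counts)  # (512, 512, 3) {'car': 1, ...}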

def render_self_car(loc, ori, box, pixels_per_meter=PIXELS_PER_METER):
    img = np.zeros((IMG_SIZE, IMG_SIZE, 3), np.uint8)
    center_x = int(loc[0] * pixels_per_meter + MAX_DISTANCE * pixels_per_meter)
    center_y = int(loc[1] * pixels_per_meter + MAX_DISTANCE * pixels_per_meter)
    size_px = (int(box[0] * pixels_per_meter), int(box[1] * pixels_per_meter))
    angle_deg = -np.degrees(math.atan2(ori[1], ori[0]))
    box_points = np.int32(cv2.boxPoints(((center_x, center_y), size_px, angle_deg)))
    cv2.fillConvexPoly(img, box_points, (0, 255, 255))
    return img

def render_waypoints(waypoints, pixels_per_meter=PIXELS_PER_METER):
    global last_valid_waypoints
    img = np.zeros((IMG_SIZE, IMG_SIZE, 3), np.uint8)
    current_waypoints = waypoints if waypoints is not None and len(waypoints) > 2 else last_valid_waypoints
    if current_waypoints is not None:
        last_valid_waypoints = current_waypoints
        for i, point in enumerate(current_waypoints):
            px = int(EGO_CAR_X + point[1] * pixels_per_meter)
            py = int(EGO_CAR_Y - point[0] * pixels_per_meter)
            color = (0, 0, 255) if i == len(current_waypoints) - 1 else (0, 255, 0)
            cv2.circle(img, (px, py), 4, color, -1)
    return img

def collision_detections(map1, map2, threshold=0.04):
    # Returns True when the overlap between the obstacle map (map1) and the
    # rendered ego footprint (map2) stays below the threshold, i.e. "safe".
    if len(map2.shape) == 3 and map2.shape[2] == 3:
        map2 = cv2.cvtColor(map2, cv2.COLOR_BGR2GRAY)
    assert map1.shape == map2.shape
    overlap_map = (map1 > 0.01) & (map2 > 0.01)
    return float(np.sum(overlap_map)) / np.sum(map2 > 0) < threshold

def get_max_safe_distance(meta_data, downsampled_waypoints, t, collision_buffer, threshold):
    surround_map = meta_data.reshape(20, 20, 7)[..., 0]
    if np.sum(surround_map) < 1:
        return np.linalg.norm(downsampled_waypoints[-3])
    hero_bounding_box = np.array([2.45, 1.0]) + collision_buffer
    safe_distance = 0.0
    for i in range(len(downsampled_waypoints) - 2):
        aim = (downsampled_waypoints[i + 1] + downsampled_waypoints[i + 2]) / 2.0
        loc, ori = downsampled_waypoints[i], aim - downsampled_waypoints[i]
        self_car_map = render_self_car(loc=loc, ori=ori, box=hero_bounding_box, pixels_per_meter=PIXELS_PER_METER)
        self_car_map_gray = cv2.cvtColor(cv2.resize(self_car_map, (20, 20)), cv2.COLOR_BGR2GRAY)
        if not collision_detections(surround_map, self_car_map_gray, threshold):
            break
        safe_distance = max(safe_distance, np.linalg.norm(loc))
    return safe_distance

def downsample_waypoints(waypoints, precision=0.2):
    downsampled_waypoints = []
    last_waypoint = np.array([0.0, 0.0])
    for now_waypoint in waypoints:
        dis = np.linalg.norm(now_waypoint - last_waypoint)
        if dis > precision:
            interval = int(dis / precision)
            move_vector = (now_waypoint - last_waypoint) / (interval + 1)
            for j in range(interval):
                downsampled_waypoints.append(last_waypoint + move_vector * (j + 1))
        downsampled_waypoints.append(now_waypoint)
        last_waypoint = now_waypoint
    return downsampled_waypoints
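
# Smoke-test entry point: run the synthetic sketches above (everything except
# _demo_dataset, which needs real files on disk).
if __name__ == "__main__":
    _demo_transforms()
    _demo_pid()
    _demo_find_peak_box()
    _demo_render()
    _demo_tracker()
    _demo_controller()
    _demo_display()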