Update model_definition.py
model_definition.py  +81 -122  CHANGED
@@ -115,48 +115,93 @@ class HybridEmbed(nn.Module):
         return x, global_x


-class PositionEmbeddingSine(nn.Module):
-    """
-    This is a more standard version of the position embedding, very similar to the one
-    used by the Attention is all you need paper, generalized to work on images.
-    """
-    ...
         super().__init__()
         self.num_pos_feats = num_pos_feats
         self.temperature = temperature
         self.normalize = normalize
-        ...
             raise ValueError("normalize should be True if scale is passed")
         if scale is None:
             scale = 2 * math.pi
         self.scale = scale

-    def forward(self, tensor):
-        ...
         if self.normalize:
-            ...
-        pos = torch.cat((
         return pos

@@ -663,7 +708,7 @@ def build_attn_mask(mask_type):
     return mask
 # class InterfuserModel(nn.Module):

-class InterfuserModel(nn.Module):
     def __init__(
         self,
         img_size=224,

@@ -870,7 +915,7 @@ class InterfuserModel(nn.Module):
             *[nn.Linear(embed_dim, 64), nn.ReLU(), nn.Linear(64, 1), nn.Sigmoid()]
         )

-        self.position_encoding =

         encoder_layer = TransformerEncoderLayer(
             embed_dim, num_heads, dim_feedforward, dropout, act_layer, normalize_before

@@ -1114,6 +1159,8 @@ class InterfuserModel(nn.Module):
         traffic_feature_with_vel = torch.cat([traffic_feature, velocity], dim=2)
         traffic = self.traffic_pred_head(traffic_feature_with_vel)
         return traffic, waypoints, is_junction, traffic_light_state, stop_sign, traffic_feature
     def load_pretrained(self, model_path, strict=False):
         """
         Load the pretrained weights - improved version

@@ -1181,94 +1228,6 @@ class InterfuserModel(nn.Module):
         return False


-# ============================================================================
-# Helper functions for loading the model
-# ============================================================================
-# ==============================================================================
-# File: config_and_loader.py
-# This is the single source of truth for all settings and for the model-loading process.
-# ==============================================================================
-
-
-
-# def get_master_config(model_path="model/best_model.pth"):
-#     """
-#     [The complete, final version]
-#     Creates and merges every setting required by the application (model, tracker, controller).
-#     """
-#     model_params = {
-#         "img_size": 224, "embed_dim": 256, "enc_depth": 6, "dec_depth": 6,
-#         "rgb_backbone_name": 'r50', "lidar_backbone_name": 'r18',
-#         "waypoints_pred_head": 'gru', "use_different_backbone": True,
-#         "with_lidar": False, "with_right_left_sensors": False,
-#         "with_center_sensor": False, "multi_view_img_size": 112,
-#         "patch_size": 8, "in_chans": 3, "dim_feedforward": 2048,
-#         "normalize_before": False, "num_heads": 8, "dropout": 0.1,
-#         "end2end": False, "direct_concat": False, "separate_view_attention": False,
-#         "separate_all_attention": False, "freeze_num": -1,
-#         "traffic_pred_head_type": "det", "reverse_pos": True,
-#         "use_view_embed": False, "use_mmad_pretrain": None,
-#     }
-
-#     grid_conf = {
-#         'h': 20, 'w': 20, 'x_res': 1.0, 'y_res': 1.0,
-#         'y_min': 0.0, 'y_max': 20.0, 'x_min': -10.0, 'x_max': 10.0,
-#     }
-
-#     controller_params = {
-#         'turn_KP': 0.75, 'turn_KI': 0.05, 'turn_KD': 0.25, 'turn_n': 20,
-#         'speed_KP': 0.55, 'speed_KI': 0.05, 'speed_KD': 0.15, 'speed_n': 20,
-#         'max_speed': 8.0, 'max_throttle': 0.75, 'min_speed': 0.1,
-#         'brake_sensitivity': 0.3, 'light_threshold': 0.5, 'stop_threshold': 0.6,
-#         'stop_sign_duration': 20, 'max_stop_time': 250,
-#         'forced_move_duration': 20, 'forced_throttle': 0.5,
-#         'max_red_light_time': 150, 'red_light_block_duration': 80,
-#         'accel_rate': 0.1, 'decel_rate': 0.2, 'critical_distance': 4.0,
-#         'follow_distance': 10.0, 'speed_match_factor': 0.9,
-#         'tracker_match_thresh': 2.5, 'tracker_prune_age': 5,
-#         'follow_grace_period': 20
-#     }
-
-#     master_config = {
-#         'model_params': model_params,
-#         'grid_conf': grid_conf,
-#         'controller_params': controller_params,
-#         'paths': {'pretrained_weights': model_path},
-#         'simulation': {'frequency': 10.0}
-#     }
-
-#     return master_config
-
-
-# def load_and_prepare_model(device: torch.device) -> InterfuserModel:
-#     """
-#     [The final, correct version - takes a single input only]
-#     Uses the master-config function to create and load the model.
-#     """
-#     try:
-#         logging.info("Attempting to load model using master config...")
-#         # 1. Get all settings from the single source of truth
-#         config = get_master_config()
-
-#         # 2. Create the model using the model parameters only
-#         model = InterfuserModel(**config['model_params']).to(device)
-#         logging.info(f"Model instantiated on device: {device}")
-
-#         # 3. Load the weights using the model's internal helper
-#         checkpoint_path = config['paths']['pretrained_weights']
-#         model.load_pretrained(checkpoint_path, strict=False)
-
-#         # 4. Put the model in evaluation mode
-#         model.eval()
-#         logging.info("✅ Model prepared and set to evaluation mode.")
-
-#         return model
-
-#     except Exception as e:
-#         logging.error(f"❌ CRITICAL ERROR in load_and_prepare_model: {e}", exc_info=True)
-#         raise
-
-

 # ==============================================================================
 # Function 1: get_master_config

@@ -1341,7 +1300,7 @@ def get_master_config():
 # Function 2: load_and_prepare_model
 # ==============================================================================

-def load_and_prepare_model(device: torch.device) -> InterfuserModel:
     """
     [Professional version]
     Uses the master settings from `get_master_config` to create and load the model.

@@ -1374,7 +1333,7 @@ def load_and_prepare_model(device: torch.device) -> InterfuserModel:

     # 3. Create an instance of the model with the correct settings
     logging.info("Instantiating model with specified parameters...")
-    model = InterfuserModel(**config['model_params']).to(device)

     # 4. Load the downloaded weights into the model
     # We use the helper method defined inside the model class itself

@@ -115,48 +115,93 @@ class HybridEmbed(nn.Module):
         return x, global_x


+class HyperDimensionalPositionalEncoding(nn.Module):
     """
+    [GCPE v1.1 - Professional & Corrected Implementation]
+    A novel positional encoding scheme based on geometric centrality.

+    This class is designed as a drop-in replacement for the standard
+    PositionEmbeddingSine, accepting similar arguments and producing an
+    output of the same shape. This version corrects a type error in the
+    distance calculation.
+    """
+    def __init__(self, num_pos_feats=256, temperature=10000, normalize=True, scale=None):
+        """
+        Args:
+            num_pos_feats (int): The desired number of output channels for the
+                positional encoding. This must be an even number.
+            temperature (int): A constant used to scale the frequencies.
+            normalize (bool): If True, normalizes the coordinates to the range [0, scale].
+            scale (float, optional): The scaling factor for normalization. Defaults to 2*pi.
+        """
         super().__init__()
+
+        if num_pos_feats % 2 != 0:
+            raise ValueError(f"num_pos_feats must be an even number, but got {num_pos_feats}")
+
         self.num_pos_feats = num_pos_feats
         self.temperature = temperature
         self.normalize = normalize
+
+        if scale is not None and not normalize:
             raise ValueError("normalize should be True if scale is passed")
         if scale is None:
             scale = 2 * math.pi
         self.scale = scale

+    def forward(self, tensor: torch.Tensor) -> torch.Tensor:
+        """
+        Args:
+            tensor (torch.Tensor): A 4D tensor of shape (B, C, H, W). Only its shape
+                and device are used, not its contents.
+
+        Returns:
+            torch.Tensor: Positional encodings of shape (B, num_pos_feats, H, W).
+        """
+        batch_size, _, h, w = tensor.shape
+        device = tensor.device
+
+        # 1. Create coordinate grids
+        y_embed = torch.arange(h, dtype=torch.float32, device=device).view(h, 1)
+        x_embed = torch.arange(w, dtype=torch.float32, device=device).view(1, w)
+
+        # 2. Compute the normalized distance of each pixel from the image center
+        # (floating-point division keeps the center exact for even sizes)
+        center_y, center_x = (h - 1) / 2.0, (w - 1) / 2.0
+
+        # Euclidean distance of every pixel from the center
+        dist_map = torch.sqrt(
+            (y_embed - center_y)**2 + (x_embed - center_x)**2
+        )
+
+        # ✅ CORRECTION: the maximum distance is a plain Python float, so torch.sqrt
+        # cannot be applied to it directly. Wrap the squared value in a tensor first
+        # (math.sqrt would work just as well).
+        max_dist_sq = torch.tensor(center_y**2 + center_x**2, device=device)
+        max_dist = torch.sqrt(max_dist_sq)
+
+        # Normalize the distance map to the range [0, 1]
+        normalized_dist_map = dist_map / (max_dist + 1e-6)
+
         if self.normalize:
+            normalized_dist_map = normalized_dist_map * self.scale
+
+        pos_dist = normalized_dist_map.unsqueeze(0).repeat(batch_size, 1, 1)
+
+        # 3. Create the frequency-based embedding (unchanged: it already operates on tensors)
+        dim_t = torch.arange(self.num_pos_feats // 2, dtype=torch.float32, device=device)
+        dim_t = self.temperature ** (2 * dim_t / (self.num_pos_feats // 2))
+
+        pos = pos_dist.unsqueeze(-1) / dim_t
+
+        pos_sin = pos.sin()
+        pos_cos = pos.cos()
+
+        # 4. Concatenate and reshape to (B, num_pos_feats, H, W)
+        pos = torch.cat((pos_sin, pos_cos), dim=3)
+        pos = pos.permute(0, 3, 1, 2)
+
         return pos

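As a quick sanity check of the new encoding, here is a minimal sketch (not part of the commit; it assumes the class above is importable from model_definition, and the 28x28 feature-map size is only illustrative):

    import torch
    from model_definition import HyperDimensionalPositionalEncoding

    # Dummy backbone feature map of shape (B, C, H, W); only its shape and
    # device are read by the encoding.
    features = torch.randn(2, 2048, 28, 28)

    pos_enc = HyperDimensionalPositionalEncoding(num_pos_feats=256, normalize=True)
    pos = pos_enc(features)

    # One distance channel expanded into 128 sin + 128 cos frequency channels.
    assert pos.shape == (2, 256, 28, 28)
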
@@ -663,7 +708,7 @@ def build_attn_mask(mask_type):
     return mask
 # class InterfuserModel(nn.Module):

+class InterfuserHDPE(nn.Module):
     def __init__(
         self,
         img_size=224,

@@ -870,7 +915,7 @@ class InterfuserModel(nn.Module):
             *[nn.Linear(embed_dim, 64), nn.ReLU(), nn.Linear(64, 1), nn.Sigmoid()]
         )

+        self.position_encoding = HyperDimensionalPositionalEncoding(embed_dim, normalize=True)

         encoder_layer = TransformerEncoderLayer(
             embed_dim, num_heads, dim_feedforward, dropout, act_layer, normalize_before

@@ -1114,6 +1159,8 @@ class InterfuserModel(nn.Module):
         traffic_feature_with_vel = torch.cat([traffic_feature, velocity], dim=2)
         traffic = self.traffic_pred_head(traffic_feature_with_vel)
         return traffic, waypoints, is_junction, traffic_light_state, stop_sign, traffic_feature
+
+
     def load_pretrained(self, model_path, strict=False):
         """
         Load the pretrained weights - improved version

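For context, a hedged sketch of how this load_pretrained helper might be called directly; the checkpoint path and config structure follow the commented-out defaults removed above, and the boolean return value is inferred from the method's return False failure branch:

    import logging
    from model_definition import InterfuserHDPE, get_master_config

    config = get_master_config()
    model = InterfuserHDPE(**config['model_params'])

    # strict=False lets loading proceed even if some state-dict keys are missing or unexpected.
    loaded = model.load_pretrained("model/best_model.pth", strict=False)
    if not loaded:
        logging.warning("Pretrained weights were not loaded; keeping random initialization.")
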
@@ -1181,94 +1228,6 @@ class InterfuserModel(nn.Module):
         return False


 # ==============================================================================
 # Function 1: get_master_config

@@ -1341,7 +1300,7 @@ def get_master_config():
 # Function 2: load_and_prepare_model
 # ==============================================================================

+def load_and_prepare_model(device: torch.device) -> InterfuserHDPE:
     """
     [Professional version]
     Uses the master settings from `get_master_config` to create and load the model.

@@ -1374,7 +1333,7 @@ def load_and_prepare_model(device: torch.device) -> InterfuserModel:

     # 3. Create an instance of the model with the correct settings
     logging.info("Instantiating model with specified parameters...")
+    model = InterfuserHDPE(**config['model_params']).to(device)

     # 4. Load the downloaded weights into the model
     # We use the helper method defined inside the model class itself
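
Finally, a minimal driver sketch for the updated loader (assuming the checkpoint referenced by get_master_config exists on disk; per the steps above, the returned model already has its weights loaded and is in eval mode):

    import torch
    from model_definition import load_and_prepare_model

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = load_and_prepare_model(device)  # InterfuserHDPE, pretrained, eval mode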