Spaces:
Running
Running
Update model_definition.py
Browse files- model_definition.py +81 -122
model_definition.py
CHANGED
@@ -115,48 +115,93 @@ class HybridEmbed(nn.Module):
|
|
115 |
return x, global_x
|
116 |
|
117 |
|
118 |
-
class
|
119 |
-
"""
|
120 |
-
This is a more standard version of the position embedding, very similar to the one
|
121 |
-
used by the Attention is all you need paper, generalized to work on images.
|
122 |
"""
|
|
|
|
|
123 |
|
124 |
-
|
125 |
-
|
126 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
127 |
super().__init__()
|
|
|
|
|
|
|
|
|
128 |
self.num_pos_feats = num_pos_feats
|
129 |
self.temperature = temperature
|
130 |
self.normalize = normalize
|
131 |
-
|
|
|
132 |
raise ValueError("normalize should be True if scale is passed")
|
133 |
if scale is None:
|
134 |
scale = 2 * math.pi
|
135 |
self.scale = scale
|
136 |
|
137 |
-
def forward(self, tensor):
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
143 |
if self.normalize:
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
pos = torch.cat((
|
|
|
|
|
160 |
return pos
|
161 |
|
162 |
|
@@ -663,7 +708,7 @@ def build_attn_mask(mask_type):
|
|
663 |
return mask
|
664 |
# class InterfuserModel(nn.Module):
|
665 |
|
666 |
-
class
|
667 |
def __init__(
|
668 |
self,
|
669 |
img_size=224,
|
@@ -870,7 +915,7 @@ class InterfuserModel(nn.Module):
|
|
870 |
*[nn.Linear(embed_dim, 64), nn.ReLU(), nn.Linear(64, 1), nn.Sigmoid()]
|
871 |
)
|
872 |
|
873 |
-
self.position_encoding =
|
874 |
|
875 |
encoder_layer = TransformerEncoderLayer(
|
876 |
embed_dim, num_heads, dim_feedforward, dropout, act_layer, normalize_before
|
@@ -1114,6 +1159,8 @@ class InterfuserModel(nn.Module):
|
|
1114 |
traffic_feature_with_vel = torch.cat([traffic_feature, velocity], dim=2)
|
1115 |
traffic = self.traffic_pred_head(traffic_feature_with_vel)
|
1116 |
return traffic, waypoints, is_junction, traffic_light_state, stop_sign, traffic_feature
|
|
|
|
|
1117 |
def load_pretrained(self, model_path, strict=False):
|
1118 |
"""
|
1119 |
تحميل الأوزان المدربة مسبقاً - نسخة محسنة
|
@@ -1181,94 +1228,6 @@ class InterfuserModel(nn.Module):
|
|
1181 |
return False
|
1182 |
|
1183 |
|
1184 |
-
# ============================================================================
|
1185 |
-
# دوال مساعدة لتحميل النموذج
|
1186 |
-
# ============================================================================
|
1187 |
-
# ==============================================================================
|
1188 |
-
# ملف: config_and_loader.py
|
1189 |
-
# هذا هو المصدر الوحيد للحقيقة لجميع الإعدادات وعملية تحميل النموذج.
|
1190 |
-
# ==============================================================================
|
1191 |
-
|
1192 |
-
|
1193 |
-
|
1194 |
-
# def get_master_config(model_path="model/best_model.pth"):
|
1195 |
-
# """
|
1196 |
-
# [النسخة الكاملة والنهائية]
|
1197 |
-
# ينشئ ويدمج كل الإعدادات المطلوبة للتطبيق (النموذج، المتتبع، المتحكم).
|
1198 |
-
# """
|
1199 |
-
# model_params = {
|
1200 |
-
# "img_size": 224, "embed_dim": 256, "enc_depth": 6, "dec_depth": 6,
|
1201 |
-
# "rgb_backbone_name": 'r50', "lidar_backbone_name": 'r18',
|
1202 |
-
# "waypoints_pred_head": 'gru', "use_different_backbone": True,
|
1203 |
-
# "with_lidar": False, "with_right_left_sensors": False,
|
1204 |
-
# "with_center_sensor": False, "multi_view_img_size": 112,
|
1205 |
-
# "patch_size": 8, "in_chans": 3, "dim_feedforward": 2048,
|
1206 |
-
# "normalize_before": False, "num_heads": 8, "dropout": 0.1,
|
1207 |
-
# "end2end": False, "direct_concat": False, "separate_view_attention": False,
|
1208 |
-
# "separate_all_attention": False, "freeze_num": -1,
|
1209 |
-
# "traffic_pred_head_type": "det", "reverse_pos": True,
|
1210 |
-
# "use_view_embed": False, "use_mmad_pretrain": None,
|
1211 |
-
# }
|
1212 |
-
|
1213 |
-
# grid_conf = {
|
1214 |
-
# 'h': 20, 'w': 20, 'x_res': 1.0, 'y_res': 1.0,
|
1215 |
-
# 'y_min': 0.0, 'y_max': 20.0, 'x_min': -10.0, 'x_max': 10.0,
|
1216 |
-
# }
|
1217 |
-
|
1218 |
-
# controller_params = {
|
1219 |
-
# 'turn_KP': 0.75, 'turn_KI': 0.05, 'turn_KD': 0.25, 'turn_n': 20,
|
1220 |
-
# 'speed_KP': 0.55, 'speed_KI': 0.05, 'speed_KD': 0.15, 'speed_n': 20,
|
1221 |
-
# 'max_speed': 8.0, 'max_throttle': 0.75, 'min_speed': 0.1,
|
1222 |
-
# 'brake_sensitivity': 0.3, 'light_threshold': 0.5, 'stop_threshold': 0.6,
|
1223 |
-
# 'stop_sign_duration': 20, 'max_stop_time': 250,
|
1224 |
-
# 'forced_move_duration': 20, 'forced_throttle': 0.5,
|
1225 |
-
# 'max_red_light_time': 150, 'red_light_block_duration': 80,
|
1226 |
-
# 'accel_rate': 0.1, 'decel_rate': 0.2, 'critical_distance': 4.0,
|
1227 |
-
# 'follow_distance': 10.0, 'speed_match_factor': 0.9,
|
1228 |
-
# 'tracker_match_thresh': 2.5, 'tracker_prune_age': 5,
|
1229 |
-
# 'follow_grace_period': 20
|
1230 |
-
# }
|
1231 |
-
|
1232 |
-
# master_config = {
|
1233 |
-
# 'model_params': model_params,
|
1234 |
-
# 'grid_conf': grid_conf,
|
1235 |
-
# 'controller_params': controller_params,
|
1236 |
-
# 'paths': {'pretrained_weights': model_path},
|
1237 |
-
# 'simulation': {'frequency': 10.0}
|
1238 |
-
# }
|
1239 |
-
|
1240 |
-
# return master_config
|
1241 |
-
|
1242 |
-
|
1243 |
-
# def load_and_prepare_model(device: torch.device) -> InterfuserModel:
|
1244 |
-
# """
|
1245 |
-
# [النسخة النهائية الصحيحة - تستقبل مدخلاً واحدًا فقط]
|
1246 |
-
# تستخدم دالة الإعدادات الرئيسية لإنشاء وتحميل النموذج.
|
1247 |
-
# """
|
1248 |
-
# try:
|
1249 |
-
# logging.info("Attempting to load model using master config...")
|
1250 |
-
# # 1. الحصول على كل الإعدادات من المصدر الوحيد للحقيقة
|
1251 |
-
# config = get_master_config()
|
1252 |
-
|
1253 |
-
# # 2. إنشاء النموذج باستخدام إعدادات النموذج فقط
|
1254 |
-
# model = InterfuserModel(**config['model_params']).to(device)
|
1255 |
-
# logging.info(f"Model instantiated on device: {device}")
|
1256 |
-
|
1257 |
-
# # 3. تحميل الأوزان باستخدام الدالة الداخلية للنموذج
|
1258 |
-
# checkpoint_path = config['paths']['pretrained_weights']
|
1259 |
-
# model.load_pretrained(checkpoint_path, strict=False)
|
1260 |
-
|
1261 |
-
# # 4. وضع النموذج في وضع التقييم
|
1262 |
-
# model.eval()
|
1263 |
-
# logging.info("✅ Model prepared and set to evaluation mode.")
|
1264 |
-
|
1265 |
-
# return model
|
1266 |
-
|
1267 |
-
# except Exception as e:
|
1268 |
-
# logging.error(f"❌ CRITICAL ERROR in load_and_prepare_model: {e}", exc_info=True)
|
1269 |
-
# raise
|
1270 |
-
|
1271 |
-
|
1272 |
|
1273 |
# ==============================================================================
|
1274 |
# الدالة الأولى: get_master_config
|
@@ -1341,7 +1300,7 @@ def get_master_config():
|
|
1341 |
# الدالة الثانية: load_and_prepare_model
|
1342 |
# ==============================================================================
|
1343 |
|
1344 |
-
def load_and_prepare_model(device: torch.device) ->
|
1345 |
"""
|
1346 |
[النسخة الاحترافية]
|
1347 |
تستخدم الإعدادات الرئيسية من `get_master_config` لإنشاء وتحميل النموذج.
|
@@ -1374,7 +1333,7 @@ def load_and_prepare_model(device: torch.device) -> InterfuserModel:
|
|
1374 |
|
1375 |
# 3. إنشاء نسخة من النموذج باستخدام الإعدادات الصحيحة
|
1376 |
logging.info("Instantiating model with specified parameters...")
|
1377 |
-
model =
|
1378 |
|
1379 |
# 4. تحميل الأوزان التي تم تنزيلها إلى النموذج
|
1380 |
# نستخدم الدالة المساعدة الموجودة داخل كلاس النموذج نفسه
|
|
|
115 |
return x, global_x
|
116 |
|
117 |
|
118 |
+
class HyperDimensionalPositionalEncoding(nn.Module):
|
|
|
|
|
|
|
119 |
"""
|
120 |
+
[GCPE v1.1 - Professional & Corrected Implementation]
|
121 |
+
A novel positional encoding scheme based on geometric centrality.
|
122 |
|
123 |
+
This class is designed as a drop-in replacement for the standard
|
124 |
+
PositionEmbeddingSine, accepting similar arguments and producing an
|
125 |
+
output of the same shape. This version corrects a type error in the
|
126 |
+
distance calculation.
|
127 |
+
"""
|
128 |
+
def __init__(self, num_pos_feats=256, temperature=10000, normalize=True, scale=None):
|
129 |
+
"""
|
130 |
+
Args:
|
131 |
+
num_pos_feats (int): The desired number of output channels for the positional encoding.
|
132 |
+
This must be an even number.
|
133 |
+
temperature (int): A constant used to scale the frequencies.
|
134 |
+
normalize (bool): If True, normalizes the coordinates to the range [0, scale].
|
135 |
+
scale (float, optional): The scaling factor for normalization. Defaults to 2*pi.
|
136 |
+
"""
|
137 |
super().__init__()
|
138 |
+
|
139 |
+
if num_pos_feats % 2 != 0:
|
140 |
+
raise ValueError(f"num_pos_feats must be an even number, but got {num_pos_feats}")
|
141 |
+
|
142 |
self.num_pos_feats = num_pos_feats
|
143 |
self.temperature = temperature
|
144 |
self.normalize = normalize
|
145 |
+
|
146 |
+
if scale is not None and not normalize:
|
147 |
raise ValueError("normalize should be True if scale is passed")
|
148 |
if scale is None:
|
149 |
scale = 2 * math.pi
|
150 |
self.scale = scale
|
151 |
|
152 |
+
def forward(self, tensor: torch.Tensor) -> torch.Tensor:
|
153 |
+
"""
|
154 |
+
Args:
|
155 |
+
tensor (torch.Tensor): A 4D tensor of shape (B, C, H, W). The content is not
|
156 |
+
used, only its shape and device.
|
157 |
+
|
158 |
+
Returns:
|
159 |
+
torch.Tensor: A 4D tensor of positional encodings with shape (B, num_pos_feats, H, W).
|
160 |
+
"""
|
161 |
+
batch_size, _, h, w = tensor.shape
|
162 |
+
device = tensor.device
|
163 |
+
|
164 |
+
# 1. Create coordinate grids
|
165 |
+
y_embed = torch.arange(h, dtype=torch.float32, device=device).view(h, 1)
|
166 |
+
x_embed = torch.arange(w, dtype=torch.float32, device=device).view(1, w)
|
167 |
+
|
168 |
+
# 2. Calculate normalized distance from the center
|
169 |
+
# Use floating point division for center calculation
|
170 |
+
center_y, center_x = (h - 1) / 2.0, (w - 1) / 2.0
|
171 |
+
|
172 |
+
# Calculate the Euclidean distance for each pixel from the center
|
173 |
+
dist_map = torch.sqrt(
|
174 |
+
(y_embed - center_y)**2 + (x_embed - center_x)**2
|
175 |
+
)
|
176 |
+
|
177 |
+
# ✅ CORRECTION: The max distance is a scalar, no need for torch.sqrt on a float.
|
178 |
+
# We can calculate it with math.sqrt or just compute the squared value.
|
179 |
+
# To keep everything in tensors for consistency, we can do this:
|
180 |
+
max_dist_sq = torch.tensor(center_y**2 + center_x**2, device=device)
|
181 |
+
max_dist = torch.sqrt(max_dist_sq)
|
182 |
+
|
183 |
+
# Normalize the distance map to the range [0, 1]
|
184 |
+
normalized_dist_map = dist_map / (max_dist + 1e-6)
|
185 |
+
|
186 |
if self.normalize:
|
187 |
+
normalized_dist_map = normalized_dist_map * self.scale
|
188 |
+
|
189 |
+
pos_dist = normalized_dist_map.unsqueeze(0).repeat(batch_size, 1, 1)
|
190 |
+
|
191 |
+
# 3. Create the frequency-based embedding
|
192 |
+
# This part remains the same as it operates on tensors correctly.
|
193 |
+
dim_t = torch.arange(self.num_pos_feats // 2, dtype=torch.float32, device=device)
|
194 |
+
dim_t = self.temperature ** (2 * dim_t / (self.num_pos_feats // 2))
|
195 |
+
|
196 |
+
pos = pos_dist.unsqueeze(-1) / dim_t
|
197 |
+
|
198 |
+
pos_sin = pos.sin()
|
199 |
+
pos_cos = pos.cos()
|
200 |
+
|
201 |
+
# 4. Concatenate and reshape to match the desired output format
|
202 |
+
pos = torch.cat((pos_sin, pos_cos), dim=3)
|
203 |
+
pos = pos.permute(0, 3, 1, 2)
|
204 |
+
|
205 |
return pos
|
206 |
|
207 |
|
|
|
708 |
return mask
|
709 |
# class InterfuserModel(nn.Module):
|
710 |
|
711 |
+
class InterfuserHDPE(nn.Module):
|
712 |
def __init__(
|
713 |
self,
|
714 |
img_size=224,
|
|
|
915 |
*[nn.Linear(embed_dim, 64), nn.ReLU(), nn.Linear(64, 1), nn.Sigmoid()]
|
916 |
)
|
917 |
|
918 |
+
self.position_encoding = HyperDimensionalPositionalEncoding(embed_dim , normalize=True)
|
919 |
|
920 |
encoder_layer = TransformerEncoderLayer(
|
921 |
embed_dim, num_heads, dim_feedforward, dropout, act_layer, normalize_before
|
|
|
1159 |
traffic_feature_with_vel = torch.cat([traffic_feature, velocity], dim=2)
|
1160 |
traffic = self.traffic_pred_head(traffic_feature_with_vel)
|
1161 |
return traffic, waypoints, is_junction, traffic_light_state, stop_sign, traffic_feature
|
1162 |
+
|
1163 |
+
|
1164 |
def load_pretrained(self, model_path, strict=False):
|
1165 |
"""
|
1166 |
تحميل الأوزان المدربة مسبقاً - نسخة محسنة
|
|
|
1228 |
return False
|
1229 |
|
1230 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1231 |
|
1232 |
# ==============================================================================
|
1233 |
# الدالة الأولى: get_master_config
|
|
|
1300 |
# الدالة الثانية: load_and_prepare_model
|
1301 |
# ==============================================================================
|
1302 |
|
1303 |
+
def load_and_prepare_model(device: torch.device) -> InterfuserHDPE:
|
1304 |
"""
|
1305 |
[النسخة الاحترافية]
|
1306 |
تستخدم الإعدادات الرئيسية من `get_master_config` لإنشاء وتحميل النموذج.
|
|
|
1333 |
|
1334 |
# 3. إنشاء نسخة من النموذج باستخدام الإعدادات الصحيحة
|
1335 |
logging.info("Instantiating model with specified parameters...")
|
1336 |
+
model = InterfuserHDPE(**config['model_params']).to(device)
|
1337 |
|
1338 |
# 4. تحميل الأوزان التي تم تنزيلها إلى النموذج
|
1339 |
# نستخدم الدالة المساعدة الموجودة داخل كلاس النموذج نفسه
|