Commit
·
4bf0ec2
1
Parent(s):
d8a186f
Changed ablation to input zero-valued vectors
Browse files
protac_degradation_predictor/protac_dataset.py
CHANGED
@@ -215,40 +215,53 @@ class PROTAC_Dataset(Dataset):
|
|
215 |
|
216 |
def __getitem__(self, idx):
|
217 |
if 'smiles' in self.disabled_embeddings:
|
218 |
-
#
|
219 |
-
smiles_emb = np.
|
220 |
-
|
221 |
-
|
222 |
-
|
|
|
|
|
|
|
|
|
223 |
else:
|
224 |
smiles_emb = self.data['Smiles'].iloc[idx]
|
225 |
|
226 |
if 'poi' in self.disabled_embeddings:
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
|
|
|
|
|
|
232 |
else:
|
233 |
poi_emb = self.data['Uniprot'].iloc[idx]
|
234 |
|
235 |
if 'e3' in self.disabled_embeddings:
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
|
|
|
|
|
|
243 |
else:
|
244 |
e3_emb = self.data['E3 Ligase Uniprot'].iloc[idx]
|
245 |
|
246 |
if 'cell' in self.disabled_embeddings:
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
|
|
|
|
|
|
|
252 |
else:
|
253 |
cell_emb = self.data['Cell Line Identifier'].iloc[idx]
|
254 |
|
|
|
215 |
|
216 |
def __getitem__(self, idx):
|
217 |
if 'smiles' in self.disabled_embeddings:
|
218 |
+
# Get a zero vector for the fingerprint
|
219 |
+
smiles_emb = np.zeros(self.smiles_emb_dim).astype(np.float32)
|
220 |
+
|
221 |
+
# TODO: Remove random sampling in the future
|
222 |
+
# # Uniformly sample a binary vector for the fingerprint
|
223 |
+
# smiles_emb = np.random.randint(0, 2, size=self.smiles_emb_dim).astype(np.float32)
|
224 |
+
# if not self.use_single_scaler and self.scaler is not None:
|
225 |
+
# smiles_emb = smiles_emb[np.newaxis, :]
|
226 |
+
# smiles_emb = self.scaler['Smiles'].transform(smiles_emb).flatten()
|
227 |
else:
|
228 |
smiles_emb = self.data['Smiles'].iloc[idx]
|
229 |
|
230 |
if 'poi' in self.disabled_embeddings:
|
231 |
+
poi_emb = np.zeros(self.protein_emb_dim).astype(np.float32)
|
232 |
+
|
233 |
+
# TODO: Remove random sampling in the future
|
234 |
+
# # Uniformly sample a vector for the protein
|
235 |
+
# poi_emb = np.random.rand(self.protein_emb_dim).astype(np.float32)
|
236 |
+
# if not self.use_single_scaler and self.scaler is not None:
|
237 |
+
# poi_emb = poi_emb[np.newaxis, :]
|
238 |
+
# poi_emb = self.scaler['Uniprot'].transform(poi_emb).flatten()
|
239 |
else:
|
240 |
poi_emb = self.data['Uniprot'].iloc[idx]
|
241 |
|
242 |
if 'e3' in self.disabled_embeddings:
|
243 |
+
e3_emb = np.zeros(self.protein_emb_dim).astype(np.float32)
|
244 |
+
|
245 |
+
# TODO: Remove random sampling in the future
|
246 |
+
# # Uniformly sample a vector for the E3 ligase
|
247 |
+
# e3_emb = np.random.rand(self.protein_emb_dim).astype(np.float32)
|
248 |
+
# if not self.use_single_scaler and self.scaler is not None:
|
249 |
+
# # Add extra dimension for compatibility with the scaler
|
250 |
+
# e3_emb = e3_emb[np.newaxis, :]
|
251 |
+
# e3_emb = self.scaler['E3 Ligase Uniprot'].transform(e3_emb)
|
252 |
+
# e3_emb = e3_emb.flatten()
|
253 |
else:
|
254 |
e3_emb = self.data['E3 Ligase Uniprot'].iloc[idx]
|
255 |
|
256 |
if 'cell' in self.disabled_embeddings:
|
257 |
+
cell_emb = np.zeros(self.cell_emb_dim).astype(np.float32)
|
258 |
+
|
259 |
+
# TODO: Remove random sampling in the future
|
260 |
+
# # Uniformly sample a vector for the cell line
|
261 |
+
# cell_emb = np.random.rand(self.cell_emb_dim).astype(np.float32)
|
262 |
+
# if not self.use_single_scaler and self.scaler is not None:
|
263 |
+
# cell_emb = cell_emb[np.newaxis, :]
|
264 |
+
# cell_emb = self.scaler['Cell Line Identifier'].transform(cell_emb).flatten()
|
265 |
else:
|
266 |
cell_emb = self.data['Cell Line Identifier'].iloc[idx]
|
267 |
|