ribesstefano committed on
Commit
4bf0ec2
·
1 Parent(s): d8a186f

Changed ablation to input zero-valued vectors

Browse files
protac_degradation_predictor/protac_dataset.py CHANGED
@@ -215,40 +215,53 @@ class PROTAC_Dataset(Dataset):
215
 
216
  def __getitem__(self, idx):
217
  if 'smiles' in self.disabled_embeddings:
218
- # Uniformly sample a binary vector for the fingerprint
219
- smiles_emb = np.random.randint(0, 2, size=self.smiles_emb_dim).astype(np.float32)
220
- if not self.use_single_scaler and self.scaler is not None:
221
- smiles_emb = smiles_emb[np.newaxis, :]
222
- smiles_emb = self.scaler['Smiles'].transform(smiles_emb).flatten()
 
 
 
 
223
  else:
224
  smiles_emb = self.data['Smiles'].iloc[idx]
225
 
226
  if 'poi' in self.disabled_embeddings:
227
- # Uniformly sample a vector for the protein
228
- poi_emb = np.random.rand(self.protein_emb_dim).astype(np.float32)
229
- if not self.use_single_scaler and self.scaler is not None:
230
- poi_emb = poi_emb[np.newaxis, :]
231
- poi_emb = self.scaler['Uniprot'].transform(poi_emb).flatten()
 
 
 
232
  else:
233
  poi_emb = self.data['Uniprot'].iloc[idx]
234
 
235
  if 'e3' in self.disabled_embeddings:
236
- # Uniformly sample a vector for the E3 ligase
237
- e3_emb = np.random.rand(self.protein_emb_dim).astype(np.float32)
238
- if not self.use_single_scaler and self.scaler is not None:
239
- # Add extra dimension for compatibility with the scaler
240
- e3_emb = e3_emb[np.newaxis, :]
241
- e3_emb = self.scaler['E3 Ligase Uniprot'].transform(e3_emb)
242
- e3_emb = e3_emb.flatten()
 
 
 
243
  else:
244
  e3_emb = self.data['E3 Ligase Uniprot'].iloc[idx]
245
 
246
  if 'cell' in self.disabled_embeddings:
247
- # Uniformly sample a vector for the cell line
248
- cell_emb = np.random.rand(self.cell_emb_dim).astype(np.float32)
249
- if not self.use_single_scaler and self.scaler is not None:
250
- cell_emb = cell_emb[np.newaxis, :]
251
- cell_emb = self.scaler['Cell Line Identifier'].transform(cell_emb).flatten()
 
 
 
252
  else:
253
  cell_emb = self.data['Cell Line Identifier'].iloc[idx]
254
 
 
215
 
216
  def __getitem__(self, idx):
217
  if 'smiles' in self.disabled_embeddings:
218
+ # Get a zero vector for the fingerprint
219
+ smiles_emb = np.zeros(self.smiles_emb_dim).astype(np.float32)
220
+
221
+ # TODO: Remove random sampling in the future
222
+ # # Uniformly sample a binary vector for the fingerprint
223
+ # smiles_emb = np.random.randint(0, 2, size=self.smiles_emb_dim).astype(np.float32)
224
+ # if not self.use_single_scaler and self.scaler is not None:
225
+ # smiles_emb = smiles_emb[np.newaxis, :]
226
+ # smiles_emb = self.scaler['Smiles'].transform(smiles_emb).flatten()
227
  else:
228
  smiles_emb = self.data['Smiles'].iloc[idx]
229
 
230
  if 'poi' in self.disabled_embeddings:
231
+ poi_emb = np.zeros(self.protein_emb_dim).astype(np.float32)
232
+
233
+ # TODO: Remove random sampling in the future
234
+ # # Uniformly sample a vector for the protein
235
+ # poi_emb = np.random.rand(self.protein_emb_dim).astype(np.float32)
236
+ # if not self.use_single_scaler and self.scaler is not None:
237
+ # poi_emb = poi_emb[np.newaxis, :]
238
+ # poi_emb = self.scaler['Uniprot'].transform(poi_emb).flatten()
239
  else:
240
  poi_emb = self.data['Uniprot'].iloc[idx]
241
 
242
  if 'e3' in self.disabled_embeddings:
243
+ e3_emb = np.zeros(self.protein_emb_dim).astype(np.float32)
244
+
245
+ # TODO: Remove random sampling in the future
246
+ # # Uniformly sample a vector for the E3 ligase
247
+ # e3_emb = np.random.rand(self.protein_emb_dim).astype(np.float32)
248
+ # if not self.use_single_scaler and self.scaler is not None:
249
+ # # Add extra dimension for compatibility with the scaler
250
+ # e3_emb = e3_emb[np.newaxis, :]
251
+ # e3_emb = self.scaler['E3 Ligase Uniprot'].transform(e3_emb)
252
+ # e3_emb = e3_emb.flatten()
253
  else:
254
  e3_emb = self.data['E3 Ligase Uniprot'].iloc[idx]
255
 
256
  if 'cell' in self.disabled_embeddings:
257
+ cell_emb = np.zeros(self.cell_emb_dim).astype(np.float32)
258
+
259
+ # TODO: Remove random sampling in the future
260
+ # # Uniformly sample a vector for the cell line
261
+ # cell_emb = np.random.rand(self.cell_emb_dim).astype(np.float32)
262
+ # if not self.use_single_scaler and self.scaler is not None:
263
+ # cell_emb = cell_emb[np.newaxis, :]
264
+ # cell_emb = self.scaler['Cell Line Identifier'].transform(cell_emb).flatten()
265
  else:
266
  cell_emb = self.data['Cell Line Identifier'].iloc[idx]
267