Spaces:

HuangLab
/

CELL-E_2-Sequence_Prediction

Sleeping

Emaad committed on May 16, 2023

Commit

22f2c54

1 Parent(s): 61dc572

Update prediction.py

Files changed (1) hide show

prediction.py CHANGED Viewed

@@ -1,8 +1,9 @@
 from dataloader import CellLoader
-def run_image_prediction(
     sequence_input,
     nucleus_image,
     model,
     device
 ):
@@ -15,6 +16,7 @@ def run_image_prediction(
     :param model_ckpt_path: Path to model checkpoint
     :param model_config_path: Path to model config
     """
     # Instantiate dataset object
     dataset = CellLoader(
         sequence_mode="embedding",
@@ -28,20 +30,26 @@ def run_image_prediction(
         threshold="median",
     )
     # Convert SEQUENCE to sequence using dataset.tokenize_sequence()
     sequence = dataset.tokenize_sequence(sequence_input)
     # Sample from model using provided sequence and nucleus image
-    _, _, _, predicted_threshold, predicted_heatmap = model.celle.sample(
         text=sequence.to(device),
         condition=nucleus_image.to(device),
-        timesteps=1,
         temperature=1,
         progress=False,
     )
-    # Move predicted_threshold and predicted_heatmap to CPU and select first element of batch
-    predicted_threshold = predicted_threshold.cpu()[0, 0]
-    predicted_heatmap = predicted_heatmap.cpu()[0, 0]
-    return predicted_threshold, predicted_heatmap

 from dataloader import CellLoader
+def run_sequence_prediction(
     sequence_input,
     nucleus_image,
+    protein_image,
     model,
     device
 ):
     :param model_ckpt_path: Path to model checkpoint
     :param model_config_path: Path to model config
     """
     # Instantiate dataset object
     dataset = CellLoader(
         sequence_mode="embedding",
         threshold="median",
     )
+    # Check if sequence is provided and valid
+    if len(sequence_input) == 0:
+        raise ValueError("Sequence must be provided.")
+    if "<mask>" not in sequence_input:
+        print("Warning: Sequence does not contain any masked positions to predict.")
     # Convert SEQUENCE to sequence using dataset.tokenize_sequence()
     sequence = dataset.tokenize_sequence(sequence_input)
     # Sample from model using provided sequence and nucleus image
+    _, predicted_sequence, _ = model.celle.sample_text(
         text=sequence.to(device),
         condition=nucleus_image.to(device),
+        image=protein_image.to(device),
+        force_aas=True,
         temperature=1,
         progress=False,
     )
+    os.chdir(base_dir)
+    return predicted_sequence