tdc
/

scGPT

@@ -18,13 +18,27 @@ from tdc import tdc_hf_interface
 from tdc.model_server.tokenizers.scgpt import scGPTTokenizer
 import torch
 scgpt = tdc_hf_interface("scGPT")
-model = scgpt.load()  # this line can cause segmentation fault on inappropriate setups
 tokenizer = scGPTTokenizer()
-gene_ids = adata.var["feature_name"].to_numpy()  # Convert to numpy array
 tokenized_data = tokenizer.tokenize_cell_vectors(
     adata.X.toarray(), gene_ids)
-embeds = model(torch.tensor([x[1] for x in tokenized_data])).last_hidden_state
 ```
 # TDC Citation

 from tdc.model_server.tokenizers.scgpt import scGPTTokenizer
 import torch
+# Load an example dataset
+adata = DataLoader("cellxgene_sample_small",
+                   "./data",
+                   dataset_names=["cellxgene_sample_small"],
+                   no_convert=True).adata
+# Load the model and run inference
 scgpt = tdc_hf_interface("scGPT")
+model = scgpt.load()  # This line can cause a segmentation fault on an inappropriate setup
 tokenizer = scGPTTokenizer()
+gene_ids = adata.var["feature_name"].to_numpy(
+)  # Convert to numpy array
 tokenized_data = tokenizer.tokenize_cell_vectors(
     adata.X.toarray(), gene_ids)
+mask = torch.tensor([x != 0 for x in tokenized_data[0][1]],
+                    dtype=torch.bool)
+# Compute the embedding for the first tokenized entry only
+first_embed = model(tokenized_data[0][0],
+                    tokenized_data[0][1],
+                    attention_mask=mask)
 ```
 # TDC Citation