Update README.md
Browse files
    	
        README.md
    CHANGED
    
    | @@ -18,13 +18,27 @@ from tdc import tdc_hf_interface | |
| 18 | 
             
            from tdc.model_server.tokenizers.scgpt import scGPTTokenizer
         | 
| 19 | 
             
            import torch
         | 
| 20 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 21 | 
             
            scgpt = tdc_hf_interface("scGPT")
         | 
| 22 | 
            -
            model = scgpt.load()  #  | 
| 23 | 
             
            tokenizer = scGPTTokenizer()
         | 
| 24 | 
            -
            gene_ids = adata.var["feature_name"].to_numpy( | 
|  | |
| 25 | 
             
            tokenized_data = tokenizer.tokenize_cell_vectors(
         | 
| 26 | 
             
                adata.X.toarray(), gene_ids)
         | 
| 27 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 28 | 
             
            ```
         | 
| 29 |  | 
| 30 | 
             
            # TDC Citation
         | 
|  | |
| 18 | 
             
            from tdc.model_server.tokenizers.scgpt import scGPTTokenizer
         | 
| 19 | 
             
            import torch
         | 
| 20 |  | 
| 21 | 
            +
            # Load an example dataset
         | 
| 22 | 
            +
            adata = DataLoader("cellxgene_sample_small",
         | 
| 23 | 
            +
                               "./data",
         | 
| 24 | 
            +
                               dataset_names=["cellxgene_sample_small"],
         | 
| 25 | 
            +
                               no_convert=True).adata
         | 
| 26 | 
            +
             | 
| 27 | 
            +
            # code for loading the model and performing inference
         | 
| 28 | 
             
            scgpt = tdc_hf_interface("scGPT")
         | 
| 29 | 
            +
            model = scgpt.load()  # This line can cause a segmentation fault if the environment is not set up correctly
         | 
| 30 | 
             
            tokenizer = scGPTTokenizer()
         | 
| 31 | 
            +
            gene_ids = adata.var["feature_name"].to_numpy(
         | 
| 32 | 
            +
            )  # Convert to numpy array
         | 
| 33 | 
             
            tokenized_data = tokenizer.tokenize_cell_vectors(
         | 
| 34 | 
             
                adata.X.toarray(), gene_ids)
         | 
| 35 | 
            +
            mask = torch.tensor([x != 0 for x in tokenized_data[0][1]],
         | 
| 36 | 
            +
                                dtype=torch.bool)
         | 
| 37 | 
            +
             | 
| 38 | 
            +
            # Extract first embedding
         | 
| 39 | 
            +
            first_embed = model(tokenized_data[0][0],
         | 
| 40 | 
            +
                                tokenized_data[0][1],
         | 
| 41 | 
            +
                                attention_mask=mask)
         | 
| 42 | 
             
            ```
         | 
| 43 |  | 
| 44 | 
             
            # TDC Citation
         | 
