Commit 
							
							·
						
						391404d
	
1
								Parent(s):
							
							049a0c7
								
Update README.md
Browse files
    	
        README.md
    CHANGED
    
    | 
         @@ -42,18 +42,18 @@ The details of three subnets are: 
     | 
|
| 42 | 
         
             
            ## Compute your speaker embeddings
         
     | 
| 43 | 
         | 
| 44 | 
         
             
            ```python
         
     | 
| 45 | 
         
            -
            import  
     | 
| 46 | 
         
             
            from sugar.models import WrappedModel
         
     | 
| 47 | 
         
            -
             
     | 
| 48 | 
         
            -
            signal, fs =torchaudio.load(wav_file)
         
     | 
| 49 | 
         | 
| 50 | 
         
             
            repo_id = "mechanicalsea/efficient-tdnn"
         
     | 
| 51 | 
         
             
            supernet_filename = "depth/depth.torchparams"
         
     | 
| 52 | 
         
             
            subnet_filename = "depth/depth.ecapa-tdnn.3.512.512.512.512.5.3.3.3.1536.bn.tar"
         
     | 
| 53 | 
         
            -
            subnet, info = WrappedModel.from_pretrained(
         
     | 
| 54 | 
         
            -
             
     | 
| 
         | 
|
| 55 | 
         | 
| 56 | 
         
            -
            embedding = subnet( 
     | 
| 57 | 
         
             
            ```
         
     | 
| 58 | 
         | 
| 59 | 
         
             
            ## Inference on GPU
         
     | 
| 
         @@ -112,14 +112,13 @@ More details about EfficientTDNN can be found in the paper [EfficientTDNN](https: 
     | 
|
| 112 | 
         
             
            Please, cite EfficientTDNN if you use it for your research or business.
         
     | 
| 113 | 
         | 
| 114 | 
         
             
            ```bibtex
         
     | 
| 115 | 
         
            -
            @article{ 
     | 
| 116 | 
         
            -
               
     | 
| 117 | 
         
            -
               
     | 
| 118 | 
         
            -
               
     | 
| 119 | 
         
            -
              year={ 
     | 
| 120 | 
         
            -
               
     | 
| 121 | 
         
            -
               
     | 
| 122 | 
         
            -
               
     | 
| 123 | 
         
            -
               
     | 
| 124 | 
         
            -
            }
         
     | 
| 125 | 
         
             
            ```
         
     | 
| 
         | 
|
| 42 | 
         
             
            ## Compute your speaker embeddings
         
     | 
| 43 | 
         | 
| 44 | 
         
             
            ```python
         
     | 
| 45 | 
         
            +
            import torch
         
     | 
| 46 | 
         
             
            from sugar.models import WrappedModel
         
     | 
| 47 | 
         
            +
            wav_input_16khz = torch.randn(1,10000).cuda()
         
     | 
| 
         | 
|
| 48 | 
         | 
| 49 | 
         
             
            repo_id = "mechanicalsea/efficient-tdnn"
         
     | 
| 50 | 
         
             
            supernet_filename = "depth/depth.torchparams"
         
     | 
| 51 | 
         
             
            subnet_filename = "depth/depth.ecapa-tdnn.3.512.512.512.512.5.3.3.3.1536.bn.tar"
         
     | 
| 52 | 
         
            +
            subnet, info = WrappedModel.from_pretrained(repo_id=repo_id, supernet_filename=supernet_filename, subnet_filename=subnet_filename)
         
     | 
| 53 | 
         
            +
            subnet = subnet.cuda()
         
     | 
| 54 | 
         
            +
            subnet = subnet.eval()
         
     | 
| 55 | 
         | 
| 56 | 
         
            +
            embedding = subnet(wav_input_16khz)
         
     | 
| 57 | 
         
             
            ```
         
     | 
| 58 | 
         | 
| 59 | 
         
             
            ## Inference on GPU
         
     | 
| 
         | 
|
| 112 | 
         
             
            Please, cite EfficientTDNN if you use it for your research or business.
         
     | 
| 113 | 
         | 
| 114 | 
         
             
            ```bibtex
         
     | 
| 115 | 
         
            +
            @article{wr-efficienttdnn-2022,
         
     | 
| 116 | 
         
            +
              author={Wang, Rui and Wei, Zhihua and Duan, Haoran and Ji, Shouling and Long, Yang and Hong, Zhen},
         
     | 
| 117 | 
         
            +
              journal={IEEE/ACM Transactions on Audio, Speech, and Language Processing}, 
         
     | 
| 118 | 
         
            +
              title={EfficientTDNN: Efficient Architecture Search for Speaker Recognition}, 
         
     | 
| 119 | 
         
            +
              year={2022},
         
     | 
| 120 | 
         
            +
              volume={30},
         
     | 
| 121 | 
         
            +
              number={},
         
     | 
| 122 | 
         
            +
              pages={2267-2279},
         
     | 
| 123 | 
         
            +
              doi={10.1109/TASLP.2022.3182856}}
         
     | 
| 
         | 
|
| 124 | 
         
             
            ```
         
     |