Spaces:
				
			
			
	
			
			
					
		Running
		
	
	
	
			
			
	
	
	
	
		
		
					
		Running
		
	Count lines instead of using dataloader.dataset
Browse files
- translate.py +3 -4
    	
        translate.py
    CHANGED
    
    | @@ -116,7 +116,7 @@ def main( | |
| 116 | 
             
                    "top_p": top_p,
         | 
| 117 | 
             
                }
         | 
| 118 |  | 
| 119 | 
            -
                 | 
| 120 |  | 
| 121 | 
             
                if accelerator.is_main_process:
         | 
| 122 | 
             
                    print(
         | 
| @@ -156,7 +156,7 @@ def main( | |
| 156 | 
             
                    samples_seen: int = 0
         | 
| 157 |  | 
| 158 | 
             
                    with tqdm(
         | 
| 159 | 
            -
                        total=len(data_loader.dataset),
         | 
| 160 | 
             
                        desc="Dataset translation",
         | 
| 161 | 
             
                        leave=True,
         | 
| 162 | 
             
                        ascii=True,
         | 
| @@ -185,8 +185,7 @@ def main( | |
| 185 | 
             
                                if accelerator.is_main_process:
         | 
| 186 | 
             
                                    if step == len(data_loader) - 1:
         | 
| 187 | 
             
                                        tgt_text = tgt_text[
         | 
| 188 | 
            -
                                            : (len(data_loader.dataset) * num_return_sequences)
         | 
| 189 | 
            -
                                            - samples_seen
         | 
| 190 | 
             
                                        ]
         | 
| 191 | 
             
                                    else:
         | 
| 192 | 
             
                                        samples_seen += len(tgt_text)
         | 
|  | |
| 116 | 
             
                    "top_p": top_p,
         | 
| 117 | 
             
                }
         | 
| 118 |  | 
| 119 | 
            +
                total_lines: int = count_lines(sentences_path)
         | 
| 120 |  | 
| 121 | 
             
                if accelerator.is_main_process:
         | 
| 122 | 
             
                    print(
         | 
|  | |
| 156 | 
             
                    samples_seen: int = 0
         | 
| 157 |  | 
| 158 | 
             
                    with tqdm(
         | 
| 159 | 
            +
                        total=total_lines,
         | 
| 160 | 
             
                        desc="Dataset translation",
         | 
| 161 | 
             
                        leave=True,
         | 
| 162 | 
             
                        ascii=True,
         | 
|  | |
| 185 | 
             
                                if accelerator.is_main_process:
         | 
| 186 | 
             
                                    if step == len(data_loader) - 1:
         | 
| 187 | 
             
                                        tgt_text = tgt_text[
         | 
| 188 | 
            +
                                            : (total_lines * num_return_sequences) - samples_seen
         | 
|  | |
| 189 | 
             
                                        ]
         | 
| 190 | 
             
                                    else:
         | 
| 191 | 
             
                                        samples_seen += len(tgt_text)
         | 
