Update app.py
app.py CHANGED
@@ -37,6 +37,11 @@ tokenizer = AutoTokenizer.from_pretrained(model_name)
 data = {"text": [clean_text]}
 dataset = Dataset.from_dict(data)
 
+# Set a padding token manually
+tokenizer.pad_token = tokenizer.eos_token  # Use EOS as PAD token
+# Alternatively, add a new custom pad token
+# tokenizer.add_special_tokens({'pad_token': '[PAD]'})
+
 # Tokenization function
 def tokenize_function(examples):
     tokens = tokenizer(examples["text"], truncation=True, padding="max_length", max_length=512)
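For context, a minimal self-contained sketch of how the patched section fits together. It assumes model_name points to a GPT-style checkpoint whose tokenizer ships without a pad token and that clean_text holds the already-preprocessed input; both names come from the diff, but the concrete values below are placeholders, not the app's real configuration:

from transformers import AutoTokenizer
from datasets import Dataset

model_name = "gpt2"  # placeholder checkpoint; its tokenizer defines no pad token
tokenizer = AutoTokenizer.from_pretrained(model_name)

clean_text = "example preprocessed text"  # placeholder for the cleaned input used in app.py
data = {"text": [clean_text]}
dataset = Dataset.from_dict(data)

# Set a padding token manually: without one, padding="max_length" below raises an error
tokenizer.pad_token = tokenizer.eos_token  # Use EOS as PAD token
# Alternatively, add a new custom pad token
# tokenizer.add_special_tokens({'pad_token': '[PAD]'})

# Tokenization function
def tokenize_function(examples):
    tokens = tokenizer(examples["text"], truncation=True, padding="max_length", max_length=512)
    return tokens

tokenized_dataset = dataset.map(tokenize_function, batched=True)

Reusing EOS as the pad token keeps the model unchanged; if the commented-out add_special_tokens route is taken instead, the model's embedding matrix must also be resized with model.resize_token_embeddings(len(tokenizer)).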
