import torch import torch.nn as nn from transformers import AutoTokenizer, AutoModelForMaskedLM from pathlib import Path import json import re import gc class BERTHandler: """ VRAM-safe BERT model handler for loading, tokenization, and saving Handles all token management and checkpoint operations with proper cleanup """ def __init__(self, symbolic_tokens=None): # Default symbolic tokens self.symbolic_tokens = symbolic_tokens or [ "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "