Tonic committed on
Commit
322840a
·
unverified ·
1 Parent(s): 85c5204

fix text classifier bias parameter thing

Browse files
Files changed (1) hide show
  1. tasks/text.py +12 -5
tasks/text.py CHANGED
@@ -35,11 +35,11 @@ class TextClassifier:
35
  def __init__(self):
36
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
37
  max_retries = 3
38
- model_name = "Tonic/climate-guard-toxic-agent"
39
 
40
  for attempt in range(max_retries):
41
  try:
42
- # Load config
43
  self.config = AutoConfig.from_pretrained(
44
  model_name,
45
  num_labels=8,
@@ -47,6 +47,12 @@ class TextClassifier:
47
  trust_remote_code=True
48
  )
49
 
 
 
 
 
 
 
50
  # Initialize tokenizer
51
  self.tokenizer = AutoTokenizer.from_pretrained(
52
  model_name,
@@ -56,12 +62,13 @@ class TextClassifier:
56
  trust_remote_code=True
57
  )
58
 
59
- # Initialize model
60
  self.model = AutoModelForSequenceClassification.from_pretrained(
61
  model_name,
62
  config=self.config,
63
  trust_remote_code=True,
64
- torch_dtype=torch.float32
 
65
  )
66
 
67
  # Move model to appropriate device
@@ -81,7 +88,7 @@ class TextClassifier:
81
  try:
82
  print(f"Processing batch {batch_idx} with {len(batch)} items")
83
 
84
- # Tokenize
85
  inputs = self.tokenizer(
86
  batch,
87
  return_tensors="pt",
 
35
  def __init__(self):
36
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
37
  max_retries = 3
38
+ model_name = "answerdotai/ModernBERT-base"
39
 
40
  for attempt in range(max_retries):
41
  try:
42
+ # Load config with modified settings
43
  self.config = AutoConfig.from_pretrained(
44
  model_name,
45
  num_labels=8,
 
47
  trust_remote_code=True
48
  )
49
 
50
+ # Remove problematic config attributes
51
+ if hasattr(self.config, 'norm_bias'):
52
+ delattr(self.config, 'norm_bias')
53
+ if hasattr(self.config, 'bias'):
54
+ delattr(self.config, 'bias')
55
+
56
  # Initialize tokenizer
57
  self.tokenizer = AutoTokenizer.from_pretrained(
58
  model_name,
 
62
  trust_remote_code=True
63
  )
64
 
65
+ # Initialize model with modified config
66
  self.model = AutoModelForSequenceClassification.from_pretrained(
67
  model_name,
68
  config=self.config,
69
  trust_remote_code=True,
70
+ torch_dtype=torch.float32,
71
+ ignore_mismatched_sizes=True
72
  )
73
 
74
  # Move model to appropriate device
 
88
  try:
89
  print(f"Processing batch {batch_idx} with {len(batch)} items")
90
 
91
+ # Tokenize with padding and truncation
92
  inputs = self.tokenizer(
93
  batch,
94
  return_tensors="pt",