Tonic committed on
Commit
275c5df
·
unverified ·
1 Parent(s): ed458ce

Passed configuration parameters directly to ModernBertConfig constructor

Browse files
Files changed (1) hide show
  1. tasks/text.py +29 -29
tasks/text.py CHANGED
@@ -60,42 +60,41 @@ async def evaluate_text(request: TextEvaluationRequest):
60
 
61
  # Model and tokenizer paths
62
  model_name = "Tonic/climate-guard-toxic-agent"
63
- tokenizer_name = "answerdotai/ModernBERT-base"
64
 
65
- # Define minimal configuration
66
- config_dict = {
67
- "_name_or_path": "answerdotai/ModernBERT-base",
68
- "architectures": ["ModernBertForSequenceClassification"],
69
- "model_type": "modernbert",
70
- "vocab_size": 50368,
71
- "hidden_size": 768,
72
- "num_hidden_layers": 22,
73
- "num_attention_heads": 12,
74
- "intermediate_size": 1152,
75
- "max_position_embeddings": 8192,
76
- "position_embedding_type": "absolute",
77
- "layer_norm_eps": 1e-5,
78
- "hidden_activation": "gelu",
79
- "classifier_activation": "gelu",
80
- "classifier_pooling": "mean",
81
- "num_labels": 8,
82
- "pad_token_id": 50283,
83
- "bos_token_id": 50281,
84
- "eos_token_id": 50282,
85
- "sep_token_id": 50282,
86
- "cls_token_id": 50281,
87
- "problem_type": "single_label_classification",
88
- "id2label": {str(i): label for i, label in enumerate(LABEL_MAPPING.keys())},
89
- "label2id": LABEL_MAPPING
90
- }
91
 
92
  # Load tokenizer
93
  tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
94
 
95
- # Load model with minimal config
96
  model = AutoModelForSequenceClassification.from_pretrained(
97
  model_name,
98
- config_dict=config_dict,
99
  trust_remote_code=True,
100
  ignore_mismatched_sizes=True,
101
  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
@@ -149,6 +148,7 @@ async def evaluate_text(request: TextEvaluationRequest):
149
  except Exception as e:
150
  print(f"Error during model inference: {str(e)}")
151
  raise
 
152
  #--------------------------------------------------------------------------------------------
153
  # MODEL INFERENCE ENDS HERE
154
  #--------------------------------------------------------------------------------------------
 
60
 
61
  # Model and tokenizer paths
62
  model_name = "Tonic/climate-guard-toxic-agent"
63
+ tokenizer_name = "Tonic/climate-guard-toxic-agent"
64
 
65
+ # Create config
66
+ config = ModernBertConfig(
67
+ vocab_size=50368,
68
+ hidden_size=768,
69
+ num_hidden_layers=22,
70
+ num_attention_heads=12,
71
+ intermediate_size=1152,
72
+ max_position_embeddings=8192,
73
+ layer_norm_eps=1e-5,
74
+ position_embedding_type="absolute",
75
+ pad_token_id=50283,
76
+ bos_token_id=50281,
77
+ eos_token_id=50282,
78
+ sep_token_id=50282,
79
+ cls_token_id=50281,
80
+ hidden_activation="gelu",
81
+ classifier_activation="gelu",
82
+ classifier_pooling="mean",
83
+ num_labels=8,
84
+ id2label={str(i): label for i, label in enumerate(LABEL_MAPPING.keys())},
85
+ label2id=LABEL_MAPPING,
86
+ problem_type="single_label_classification",
87
+ architectures=["ModernBertForSequenceClassification"],
88
+ model_type="modernbert"
89
+ )
 
90
 
91
  # Load tokenizer
92
  tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
93
 
94
+ # Load model
95
  model = AutoModelForSequenceClassification.from_pretrained(
96
  model_name,
97
+ config=config,
98
  trust_remote_code=True,
99
  ignore_mismatched_sizes=True,
100
  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
 
148
  except Exception as e:
149
  print(f"Error during model inference: {str(e)}")
150
  raise
151
+
152
  #--------------------------------------------------------------------------------------------
153
  # MODEL INFERENCE ENDS HERE
154
  #--------------------------------------------------------------------------------------------