frugal-ai-submission-template

Sleeping

App Files Files Community

Tonic committed on Feb 10

Commit

aaabc84

unverified ·

1 Parent(s): 5a0374b

ModernBertConfig class that inherits from PretrainedConfig

Browse files

Files changed (1) hide show

tasks/text.py +34 -17

tasks/text.py CHANGED Viewed

@@ -15,6 +15,32 @@ router = APIRouter()
 DESCRIPTION = "Climate Guard Toxic Agent is a ModernBERT for Climate Disinformation Detection"
 ROUTE = "/text"
 @router.post(ROUTE, tags=["Text Task"],
              description=DESCRIPTION)
 async def evaluate_text(request: TextEvaluationRequest):
@@ -52,35 +78,25 @@ async def evaluate_text(request: TextEvaluationRequest):
     #--------------------------------------------------------------------------------------------
     # MODEL INFERENCE CODE
     #--------------------------------------------------------------------------------------------
     try:
         # Set device
         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         # Model and tokenizer paths
         model_name = "Tonic/climate-guard-toxic-agent"
-        tokenizer_name = "Tonic/climate-guard-toxic-agent"
-        # Create config with essential parameters
-        config = AutoConfig.from_pretrained(
-            model_name,
             num_labels=8,
-            problem_type="single_label_classification",
-            architectures=["ModernBertForSequenceClassification"],
-            model_type="modernbert",
-            hidden_size=768,
-            num_attention_heads=12,
-            num_hidden_layers=22,
-            intermediate_size=1152,
-            max_position_embeddings=8192,
-            layer_norm_eps=1e-05,
-            classifier_dropout=0.0
         )
         # Load tokenizer
-        tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
-        # Load model with modified config
         model = AutoModelForSequenceClassification.from_pretrained(
             model_name,
             config=config,
@@ -138,6 +154,7 @@ async def evaluate_text(request: TextEvaluationRequest):
         print(f"Error during model inference: {str(e)}")
         raise
     #--------------------------------------------------------------------------------------------
     # MODEL INFERENCE ENDS HERE
     #--------------------------------------------------------------------------------------------

 DESCRIPTION = "Climate Guard Toxic Agent is a ModernBERT for Climate Disinformation Detection"
 ROUTE = "/text"
+# Configuration class for a ModernBERT model, built on PretrainedConfig.
+# Declares model_type "modernbert" and records the architecture
+# hyperparameters passed to __init__ as instance attributes.
+class ModernBertConfig(PretrainedConfig):
+    model_type = "modernbert"
+    def __init__(
+        self,
+        vocab_size=50368,
+        hidden_size=768,
+        num_hidden_layers=22,
+        num_attention_heads=12,
+        intermediate_size=1152,
+        max_position_embeddings=8192,
+        layer_norm_eps=1e-5,
+        classifier_dropout=0.0,
+        **kwargs
+    ):
+        # Keyword arguments not named in the signature are forwarded
+        # unchanged to PretrainedConfig.__init__.
+        super().__init__(**kwargs)
+        # Store each explicit hyperparameter on the instance under the
+        # same name as its parameter.
+        self.vocab_size = vocab_size
+        self.hidden_size = hidden_size
+        self.num_hidden_layers = num_hidden_layers
+        self.num_attention_heads = num_attention_heads
+        self.intermediate_size = intermediate_size
+        self.max_position_embeddings = max_position_embeddings
+        self.layer_norm_eps = layer_norm_eps
+        self.classifier_dropout = classifier_dropout
 @router.post(ROUTE, tags=["Text Task"],
              description=DESCRIPTION)
 async def evaluate_text(request: TextEvaluationRequest):
     #--------------------------------------------------------------------------------------------
     # MODEL INFERENCE CODE
     #--------------------------------------------------------------------------------------------
     try:
         # Set device
         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         # Model and tokenizer paths
         model_name = "Tonic/climate-guard-toxic-agent"
+        # Create custom config
+        config = ModernBertConfig(
             num_labels=8,
+            id2label={str(i): label for i, label in enumerate(LABEL_MAPPING.keys())},
+            label2id=LABEL_MAPPING
         )
         # Load tokenizer
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        # Load model with custom config
         model = AutoModelForSequenceClassification.from_pretrained(
             model_name,
             config=config,
         print(f"Error during model inference: {str(e)}")
         raise
     #--------------------------------------------------------------------------------------------
     # MODEL INFERENCE ENDS HERE
     #--------------------------------------------------------------------------------------------