frugal-ai-submission-template

Sleeping

Tonic committed on Feb 10

Commit

6af9c73

unverified ·

1 Parent(s): 4357468

complete code

Files changed (1) hide show

tasks/text.py CHANGED Viewed

@@ -7,7 +7,7 @@ import os
 from concurrent.futures import ThreadPoolExecutor
 from typing import List, Dict, Tuple
 import torch
-from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline, AutoConfig
 from huggingface_hub import login
 from dotenv import load_dotenv
@@ -123,18 +123,17 @@ async def evaluate_text(request: TextEvaluationRequest):
         # Load and prepare the dataset
         dataset = load_dataset("QuotaClimat/frugalaichallenge-text-train", token=HF_TOKEN)
-        # Convert string labels to integers with error handling
         def convert_label(example):
             try:
                 return {"label": LABEL_MAPPING[example["label"]]}
-            except KeyError as e:
                 print(f"Warning: Unknown label {example['label']}")
-                # Return default label or raise exception
-                return {"label": 0}  # or raise e if you want to fail on unknown labels
         dataset = dataset.map(convert_label)
-        # Split dataset
         test_dataset = dataset["test"]
         # Start tracking emissions
@@ -147,7 +146,7 @@ async def evaluate_text(request: TextEvaluationRequest):
         classifier = TextClassifier()
         # Prepare batches
-        batch_size = 24
         quotes = test_dataset["quote"]
         num_batches = len(quotes) // batch_size + (1 if len(quotes) % batch_size != 0 else 0)
         batches = [

 from concurrent.futures import ThreadPoolExecutor
 from typing import List, Dict, Tuple
 import torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
 from huggingface_hub import login
 from dotenv import load_dotenv
         # Load and prepare the dataset
         dataset = load_dataset("QuotaClimat/frugalaichallenge-text-train", token=HF_TOKEN)
+        # Convert string labels to integers
         def convert_label(example):
             try:
                 return {"label": LABEL_MAPPING[example["label"]]}
+            except KeyError:
                 print(f"Warning: Unknown label {example['label']}")
+                return {"label": 0}
         dataset = dataset.map(convert_label)
+        # Get test dataset
         test_dataset = dataset["test"]
         # Start tracking emissions
         classifier = TextClassifier()
         # Prepare batches
+        batch_size = 16  # Reduced batch size for better stability
         quotes = test_dataset["quote"]
         num_batches = len(quotes) // batch_size + (1 if len(quotes) % batch_size != 0 else 0)
         batches = [