frugal-ai-submission-template

Sleeping

App Files Files Community

Tonic committed on Feb 10

Commit

21262c6

verified ·

1 Parent(s): ada5a12

fix dataset loading

Browse files

Files changed (1) hide show

tasks/text.py +12 -24

tasks/text.py CHANGED Viewed

@@ -8,10 +8,20 @@ from concurrent.futures import ThreadPoolExecutor
 from typing import List, Dict, Tuple
 import torch
 from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoConfig
 from .utils.evaluation import TextEvaluationRequest
 from .utils.emissions import tracker, clean_emissions_data, get_space_info, start_tracking, stop_tracking
 # Disable torch compile
 os.environ["TORCH_COMPILE_DISABLE"] = "1"
@@ -57,28 +67,6 @@ class TextClassifier:
                 print(f"Attempt {attempt + 1} failed, retrying... Error: {str(e)}")
                 time.sleep(1)
-    def predict_single(self, text: str) -> int:
-        """Predict single text instance"""
-        try:
-            # Tokenize with explicit padding and truncation
-            inputs = self.tokenizer(
-                text,
-                return_tensors="pt",
-                truncation=True,
-                max_length=512,
-                padding='max_length'
-            ).to(self.device)
-            # Get prediction
-            with torch.no_grad():
-                outputs = self.model(**inputs)
-                predictions = torch.argmax(outputs.logits, dim=-1)
-                return predictions.item()
-        except Exception as e:
-            print(f"Error in single prediction: {str(e)}")
-            return 0  # Return default prediction on error
     def process_batch(self, batch: List[str], batch_idx: int) -> Tuple[List[int], int]:
         """Process a batch of texts and return their predictions"""
         try:
@@ -124,8 +112,8 @@ async def evaluate_text(request: TextEvaluationRequest):
     }
     try:
-        # Load and prepare the dataset using the dataset name from the request
-        dataset = load_dataset(request.dataset_name)
         # Convert string labels to integers
         dataset = dataset.map(lambda x: {"label": LABEL_MAPPING[x["label"]]})

 from typing import List, Dict, Tuple
 import torch
 from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoConfig
+from huggingface_hub import login
+from dotenv import load_dotenv
 from .utils.evaluation import TextEvaluationRequest
 from .utils.emissions import tracker, clean_emissions_data, get_space_info, start_tracking, stop_tracking
+# Load environment variables
+load_dotenv()
+# Authenticate with Hugging Face
+HF_TOKEN = os.getenv('HF_TOKEN')
+if HF_TOKEN:
+    login(token=HF_TOKEN)
 # Disable torch compile
 os.environ["TORCH_COMPILE_DISABLE"] = "1"
                 print(f"Attempt {attempt + 1} failed, retrying... Error: {str(e)}")
                 time.sleep(1)
     def process_batch(self, batch: List[str], batch_idx: int) -> Tuple[List[int], int]:
         """Process a batch of texts and return their predictions"""
         try:
     }
     try:
+        # Load and prepare the dataset using the correct dataset name
+        dataset = load_dataset("QuotaClimat/frugalaichallenge-text-train", use_auth_token=HF_TOKEN)
         # Convert string labels to integers
         dataset = dataset.map(lambda x: {"label": LABEL_MAPPING[x["label"]]})